Spaces:
Running
on
Zero
Running
on
Zero
#! /usr/bin/env python3 | |
# This script takes a directory of MIDI files and converts them to images | |
# It was intended for the P909 dataset | |
# It will create a directory of images for each MIDI file, where each image is a frame of the MIDI file | |
# if you want to use the chord extractor, you need to run chord extractor on all the files, and then output | |
# a list of all the unique chord names to all_chords.txt e.g. | |
# $ cat ~/datasets/jsb_chorales_midi/*/*_chords.txt | awk -F'\t' '{print $3}' | sort | uniq -c | awk '{print $2}' > all_chords.txt | |
# cat midis/*_chords.txt | awk -F'\t' '{print $3}' | sort | uniq -c | awk '{print $2}' > all_chords.txt | |
# find midis -name '*_chords.txt' -exec cat {} + | awk -F'\t' '{print $3}' | sort | uniq -c | awk '{print $2}' > all_chords.txt | |
import os | |
import sys | |
from multiprocessing import Pool, cpu_count, set_start_method | |
from tqdm import tqdm | |
from control_toys.data import fast_scandir | |
from functools import partial | |
import argparse | |
from control_toys.pianoroll import midi_to_pr_img | |
from control_toys.chords import simplify_chord, POSSIBLE_CHORDS | |
def wrapper(args, midi_file, all_chords=None): | |
return midi_to_pr_img(midi_file, args.output_dir, show_chords=args.chords, all_chords=all_chords, | |
chord_names=args.chord_names, filter_mp=args.filter_mp, add_onsets=args.onsets, | |
remove_leading_silence=(not args.silence)) | |
if __name__ == '__main__': | |
p = argparse.ArgumentParser(description=__doc__, | |
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
p.add_argument('-c','--chords', action='store_true', help='infer chords and add markers') | |
p.add_argument('--chord-names', action='store_true', help='Add text for chord names') | |
p.add_argument('--filter-mp', default=True, help='filter out non-piano, non-melody instruments') | |
#p.add_argument('--onsets', default=True, type=bool, help='add onset markers') | |
p.add_argument('--onsets', default=True, action=argparse.BooleanOptionalAction, help='Produce onset markers') # either --onsets or --no-onsets, default is...? | |
p.add_argument('--silence', default=True, action=argparse.BooleanOptionalAction, help='Leave silence at start of song (True) or remove it (False)') | |
p.add_argument('--start-method', type=str, default='fork', | |
choices=['fork', 'forkserver', 'spawn'], | |
help='the multiprocessing start method') | |
p.add_argument('--simplify', action='store_true', help='Simplify chord types, e.g. remove 13s') | |
p.add_argument('--skip-versions', default=True, help='skip extra versions of the same song') | |
p.add_argument("midi_dirs", nargs='+', help="directories containing MIDI files") | |
p.add_argument("output_dir", help="output directory") | |
args = p.parse_args() | |
print("args = ",args) | |
set_start_method(args.start_method) | |
midi_dirs, output_dir = args.midi_dirs, args.output_dir | |
if not os.path.exists(output_dir): | |
os.makedirs(output_dir) | |
if os.path.isdir(midi_dirs[0]): | |
midi_files = [] | |
for mdir in midi_dirs: | |
m_subdirs, mf = fast_scandir(mdir, ['mid', 'midi']) | |
if mf != []: midi_files = midi_files + mf | |
elif os.path.isfile(midi_dirs[0]): | |
midi_files = midi_dirs | |
if args.skip_versions: | |
midi_files = [f for f in midi_files if '/versions/' not in f] | |
print("len(midi_files) = ",len(midi_files)) # just a check for debugging | |
if args.chords: | |
# TODO: this is janky af but for now... | |
# Get a list of all unique chords from a premade text file list of possible chords | |
# to make the file in bash, assuming you've already run extract_chords.py: (leave sort alphabetical, don't sort in order of freq) | |
# cat */*_chords.txt | awk -F'\t' '{print $3}' | sort | uniq -c | awk '{print $2}' > all_chords.txt | |
#with open('all_chords.txt') as f: | |
# all_chords = f.read().splitlines() | |
# use possible chords as all chords | |
all_chords = POSSIBLE_CHORDS # now we just generate these | |
if args.simplify: | |
all_chords = list(set([simplify_chord(c) for c in all_chords])) | |
print("len(all_chords) = ",len(all_chords)) | |
print("all_chords = ",all_chords) # just a check for debugging | |
else: | |
all_chords = None | |
process_one = partial(wrapper, args, all_chords=all_chords) | |
num_cpus = cpu_count() | |
with Pool(num_cpus) as p: | |
list(tqdm(p.imap(process_one, midi_files), total=len(midi_files), desc='Processing MIDI files')) | |
print("Finished") | |