|
import random |
|
from joblib import Parallel, delayed |
|
from pathlib import Path |
|
from constants import INSTRUMENT_CLASSES, INSTRUMENT_TRANSFER_CLASSES |
|
from utils import get_files, timeit, FileCompressor |
|
|
|
|
|
class Familizer:
    """
    Convert instrument tokens between MIDI program numbers and family numbers.

    Tokens have the form ``INST=<number>``. The "family" operation maps a
    program number to its family number through INSTRUMENT_CLASSES. The
    "program" operation maps a family number back to the single program number
    that was drawn at random for that family when the instance was created.
    """

    # Operations understood by replace_tokens / replace_instrument_token.
    _OPERATIONS = ("family", "program")

    def __init__(self, n_jobs=-1, arbitrary=False):
        """
        n_jobs is forwarded to joblib.Parallel and to FileCompressor.
        arbitrary selects INSTRUMENT_TRANSFER_CLASSES (when it is exactly True)
        instead of INSTRUMENT_CLASSES as the source of the reverse mapping.
        """
        self.n_jobs = n_jobs
        # These are filled in by replace_tokens(). Defining them here means that
        # using the instance before a conversion is configured fails with an
        # explicit error instead of an AttributeError.
        self.input_directory = None
        self.output_directory = None
        self.operation = None
        self.reverse_family(arbitrary)

    def get_family_number(self, program_number):
        """
        Given a MIDI instrument number, return its associated instrument family number.
        Returns None when no entry of INSTRUMENT_CLASSES contains program_number.
        """
        for instrument_class in INSTRUMENT_CLASSES:
            if program_number in instrument_class["program_range"]:
                return instrument_class["family_number"]
        return None

    def reverse_family(self, arbitrary):
        """
        Create a dictionary of family numbers to randomly assigned program numbers.
        This is used to reverse the family number tokens back to program number tokens.
        The result is stored in self.reference_programs.
        """
        # The identity check is kept on purpose: only the literal True switches
        # tables, exactly as before (truthy non-bool values do not).
        if arbitrary is True:
            int_class = INSTRUMENT_TRANSFER_CLASSES
        else:
            int_class = INSTRUMENT_CLASSES

        # One random.choice call per family, in table order, so a seeded
        # random module yields the same mapping as the original loop.
        self.reference_programs = {
            family["family_number"]: random.choice(family["program_range"])
            for family in int_class
        }

    def get_program_number(self, family_number):
        """
        Given a family number, return the program number that was randomly
        assigned to it by reverse_family.
        This is the reverse operation of get_family_number.
        Raises KeyError when family_number has no assigned program.
        """
        # Explicit raise instead of assert: asserts are removed under
        # `python -O`, and validation must not depend on interpreter flags.
        if family_number not in self.reference_programs:
            raise KeyError(
                "No reference program for family number {!r}".format(family_number)
            )
        return self.reference_programs[family_number]

    def replace_instrument_token(self, token):
        """
        Given a MIDI program number in a word token, replace it with the family or program
        number token depending on the operation.
        e.g. INST=86 -> INST=10
        Raises ValueError when self.operation is not 'family' or 'program', or
        when the program number does not belong to any instrument family.
        """
        inst_number = int(token.split("=")[1])

        if self.operation == "family":
            family_number = self.get_family_number(inst_number)
            if family_number is None:
                # Previously this silently produced the token "INST=None".
                raise ValueError(
                    "Program number {!r} in token {!r} has no instrument family".format(
                        inst_number, token
                    )
                )
            return "INST=" + str(family_number)

        if self.operation == "program":
            return "INST=" + str(self.get_program_number(inst_number))

        # Previously fell through and returned None, which only surfaced later
        # as a TypeError inside " ".join(...).
        raise ValueError(
            "Unknown operation {!r}; expected one of {}".format(
                self.operation, self._OPERATIONS
            )
        )

    def replace_instrument_in_text(self, text):
        """
        Given a text piece, replace every INST=<number> token according to the
        current operation. The INST=DRUMS token and all other tokens are kept as is.
        """
        return " ".join(
            self.replace_instrument_token(token)
            if token.startswith("INST=") and token != "INST=DRUMS"
            else token
            for token in text.split(" ")
        )

    def replace_instruments_in_file(self, file):
        """Given a text file, replace its instrument tokens and write the result back to the same file."""
        text = file.read_text()
        file.write_text(self.replace_instrument_in_text(text))

    @timeit
    def replace_instruments(self):
        """
        Replace the instrument tokens in every file returned by
        get_files(self.output_directory, extension="txt").
        The files are processed through joblib.Parallel with self.n_jobs.
        """
        files = get_files(self.output_directory, extension="txt")
        Parallel(n_jobs=self.n_jobs)(
            delayed(self.replace_instruments_in_file)(file) for file in files
        )

    def replace_tokens(self, input_directory, output_directory, operation):
        """
        Given a directory and an operation, perform the operation on all text files in the directory.
        operation can be either 'family' or 'program'.
        Raises ValueError for any other operation, before any file is touched.
        """
        # Fail fast: reject a bad operation before archives are unpacked.
        if operation not in self._OPERATIONS:
            raise ValueError(
                "Unknown operation {!r}; expected one of {}".format(
                    operation, self._OPERATIONS
                )
            )

        self.input_directory = input_directory
        self.output_directory = output_directory
        self.operation = operation

        # NOTE(review): FileCompressor is defined in utils; presumably unzip()
        # extracts the input archives into output_directory and zip() packs
        # the rewritten files again - confirm against utils.
        fc = FileCompressor(self.input_directory, self.output_directory, self.n_jobs)
        fc.unzip()
        self.replace_instruments()
        fc.zip()
        print(self.operation + " complete.")

    def to_family(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        family number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "family")

    def to_program(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        program number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "program")
|
|
|
|
|
if __name__ == "__main__":
    # Source archives live in the validation split; the converted archives
    # go into a "family" sub-directory underneath it.
    input_directory = Path("midi/dataset/first_selection/validate").resolve()
    output_directory = input_directory.joinpath("family")

    # n_jobs is handed to Familizer, which forwards it to joblib.Parallel
    # (where -1 requests all available CPUs).
    n_jobs = -1
    familizer = Familizer(n_jobs=n_jobs)

    # Rewrite program-number tokens as family-number tokens.
    familizer.to_family(input_directory, output_directory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|