# File size: 5,169 Bytes | 7edf1ce
import random
from joblib import Parallel, delayed
from pathlib import Path
from constants import INSTRUMENT_CLASSES, INSTRUMENT_TRANSFER_CLASSES
from utils import get_files, timeit, FileCompressor
class Familizer:
    """
    Convert instrument tokens in text files between MIDI program numbers and
    instrument family numbers.

    Instrument tokens have the form ``INST=<number>``; the ``INST=DRUMS`` token
    is always left untouched. The program -> family mapping is looked up in
    INSTRUMENT_CLASSES. The family -> program mapping uses one reference
    program per family, drawn at random once when the instance is created.
    """

    # Operations accepted by replace_tokens and replace_instrument_token.
    _OPERATIONS = ("family", "program")

    def __init__(self, n_jobs=-1, arbitrary=False):
        """
        n_jobs is forwarded to joblib.Parallel and to FileCompressor.
        arbitrary selects which class table seeds the family -> program mapping
        (see reverse_family).
        """
        self.n_jobs = n_jobs
        self.reverse_family(arbitrary)

    def get_family_number(self, program_number):
        """
        Given a MIDI instrument number, return its associated instrument family number.
        Returns None when no entry of INSTRUMENT_CLASSES contains the program number.
        """
        for instrument_class in INSTRUMENT_CLASSES:
            if program_number in instrument_class["program_range"]:
                return instrument_class["family_number"]
        return None

    def reverse_family(self, arbitrary):
        """
        Create a dictionary of family numbers to randomly assigned program numbers.
        This is used to reverse the family number tokens back to program number tokens.
        INSTRUMENT_TRANSFER_CLASSES is used only when arbitrary is exactly True,
        otherwise INSTRUMENT_CLASSES is used.
        """
        if arbitrary is True:
            int_class = INSTRUMENT_TRANSFER_CLASSES
        else:
            int_class = INSTRUMENT_CLASSES
        # One program is drawn per family here and reused for every later lookup.
        self.reference_programs = {
            family["family_number"]: random.choice(family["program_range"])
            for family in int_class
        }

    def get_program_number(self, family_number):
        """
        Given a family number, return the reference program number that was drawn
        for that family when the mapping was built (see reverse_family).
        This is the reverse operation of get_family_number.
        Raises KeyError if the family number has no reference program.
        """
        # Explicit raise instead of assert: asserts are stripped under `python -O`.
        try:
            return self.reference_programs[family_number]
        except KeyError:
            raise KeyError(
                f"No reference program for family number {family_number!r}"
            ) from None

    # Replace instruments in text files
    def replace_instrument_token(self, token):
        """
        Given a MIDI program number in a word token, replace it with the family or program
        number token depending on the operation.
        e.g. INST=86 -> INST=10
        Raises ValueError if self.operation is neither 'family' nor 'program', or if
        no instrument family covers the program number.
        """
        inst_number = int(token.split("=")[1])
        if self.operation == "family":
            replacement = self.get_family_number(inst_number)
            if replacement is None:
                # Fail loudly rather than writing the literal token "INST=None".
                raise ValueError(
                    f"No instrument family covers program number {inst_number} "
                    f"(token {token!r})"
                )
        elif self.operation == "program":
            replacement = self.get_program_number(inst_number)
        else:
            # Previously this fell through and returned None, which only failed
            # later inside str.join with an unrelated-looking TypeError.
            raise ValueError(
                f"Unknown operation {self.operation!r}; expected 'family' or 'program'"
            )
        return "INST=" + str(replacement)

    def replace_instrument_in_text(self, text):
        """
        Given a text piece, replace all instrument tokens according to the current
        operation. Tokens are separated by single spaces; INST=DRUMS is kept as is.
        """
        return " ".join(
            self.replace_instrument_token(token)
            if token.startswith("INST=") and token != "INST=DRUMS"
            else token
            for token in text.split(" ")
        )

    def replace_instruments_in_file(self, file):
        """Given a text file, replace all its instrument tokens, overwriting the file in place."""
        text = file.read_text()
        file.write_text(self.replace_instrument_in_text(text))

    @timeit
    def replace_instruments(self):
        """
        Replace the instrument tokens of every 'txt' file that get_files finds in
        self.output_directory, one joblib task per file.
        """
        files = get_files(self.output_directory, extension="txt")
        Parallel(n_jobs=self.n_jobs)(
            delayed(self.replace_instruments_in_file)(file) for file in files
        )

    def replace_tokens(self, input_directory, output_directory, operation):
        """
        Given a directory and an operation, perform the operation on all text files in the directory.
        operation can be either 'family' or 'program'.
        Raises ValueError for any other operation, before any file is touched.
        """
        if operation not in self._OPERATIONS:
            raise ValueError(
                f"Unknown operation {operation!r}; expected 'family' or 'program'"
            )
        self.input_directory = input_directory
        self.output_directory = output_directory
        self.operation = operation

        # Uncompress files, replace tokens, compress files
        fc = FileCompressor(self.input_directory, self.output_directory, self.n_jobs)
        fc.unzip()
        self.replace_instruments()
        fc.zip()
        print(self.operation + " complete.")

    def to_family(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        family number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "family")

    def to_program(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        program number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "program")
if __name__ == "__main__":
    # Worker count handed to the Familizer for its parallel steps.
    n_jobs = -1
    familizer = Familizer(n_jobs=n_jobs)

    # Source directory to familize; results go to a "family" subdirectory of it.
    dataset_path = Path("midi/dataset/first_selection/validate")
    input_directory = dataset_path.resolve()
    output_directory = input_directory.joinpath("family")

    # Program-number tokens -> family-number tokens.
    familizer.to_family(input_directory, output_directory)

    # To go the other way (family tokens -> program tokens), point at a
    # familized directory and call to_program instead:
    # input_directory = Path("../data/music_picks/encoded_samples/validate/family").resolve()
    # output_directory = input_directory.parent / "program"
    # familizer.to_program(input_directory, output_directory)
|