File size: 5,168 Bytes
7edf1ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import random
from joblib import Parallel, delayed
from pathlib import Path
from constants import INSTRUMENT_CLASSES, INSTRUMENT_TRANSFER_CLASSES
from utils import get_files, timeit, FileCompressor


class Familizer:
    """
    Rewrite "INST=<number>" tokens in encoded text files, converting between
    MIDI program numbers and instrument family numbers.

    The "family" operation maps a program number to the family whose
    "program_range" contains it. The "program" operation maps a family number
    back to a single reference program number, which is drawn at random once
    per instance (see reverse_family).
    """

    # Operations understood by replace_tokens / replace_instrument_token.
    _OPERATIONS = ("family", "program")

    def __init__(self, n_jobs=-1, arbitrary=False):
        """
        n_jobs: number of parallel jobs, forwarded to joblib.Parallel and to
            FileCompressor.
        arbitrary: when exactly True, the reference programs are drawn from
            INSTRUMENT_TRANSFER_CLASSES instead of INSTRUMENT_CLASSES.
        """
        self.n_jobs = n_jobs
        self.reverse_family(arbitrary)

    @classmethod
    def _check_operation(cls, operation):
        """Raise ValueError unless operation is one of the supported operations."""
        if operation not in cls._OPERATIONS:
            raise ValueError(
                "Unknown operation "
                + repr(operation)
                + "; expected one of "
                + repr(cls._OPERATIONS)
            )

    def get_family_number(self, program_number):
        """
        Given a MIDI instrument number, return its associated instrument family number.
        Returns None when no entry of INSTRUMENT_CLASSES contains program_number.
        """
        for instrument_class in INSTRUMENT_CLASSES:
            if program_number in instrument_class["program_range"]:
                return instrument_class["family_number"]
        return None

    def reverse_family(self, arbitrary):
        """
        Create a dictionary of family numbers to randomly assigned program numbers.
        This is used to reverse the family number tokens back to program number tokens.

        The result is stored in self.reference_programs. The source table is
        INSTRUMENT_TRANSFER_CLASSES only when arbitrary is exactly True;
        any other value selects INSTRUMENT_CLASSES.
        """
        if arbitrary is True:
            int_class = INSTRUMENT_TRANSFER_CLASSES
        else:
            int_class = INSTRUMENT_CLASSES

        # One random draw per family, in table order.
        self.reference_programs = {
            family["family_number"]: random.choice(family["program_range"])
            for family in int_class
        }

    def get_program_number(self, family_number):
        """
        Given a family number, return the reference program number that was
        randomly chosen for it when reverse_family ran.
        This is the reverse operation of get_family_number.

        Raises KeyError if family_number has no reference program.
        """
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if family_number not in self.reference_programs:
            raise KeyError(
                "No reference program for family number " + repr(family_number)
            )
        return self.reference_programs[family_number]

    # Replace instruments in text files
    def replace_instrument_token(self, token):
        """
        Given an "INST=<number>" word token, replace the number with the family
        number or the program number, depending on self.operation.
        e.g. INST=86 -> INST=10

        Raises ValueError if self.operation is not "family" or "program", or if
        the text after "=" is not an integer.
        """
        inst_number = int(token.split("=")[1])
        if self.operation == "family":
            # NOTE(review): a program number outside every program_range gives
            # None here, so the emitted token is "INST=None".
            replacement = self.get_family_number(inst_number)
        elif self.operation == "program":
            replacement = self.get_program_number(inst_number)
        else:
            # Previously this fell through and returned None, which only
            # surfaced later as a TypeError inside " ".join(...).
            self._check_operation(self.operation)
            raise ValueError("Unknown operation " + repr(self.operation))
        return "INST=" + str(replacement)

    def replace_instrument_in_text(self, text):
        """
        Given a text piece, convert every instrument token according to
        self.operation. Tokens are separated by single spaces; "INST=DRUMS" and
        all non-instrument tokens are passed through unchanged.
        """
        return " ".join(
            self.replace_instrument_token(token)
            if token.startswith("INST=") and token != "INST=DRUMS"
            else token
            for token in text.split(" ")
        )

    def replace_instruments_in_file(self, file):
        """
        Given a text file (a pathlib.Path-like object providing read_text and
        write_text), convert its instrument tokens and overwrite it in place.
        """
        text = file.read_text()
        file.write_text(self.replace_instrument_in_text(text))

    @timeit
    def replace_instruments(self):
        """
        Convert the instrument tokens of every "txt" file that get_files finds
        for self.output_directory, processing files in parallel with joblib.
        self.output_directory and self.operation must already be set, which
        replace_tokens does.
        """
        files = get_files(self.output_directory, extension="txt")
        Parallel(n_jobs=self.n_jobs)(
            delayed(self.replace_instruments_in_file)(file) for file in files
        )

    def replace_tokens(self, input_directory, output_directory, operation):
        """
        Given a directory and an operation, perform the operation on all text files in the directory.
        operation can be either 'family' or 'program'.

        Raises ValueError for any other operation, before any file is touched.
        """
        # Fail fast: reject a bad operation before unzipping anything.
        self._check_operation(operation)

        self.input_directory = input_directory
        self.output_directory = output_directory
        self.operation = operation

        # Uncompress files, replace tokens, compress files
        fc = FileCompressor(self.input_directory, self.output_directory, self.n_jobs)
        fc.unzip()
        self.replace_instruments()
        fc.zip()
        print(self.operation + " complete.")

    def to_family(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        family number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "family")

    def to_program(self, input_directory, output_directory):
        """
        Given a directory containing zip files, replace all instrument tokens with
        program number tokens. The output is a directory of zip files.
        """
        self.replace_tokens(input_directory, output_directory, "program")


if __name__ == "__main__":
    # Build the converter; n_jobs=-1 is passed straight through to the
    # parallel workers used by Familizer.
    familizer = Familizer(n_jobs=-1)

    # Program -> family: read the zipped samples from this directory and write
    # the converted archives into its "family" subdirectory.
    source_directory = Path("midi/dataset/first_selection/validate").resolve()  # fmt: skip
    familizer.to_family(source_directory, source_directory / "family")

    # Family -> program (disabled): point at a "family" directory and write the
    # result into a sibling "program" directory.
    # source_directory = Path("../data/music_picks/encoded_samples/validate/family").resolve()  # fmt: skip
    # familizer.to_program(source_directory, source_directory.parent / "program")