File size: 10,281 Bytes
5fc3f34
8ce6b5b
5fc3f34
 
 
 
 
 
 
8ce6b5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc3f34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252

import os
# from mido import MidiFile
import mido
import music21
import numpy as np
import pandas as pd
from music21 import *
from mido import Message, MidiFile, MidiTrack
# Length of every note segment stored in prepared.csv: the first
# (window - 1) notes are the context and the last one is the note to predict.
window = 3

# How many new notes each generation run should produce.
# TODO: change this to accept values according to user
num_notes = 100

# MIDI ticks per quarter note; durations (in quarter lengths) are multiplied
# by this value when they are written out as MIDI delta times.
quarter_note_ticks = 480

# Note durations (in quarter lengths) kept while parsing; anything else is
# skipped. Ranges from a 16th note (0.25) up to a whole note (4.0).
accepeted_lengths = [
    0.25,
    0.375,
    0.5,
    0.75,
    1,
    1.5,
    2.0,
    3.0,
    4.0,
]
#Finds all absolute paths in directory
#https://stackoverflow.com/questions/9816816/get-absolute-paths-of-all-files-in-a-directory
def abs_paths(dir):
    """Yield the absolute path of every file found under ``dir``.

    The directory tree is walked recursively with ``os.walk``; directories
    themselves are not yielded, only the files they contain.
    """
    for root, _subdirs, names in os.walk(dir):
        yield from (os.path.abspath(os.path.join(root, name)) for name in names)
def pitch_to_int(nameWithOctave):
    """Convert a pitch name with octave into its MIDI note number.

    The name is read as ``<letter><accidentals><octave digit>``, for example
    ``'C4'``, ``'F#3'``, ``'B-4'`` or ``'C##5'``. Middle C (``'C4'``) maps
    to 60.

    Args:
        nameWithOctave: pitch name whose first character is a letter in A-G
            and whose last character is a single octave digit.

    Returns:
        The MIDI note number as an int.

    Raises:
        KeyError: if the first character is not one of C, D, E, F, G, A, B.
        ValueError: if the last character is not a digit.
    """
    # semitone offset of each natural note within an octave
    letter_dict = {'C':0,'D':2,'E':4,'F':5,'G':7,'A':9,'B':11}
    # only the final character is read as the octave; octave n starts at
    # MIDI value 12*(n+1), which puts C4 at 60
    octave = 12 * (int(nameWithOctave[-1]) + 1)
    # the first character selects the natural note
    note = letter_dict[nameWithOctave[0]]
    # every character between the letter and the octave digit is an
    # accidental: '#' raises by one semitone, anything else (e.g. '-')
    # lowers by one. Summing them handles double sharps and double flats.
    accidental = sum(1 if mark == '#' else -1 for mark in nameWithOctave[1:-1])
    return octave + note + accidental
def generate_notes():
    """Sample a sequence of notes and durations from 'prepared.csv'.

    Every row of the CSV is one segment of consecutive notes: all columns
    except the last two are the context (note/duration pairs) and the last
    two columns are the note and duration that followed that context.

    A random row provides the starting context. Then, ``num_notes`` times,
    every row whose context equals the current one is collected and one of
    them is drawn at random; its target note/duration is emitted and becomes
    the newest element of the context while the oldest element is dropped.
    Because rows are drawn uniformly, a continuation that occurs more often
    in the data is proportionally more likely to be picked.

    Returns:
        ``(gen_notes, gen_durations)``: the starting context followed by the
        ``num_notes`` sampled values. If at any step no row matches the
        current context, ``([], [])`` is returned so the caller can retry
        with a fresh random start.
    """
    df_notes = pd.read_csv('prepared.csv')
    print(df_notes.shape)

    # context (feature) columns, split by name into notes and durations
    features = df_notes.columns[:-2]
    note_features = [s for s in features if "note" in s]
    duration_features = [s for s in features if "duration" in s]
    # target columns: the note/duration that followed the context
    note_target = df_notes.columns[-2]
    duration_target = df_notes.columns[-1]

    # a random row supplies the starting context
    initial_sample = df_notes.sample()
    start_notes = list(initial_sample[note_features].values[0])
    start_durations = list(initial_sample[duration_features].values[0])

    # the output begins with the starting context itself
    gen_notes = [int(note) for note in start_notes]
    gen_durations = list(start_durations)

    for _ in range(num_notes):
        # narrow the table down to rows whose context matches the current one
        rows = df_notes
        for column, value in zip(note_features, start_notes):
            rows = rows[rows[column] == value]
        for column, value in zip(duration_features, start_durations):
            rows = rows[rows[column] == value]

        # Some contexts never occur in the dataset (for example one that only
        # appeared at the very end of a track). Sampling from an empty frame
        # is impossible, so signal the caller to restart with new start notes.
        if rows.empty:
            return [], []

        next_sample = rows.sample()
        next_note = next_sample[note_target].values[0]
        next_duration = next_sample[duration_target].values[0]
        gen_notes.append(int(next_note))
        gen_durations.append(next_duration)

        # slide the context forward: drop the OLDEST element (index 0) and
        # append the value that was just generated
        start_notes.pop(0)
        start_durations.pop(0)
        start_notes.append(next_note)
        start_durations.append(next_duration)

    return gen_notes, gen_durations

#MAIN FUNCTION
if __name__=="__main__":
    # Pipeline:
    #   1. parse every MIDI file under 'data' into per-part note arrays
    #      saved as 'tracks/<n>.npy'
    #   2. turn those arrays into sliding-window rows in 'prepared.csv'
    #   3. sample a note sequence with generate_notes()
    #   4. write the sequence to 'new_song.mid'
    # Steps 1 and 2 are skipped when their output already exists on disk.

    # https://stackoverflow.com/questions/49462107/how-can-i-get-all-piano-parts-from-a-music21-score
    if not os.path.exists('tracks'):
        os.mkdir('tracks')
        track_count = 0
        # Parse midi files into tracks folder
        for path in abs_paths('data'):
            print(path)
            piece = converter.parse(path)
            print(list(piece.parts))
            for part in piece.parts:
                part_notes = []
                # NOTE(review): this iterates only the direct children of the
                # part; notes nested inside containers such as measures would
                # not be visited -- confirm against the parsed data.
                for event in part:
                    if getattr(event, 'isNote', False):
                        print('note in {}'.format(part))

                        # keep only notes whose duration is an accepted
                        # length, stored as [midi number, quarter length]
                        if event.quarterLength in accepeted_lengths:
                            part_notes.append([pitch_to_int(event.nameWithOctave), event.quarterLength])
                # parts without any accepted note produce no track file
                if part_notes:
                    np.save('tracks/{}.npy'.format(track_count), np.array(part_notes))
                    track_count += 1
        print('Number of tracks parsed: {}'.format(track_count))

    if not os.path.exists('prepared.csv'):
        # column layout: note0, duration0, note1, duration1, ...
        columns = []
        for position in range(window):
            columns.append('note' + str(position))
            columns.append('duration' + str(position))

        # Collect every window-sized segment first and build the DataFrame
        # once; appending row by row is quadratic and DataFrame.append no
        # longer exists in pandas 2.x.
        segments = []
        for path in abs_paths('tracks'):
            notes = np.load(path)
            # "+ 1" so that the last full window of the track is included
            for start in range(len(notes) - window + 1):
                segments.append(notes[start:start + window].flatten())
        df_notes = pd.DataFrame(segments, columns=columns)
        # export
        df_notes.to_csv('prepared.csv', index=False)

    gen_notes = []
    gen_durations = []

    # Retry mechanism: generate_notes() returns two empty lists when it runs
    # into a context that has no continuation, so keep restarting from a new
    # random start until a full sequence is produced.
    # NOTE(review): this loop has no upper bound on the number of attempts.
    while len(gen_notes) < num_notes:
        gen_notes, gen_durations = generate_notes()

    print('Generated {} notes/durations'.format(num_notes))
    print(gen_notes)
    print(gen_durations)

    # Write the first num_notes generated notes to a single-track MIDI file.
    # The file resolution is set explicitly so that it always matches the
    # tick values computed from quarter_note_ticks below.
    mid = MidiFile(ticks_per_beat=quarter_note_ticks)
    track = MidiTrack()
    mid.tracks.append(track)
    for note, duration in zip(gen_notes[:num_notes], gen_durations[:num_notes]):
        # start the note immediately after the previous one ended
        track.append(Message('note_on', channel=0, note=note, velocity=60, time=0))
        # a note_on with velocity 0 ends the note after its duration in ticks
        track.append(Message('note_on', channel=0, note=note, velocity=0, time=int(duration*quarter_note_ticks)))
    mid.save('new_song.mid')
#create new midi file which can be engraved
#https://mido.readthedocs.io/en/latest/midi_files.html , "Creating a New File" section
# mid = MidiFile()
# track = MidiTrack
# mid.tracks.append(track)

# for i in range(num_notes):
#     track.append(Message('note_on', channel=0, note=gen_notes[i], velocity=60, time=0))
#     track.append(Message('note_on', channel=0, note=gen_notes[i], velocity=0,time=int(gen_durations[i]*quarter_note_ticks)))
# mid.save('output.mid')
# def inspect_midi():
#     #Just inspecting midi file 1.
#     for path in abs_paths('data'):
#         # print(path)
#         mid = MidiFile(path)
#         for i, track in enumerate(mid.tracks):
#             print('Track {}: {}'.format(i, track.name))
#             for message in track:
#                 print(message)
#         break
# inspect_midi()
# def isolate_note_on_msgs():
#     #round each note duration to 250ms
#     #Build adjaceny matrix


# LILYPOND COMMANDS : To be used for generating music scores
# Installation : sudo apt-get install -y lilypond
# !midi2ly "new_song.mid"
# !lilypond -fpng "new_song-midi.ly"