Create audio_methods.py
audio_methods.py (ADDED: +101 -0)
@@ -0,0 +1,101 @@
# WHERE DID I USE SCALING FACTORS TO CONVERT WAV TO MAP?

import collections

import numpy as np
import pandas as pd
import pretty_midi
import tensorflow as tf
from IPython import display
from matplotlib.pyplot import imshow

_SAMPLING_RATE = 16000  # Sampling rate used to discretize the continuous signal
_INSTRUMENT_NAME = "Acoustic Grand Piano"  # MIDI instrument used
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)  # Factors used to normalize song maps


def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """Extract the first instrument's notes from a MIDI file into a DataFrame."""
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    notes = collections.defaultdict(list)

    # Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    prev_start = sorted_notes[0].start

    for note in sorted_notes:
        start = note.start
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['start'].append(start)
        notes['end'].append(end)
        notes['step'].append(start - prev_start)  # time since the previous note's onset
        notes['duration'].append(end - start)
        prev_start = start

    return pd.DataFrame({name: np.array(value) for name, value in notes.items()})


def notes_to_midi(
    notes: pd.DataFrame,
    out_file: str,
    velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
    """Write a DataFrame of notes to out_file and return the PrettyMIDI object."""
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME))

    prev_start = 0
    for _, note in notes.iterrows():
        # Rebuild absolute start/end times from the relative step/duration columns
        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
        prev_start = start

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm


def display_audio(pm: pretty_midi.PrettyMIDI, seconds=120):
    """Synthesize a PrettyMIDI object and return an IPython Audio widget."""
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
    # Take a sample of the generated waveform to mitigate kernel resets
    waveform_short = waveform[:seconds * _SAMPLING_RATE]
    return display.Audio(waveform_short, rate=_SAMPLING_RATE)


def map_to_wav(song_map, out_file, instrument_name, velocity=100):
    """Convert a song map back to MIDI, writing out_file and returning the PrettyMIDI object."""
    # Drop singleton dimensions and transpose so each row is one note
    contracted_map = tf.squeeze(song_map)
    song_map_T = contracted_map.numpy().T
    # Undo the normalization applied when the song map was built
    notes = pd.DataFrame(
        song_map_T, columns=["pitch", "step", "duration"]
    ).mul(_SCALING_FACTORS, axis=1)
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(instrument_name))

    prev_start = 0
    for _, note in notes.iterrows():
        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
        prev_start = start

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm


def generate_and_display(out_file, instrument_name, model, z_sample=None,
                         velocity=100, seconds=120):
    """Sample a song map from the model, plot its first 50 steps, and render the audio."""
    song_map = model.generate(z_sample)
    display.display(imshow(tf.squeeze(song_map)[:, :50]))
    wav = map_to_wav(song_map, out_file, instrument_name, velocity)

    return display_audio(wav, seconds)
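For context, a minimal usage sketch (not part of the committed file): it assumes a MIDI file named example.mid on disk, a working FluidSynth install (pretty_midi's fluidsynth() requires it), and, for the last step, a trained model object exposing the generate(z_sample) method called above. Run it in a notebook so the returned display.Audio widget renders.

# Hypothetical usage, run alongside audio_methods.py in a notebook:
notes = midi_to_notes("example.mid")        # "example.mid" is a placeholder path
print(notes.head())                          # pitch / start / end / step / duration

pm = notes_to_midi(notes, "roundtrip.mid")   # write the notes back out as MIDI
display_audio(pm, seconds=30)                # needs FluidSynth; shows an Audio widget

# With a trained model (anything exposing .generate(z_sample)):
# generate_and_display("generated.mid", _INSTRUMENT_NAME, model)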