import pickle
import torch
import torch.nn as nn
import gradio as gr
from music21 import note, chord, instrument, stream, converter
from midi2audio import FluidSynth

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the vocabulary mappings between note tokens and integer indices
# that were built at training time.
file_path = './objects/int_to_note.pkl'
with open(file_path, 'rb') as f:
    int_to_note = pickle.load(f)

file_path = './objects/note_to_int.pkl'
with open(file_path, 'rb') as f:
    note_to_int = pickle.load(f)
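# The two pickles are inverse mappings, e.g. note_to_int['C4'] -> 42 and
# int_to_note[42] -> 'C4' (the index 42 is made up for illustration).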
class GenerationRNN(nn.Module):
    """Embedding -> GRU -> linear decoder over the note vocabulary."""

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(GenerationRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size * n_layers, output_size)

    def forward(self, input, hidden):
        # Embed the input note index: (1, 1) -> (1, 1, hidden_size)
        input = self.embedding(input.view(1, -1))
        output, hidden = self.gru(input, hidden)
        # Decode from the hidden state of every layer: (1, n_layers * hidden_size)
        output = self.decoder(hidden.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size).to(device)
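# Illustrative instantiation only (the sizes are assumptions; the pickled
# model loaded further below carries its own trained configuration):
#   net = GenerationRNN(input_size=len(note_to_int), hidden_size=256,
#                       output_size=len(note_to_int), n_layers=2).to(device)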
def predict_multinomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Arguments:
        prime_seq   - priming sequence, already converted to integer indices
        predict_len - number of notes to predict after the prime sequence
        temperature - softmax temperature; higher values give more random output
    '''
    hidden = net.init_hidden()
    predicted = prime_seq.copy()
    prime_seq = torch.tensor(prime_seq, dtype=torch.long).to(device)

    # "Build up" the hidden state using the prime sequence
    for p in range(len(prime_seq) - 1):
        input = prime_seq[p]
        _, hidden = net(input, hidden)

    # Last note of the prime sequence
    input = prime_seq[-1]

    # For every index to predict
    for p in range(predict_len):
        # output has dimension n_pitches - a score for each possible note
        output, hidden = net(input, hidden)

        # Sample from the network output as a multinomial distribution
        output = output.data.view(-1).div(temperature).exp()
        predicted_id = torch.multinomial(output, 1)

        # Add the predicted index to the list and use it as the next input
        predicted.append(predicted_id.item())
        input = predicted_id
    return predicted
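# A minimal sketch of what the temperature does to the sampling distribution
# (the scores are made up for illustration):
#   scores = torch.tensor([1.0, 2.0, 3.0])
#   scores.div(0.5).exp()  # low temperature: sharp, almost always the top note
#   scores.div(2.0).exp()  # high temperature: flat, spreads over all notes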
def create_midi(prediction_output):
    """ Convert the predicted tokens to music21 notes/chords and return them
    as a MIDI stream. """
    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # Pattern is a chord (dot-separated pitch-class integers)
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # Pattern is a single note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # Increase the offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    return midi_stream
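# For example, with hypothetical tokens, create_midi(['C4', '0.4.7', 'E4'])
# yields a stream holding a C4 note, a chord built from the integers 0, 4
# and 7, and an E4, spaced 0.5 quarter-lengths apart.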
def get_note_names(midi):
    s2 = instrument.partitionByInstrument(midi)
    piano_part = None

    # Filter for the piano part only
    instr = instrument.Piano
    for part in s2:
        if isinstance(part.getInstrument(), instr):
            piano_part = part

    notes_song = []
    if not piano_part:  # Some songs somehow have no piano part
        # Just take the first part instead
        piano_part = s2[0]
    for element in piano_part:
        if isinstance(element, note.Note):
            # Store the pitch name of the single note
            notes_song.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            # Store the chord's normal order as dot-joined integers
            notes_song.append('.'.join(str(n) for n in element.normalOrder))
    return notes_song
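# The returned tokens look like ['E4', 'B3', '4.8.11', ...]: plain pitch names
# for single notes and dot-joined normal-order pitch classes for chords - the
# same vocabulary captured by note_to_int above.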
def process_input(input_midi_file, input_randomness, input_duration):
    midi = converter.parse(input_midi_file.name)
    note_names = get_note_names(midi)
    # Skip any token that was never seen at training time
    int_notes = [note_to_int[note_name] for note_name in note_names
                 if note_name in note_to_int]
    # NOTE: input_randomness and input_duration are not wired into the sampler
    # yet; generation runs with a fixed length and temperature for now.
    generated_seq_multinomial = predict_multinomial(model, int_notes, predict_len=100, temperature=2.2)
    generated_seq_multinomial = [int_to_note[e] for e in generated_seq_multinomial]
    pred_midi_multinomial = create_midi(generated_seq_multinomial)
    pred_midi_multinomial.write('midi', fp='result.midi')
    # If the default soundfont is missing, pass its path explicitly, e.g.
    # FluidSynth(sound_font="/usr/share/sounds/sf2/FluidR3_GM.sf2")
    FluidSynth().midi_to_audio('result.midi', 'result.wav')
    return 'result.wav', 'result.midi'
# Load the trained model. Unpickling needs the GenerationRNN class
# defined above to be available.
file_path = './objects/model_cpu.pkl'
with open(file_path, 'rb') as f:
    model = pickle.load(f)
# init_hidden() allocates tensors on `device`, so the model must live there too
model = model.to(device)
model.eval()
midi_file_desc = """
This model generates music based on your input.
Please upload a MIDI file below and choose the music's randomness and duration. Enjoy!
"""

article = """# Music Generation
This project was created by students of the Ukrainian Catholic University for our ML course.
We use a GRU model to generate new notes from the given input. You can find more information in our Git repo: https://github.com/DmytroLopushanskyy/music-generation
We use a language-model approach to create music, treating standard MIDI as simple text, with tokens for note values, note durations, and separators that denote movement forward in time.
"""
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.inputs.File(label=midi_file_desc),
        gr.inputs.Slider(0, 250, default=100, step=50, label="Randomness"),
        gr.inputs.Radio([10, 20, 30], type="value", default=20, label="Duration")
    ],
    outputs=["audio", "file"],
    article=article,
    # examples=['examples/mozart.midi']
)
iface.launch()