import pickle

import gradio as gr
import torch
import torch.nn as nn
from midi2audio import FluidSynth
from music21 import chord, converter, instrument, note, stream

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the note/index vocabulary mappings built during training
file_path = './objects/int_to_note.pkl'
with open(file_path, 'rb') as f:
    int_to_note = pickle.load(f)

file_path = './objects/note_to_int.pkl'
with open(file_path, 'rb') as f:
    note_to_int = pickle.load(f)


class GenerationRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(GenerationRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size * n_layers, output_size)

    def forward(self, input, hidden):
        # Embed the input token: (1, batch) -> (1, batch, hidden_size)
        input = self.embedding(input.view(1, -1))
        output, hidden = self.gru(input, hidden)
        # Decode pitch scores from the (flattened) hidden state
        output = self.decoder(hidden.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size).to(device)


def predict_multinomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Arguments:
    prime_seq   - priming sequence (already converted to integer indices)
    predict_len - number of notes to predict after the prime sequence
    temperature - sampling temperature; probabilities are proportional to
                  exp(score / temperature), so higher values give more random
                  output and lower values more conservative output
    '''
    hidden = net.init_hidden()
    predicted = prime_seq.copy()

    prime_seq = torch.tensor(prime_seq, dtype=torch.long).to(device)

    # "Build up" the hidden state using the prime sequence
    for p in range(len(prime_seq) - 1):
        input = prime_seq[p]
        _, hidden = net(input, hidden)

    # The last token of the prime sequence is the first generation input
    input = prime_seq[-1]

    # For every index to predict
    for p in range(predict_len):
        # output has dimension n_pitches - a score for each possible token
        output, hidden = net(input, hidden)

        # Sample from the network output as a multinomial distribution
        output = output.data.view(-1).div(temperature).exp()
        predicted_id = torch.multinomial(output, 1)

        # Add the predicted index to the list and use it as the next input
        predicted.append(predicted_id.item())
        input = predicted_id

    return predicted


def create_midi(prediction_output):
    """ Convert the prediction output to notes and build a MIDI stream. """
    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord (dot-separated pitch integers, e.g. '4.7.11')
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a single note (e.g. 'C4')
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Increase the offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    return midi_stream


def get_note_names(midi):
    s2 = instrument.partitionByInstrument(midi)
    piano_part = None

    # Filter for the piano part only
    instr = instrument.Piano
    for part in s2:
        if isinstance(part.getInstrument(), instr):
            piano_part = part

    notes_song = []
    if not piano_part:
        # Some songs have no piano part; just take the first part
        piano_part = s2[0]

    for element in piano_part:
        if isinstance(element, note.Note):
            # Store the pitch of the single note
            notes_song.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            # Store the normal order of the chord as dot-joined integers
            notes_song.append('.'.join(str(n) for n in element.normalOrder))

    return notes_song


def process_input(input_midi_file, input_randomness, input_duration):
    print(input_midi_file.name)
    midi = converter.parse(input_midi_file.name)
    note_names = get_note_names(midi)
    int_notes = [note_to_int[note_name] for note_name in note_names]

    # Note: input_randomness and input_duration are not yet wired into the
    # generation; predict_len and temperature are currently hard-coded.
    generated_seq_multinomial = predict_multinomial(model, int_notes, predict_len=100, temperature=2.2)
    generated_seq_multinomial = [int_to_note[e] for e in generated_seq_multinomial]

    pred_midi_multinomial = create_midi(generated_seq_multinomial)
    pred_midi_multinomial.write('midi', fp='result.midi')

    # sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth().midi_to_audio('result.midi', 'result.wav')
    return 'result.wav', 'result.midi'


file_path = './objects/model_cpu.pkl'
with open(file_path, 'rb') as f:
    model = pickle.load(f)

midi_file_desc = """This model lets you generate music based on your input.
Please upload a MIDI file below and choose the music's randomness and duration. Enjoy!
"""

article = """# Music Generation
This project has been created by students of the Ukrainian Catholic University for our ML course.

We use a GRU model to output new notes based on the given input. You can find more information in our Git repo: https://github.com/DmytroLopushanskyy/music-generation

We use a language model to create music by treating the MIDI musical standard as simple text, with tokens for note values, note durations, and separators to denote movement forward in time.
"""

iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.inputs.File(label=midi_file_desc),
        gr.inputs.Slider(0, 250, default=100, step=50),
        gr.inputs.Radio([10, 20, 30], type="value", default=20)
    ],
    outputs=["audio", "file"],
    article=article,
    # examples=['examples/mozart.midi']
)

iface.launch()
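
# --- Optional local smoke test (a sketch, not part of the app) ---
# Uncomment to exercise the full pipeline without the Gradio UI. The MIDI
# path is hypothetical (it assumes a file at ./examples/mozart.midi), and
# `_LocalFile` is a minimal stand-in for the temp-file object Gradio passes
# to `fn`, which only needs a `.name` attribute here.
#
# class _LocalFile:
#     name = './examples/mozart.midi'
#
# wav_path, midi_path = process_input(_LocalFile(), input_randomness=100, input_duration=20)
# print('wrote', wav_path, 'and', midi_path)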