import pickle

import gradio as gr
import torch
import torch.nn as nn
from midi2audio import FluidSynth
from music21 import converter, instrument, note, chord, stream

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
file_path = './objects/int_to_note.pkl'
with open(file_path, 'rb') as f:
    int_to_note = pickle.load(f)
    
file_path = './objects/note_to_int.pkl'
with open(file_path, 'rb') as f:
    note_to_int = pickle.load(f)
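
# The two pickles are inverse vocabulary mappings built at training time.
# The entries below are hypothetical, but they illustrate the expected shape:
#   note_to_int = {'C4': 42, 'E-3': 17, '4.7.11': 93, ...}  # token -> index
#   int_to_note = {42: 'C4', 17: 'E-3', 93: '4.7.11', ...}  # index -> token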
    

class GenerationRNN(nn.Module):
    """Embeds an integer note index, advances a GRU one step, and decodes the
    hidden state into unnormalized scores over the note vocabulary."""
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(GenerationRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size * n_layers, output_size)
    
    def forward(self, input, hidden):
        # Embed the integer note index: (1, 1) -> (1, 1, hidden_size)
        input = self.embedding(input.view(1, -1))
        # Single GRU step; hidden has shape (n_layers, 1, hidden_size)
        output, hidden = self.gru(input, hidden)
        # Decode the flattened hidden state into scores over the vocabulary
        output = self.decoder(hidden.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size).to(device)
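
# Shape sanity check for one forward step (a sketch; the sizes below are
# illustrative assumptions, not the trained model's real dimensions):
#
#   net = GenerationRNN(input_size=100, hidden_size=128, output_size=100).to(device)
#   hidden = net.init_hidden()                        # (n_layers, 1, hidden_size)
#   out, hidden = net(torch.tensor([5]).to(device), hidden)
#   out.shape                                         # -> (1, 100): one score per token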
    
    
def predict_multinomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Arguments:
    prime_seq - priming sequence, converted to integer indices via note_to_int
    predict_len - number of notes to predict after the prime sequence
    temperature - sampling temperature; higher values give more random output
    '''
    hidden = net.init_hidden()

    predicted = prime_seq.copy()
    prime_seq = torch.tensor(prime_seq, dtype=torch.long).to(device)

    # "Build up" the hidden state by feeding the prime sequence through the network
    for p in range(len(prime_seq) - 1):
        input = prime_seq[p]
        _, hidden = net(input, hidden)

    # The last note of the prime sequence becomes the first prediction input
    input = prime_seq[-1]

    # For every index to predict
    for p in range(predict_len):

        # Pass the input to the model - output holds one score per possible note
        output, hidden = net(input, hidden)
        # Temperature-scaled sampling: exp(score / temperature) is proportional
        # to a softmax with that temperature, and torch.multinomial draws from it
        output = output.data.view(-1).div(temperature).exp()
        predicted_id = torch.multinomial(output, 1)

        # Add the predicted index to the list and use it as the next input
        predicted.append(predicted_id.item())
        input = predicted_id

    return predicted
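
# Example call (a sketch; the prime notes are hypothetical and must exist in
# the vocabulary the model was trained with):
#
#   prime = [note_to_int[n] for n in ['C4', 'E4', 'G4']]
#   generated = predict_multinomial(model, prime, predict_len=50, temperature=0.8)
#   print([int_to_note[i] for i in generated])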


def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)

    return midi_stream
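
# Example (sketch): a token sequence with one chord ('4.7.11') and two single
# notes becomes a three-event stream that can be written to disk:
#
#   s = create_midi(['C4', '4.7.11', 'E4'])
#   s.write('midi', fp='example.midi')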


def get_note_names(midi):
    s2 = instrument.partitionByInstrument(midi)

    piano_part = None
    # Filter for only the piano part
    instr = instrument.Piano
    for part in s2:
        if isinstance(part.getInstrument(), instr):
            piano_part = part

    notes_song = []
    if not piano_part:  # Some files have no piano part at all
        # Fall back to the first part
        piano_part = s2[0]

    for element in piano_part:
        if isinstance(element, note.Note):
            # Record the pitch name of the single note, e.g. 'C4'
            notes_song.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            # Record the chord's normal order as dot-joined integers, e.g. '4.7.11'
            notes_song.append('.'.join(str(n) for n in element.normalOrder))
            
    return notes_song
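
# Example output format (hypothetical file): single notes appear as pitch
# names, chords as dot-joined normal-order pitch classes:
#
#   get_note_names(converter.parse('some_song.midi'))
#   -> ['C4', 'E4', '4.7.11', 'G3', ...]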


def process_input(input_midi_file, input_randomness, input_duration):
    print(input_midi_file.name)
    midi = converter.parse(input_midi_file.name)
    note_names = get_note_names(midi)
    # Tokens not seen during training are missing from note_to_int and would
    # raise a KeyError here
    int_notes = [note_to_int[note_name] for note_name in note_names]

    # Note: the UI's randomness and duration inputs are not wired in yet;
    # generation length and temperature are currently fixed
    generated_seq_multinomial = predict_multinomial(model, int_notes, predict_len=100, temperature=2.2)
    generated_seq_multinomial = [int_to_note[e] for e in generated_seq_multinomial]
    pred_midi_multinomial = create_midi(generated_seq_multinomial)

    pred_midi_multinomial.write('midi', fp='result.midi')

    # sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth().midi_to_audio('result.midi', 'result.wav')
    return 'result.wav', 'result.midi'
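
# Direct-call sketch (outside the Gradio UI; any object exposing a .name path
# to a MIDI file works, and 'examples/mozart.midi' is just an assumed path):
#
#   class _FakeUpload: name = 'examples/mozart.midi'
#   wav_path, midi_path = process_input(_FakeUpload(), input_randomness=100, input_duration=20)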


file_path = './objects/model_cpu.pkl'
with open(file_path, 'rb') as f:
    model = pickle.load(f)
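
# Note: unpickling restores a GenerationRNN instance by reference, so the class
# definition above must match the one used when the model was saved.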
    

midi_file_desc = """
This model generates music based on your input.
Please upload a MIDI file below and choose the music's randomness and duration. Enjoy!
"""

article = """# Music Generation
This project has been created by the students of Ukrainian Catholic University for our ML course.

We are using a GRU model to output new notes based on the given input. You can find more information at our Git repo: https://github.com/DmytroLopushanskyy/music-generation
We are using a language model to create music by treating a musical standard MIDI a simple text, with tokens for note values, note duration, and separations to denote movement forward in time.
"""

iface = gr.Interface(
    fn=process_input, 
    inputs=[
        gr.inputs.File(label=midi_file_desc),
        gr.inputs.Slider(0, 250, default=100, step=50),
        gr.inputs.Radio([10, 20, 30], type="value", default=20)
        ], 
    outputs=["audio", "file"],
    article=article,
#     examples=['examples/mozart.midi']
)

iface.launch()