# %% Imports
# Every dependency of the notebook lives in this one cell so that
# "Restart Kernel -> Run All" works from a fresh kernel.
import pickle

import gradio as gr
import torch
import torch.nn as nn
from torch.nn import functional as F
# NOTE(review): the original called `FluidSynth(...).midi_to_audio(...)` without
# importing it anywhere; that call shape matches midi2audio - confirm.
from midi2audio import FluidSynth
# Explicit names instead of `from music21 import *`, so it is clear where
# `note`, `chord`, `instrument` and `stream` come from.
from music21 import chord, instrument, note, stream


# %% Configuration
# NOTE(review): machine-specific absolute paths, kept exactly as in the
# original; consider making them relative to a configurable data directory.
MODEL_PATH = '/home/dmytro/ucu/music-generation/model.pkl'
INT_TO_NOTE_PATH = '/home/dmytro/ucu/music-generation/int_to_note.pkl'
sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"

# One name per artifact. The original wrote 'result.mid' but then asked the
# synthesizer to read 'result.midi', a file that was never created.
MIDI_PATH = 'result.mid'
WAV_PATH = 'result.wav'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# %% Model definition
class GenerationRNN(nn.Module):
    """Embedding -> GRU -> linear decoder, stepped one token at a time.

    ``forward`` reshapes its input to ``(1, -1)`` and decodes from the GRU
    hidden state flattened to ``(1, -1)``. The decoder has
    ``hidden_size * n_layers`` input features, so the shapes only line up for
    a batch of one, which is also what ``init_hidden`` creates.

    Attribute names (``embedding``, ``gru``, ``decoder``) are unchanged.
    # NOTE(review): the pickled model loaded below is presumably an instance of
    # this class, in which case these names must stay as they are - confirm.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        """
        Arguments:
        input_size - number of distinct token ids (embedding rows)
        hidden_size - width of the embedding and of each GRU layer
        output_size - number of scores produced per step
        n_layers - number of stacked GRU layers
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size * n_layers, output_size)

    def forward(self, input, hidden):
        """Advance the GRU by one step.

        Returns ``(scores, hidden)``: ``scores`` is the decoder output computed
        from the new hidden state, ``hidden`` is that new hidden state.
        """
        embedded = self.embedding(input.view(1, -1))
        # The per-step GRU output is not used; the decoder reads the hidden
        # state of all layers, flattened into a single row.
        _, hidden = self.gru(embedded, hidden)
        scores = self.decoder(hidden.view(1, -1))
        return scores, hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (n_layers, 1, hidden_size).

        The tensor is created on the device that holds the model's own
        weights, so it cannot disagree with the model's placement (the
        original used the global ``device`` regardless of where the model was).
        """
        return torch.zeros(
            self.n_layers, 1, self.hidden_size,
            device=self.embedding.weight.device,
        )


# %% Sampling
def predict_multimomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Sample a continuation of ``prime_seq`` from ``net``.

    Arguments:
    net - model exposing ``init_hidden()`` and ``net(token, hidden)`` returning
          ``(scores, hidden)``
    prime_seq - non-empty sequence of integer token ids used to build up the
                hidden state
    predict_len - number of tokens to sample after the prime sequence
    temperature - positive divisor applied to the scores before sampling

    Returns:
    A new list: the prime ids followed by ``predict_len`` sampled ids.

    Raises:
    ValueError - if ``prime_seq`` is empty or ``temperature`` is not positive
    '''
    predicted = list(prime_seq)
    if not predicted:
        raise ValueError("prime_seq must contain at least one token id")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Inference only: without no_grad the autograd graph would grow with
    # every step, because the hidden state is threaded through the whole loop.
    with torch.no_grad():
        hidden = net.init_hidden()
        # Put the ids on the same device as the hidden state.
        prime = torch.tensor(predicted, dtype=torch.long, device=hidden.device)

        # "Build up" the hidden state with everything but the last prime token.
        for token in prime[:-1]:
            _, hidden = net(token, hidden)

        # The last prime token is the first input of the sampling loop.
        token = prime[-1]
        for _ in range(predict_len):
            scores, hidden = net(token, hidden)
            # softmax(scores / T) is the same distribution as the normalised
            # exp(scores / T) of the original, but cannot overflow to inf.
            probs = F.softmax(scores.view(-1) / temperature, dim=0)
            token = torch.multinomial(probs, 1)
            predicted.append(token.item())

    return predicted


# %% MIDI conversion
def _piano_note(pitch):
    """Build a music21 Note for ``pitch`` and tag it with a Piano instrument."""
    piano_note = note.Note(pitch)
    piano_note.storedInstrument = instrument.Piano()
    return piano_note


def create_midi(prediction_output, step=0.5):
    """ convert the output from the prediction to notes and create a music21
        stream from the notes

    Arguments:
    prediction_output - iterable of strings; a string that contains '.' or is
                        all digits is split on '.' and each part is used as an
                        integer pitch of a chord, any other string is passed
                        to ``note.Note`` as is
    step - offset increment between consecutive elements (default 0.5, the
           value that was hard-coded before)

    Returns:
    A ``stream.Stream`` holding one note or chord per input pattern.
    """
    output_notes = []

    for index, pattern in enumerate(prediction_output):
        if ('.' in pattern) or pattern.isdigit():
            # pattern is a chord
            element = chord.Chord(
                [_piano_note(int(part)) for part in pattern.split('.')]
            )
        else:
            # pattern is a note
            element = _piano_note(pattern)

        # advance the offset each iteration so that notes do not stack
        element.offset = index * step
        output_notes.append(element)

    return stream.Stream(output_notes)


def _render_audio(midi_stream, midi_path=MIDI_PATH, wav_path=WAV_PATH):
    """Write ``midi_stream`` to ``midi_path``, synthesize it to ``wav_path``.

    Returns ``(wav_path, midi_path)``, the pair the original synthesis cell
    tried to ``return`` at top level (a SyntaxError outside a function).
    """
    midi_stream.write('midi', fp=midi_path)
    FluidSynth(sound_font).midi_to_audio(midi_path, wav_path)
    return wav_path, midi_path


# %% Load the trained model and the id -> note vocabulary
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files you trust.
with open(MODEL_PATH, 'rb') as f:
    model = pickle.load(f)
# NOTE(review): assumes the pickled object is an nn.Module (it is called like
# one and has init_hidden) - confirm.
model = model.to(device)

with open(INT_TO_NOTE_PATH, 'rb') as f:
    int_to_note = pickle.load(f)


# %% Demo run with a fixed priming melody
input_melody = [727, 224, 55, 55, 727, 224, 55]

# Ids and note names get separate variables instead of rebinding one name to
# two different kinds of value.
generated_ids_multinomial = predict_multimomial(
    model, input_melody, predict_len=100, temperature=2.2
)
generated_seq_multinomial = [int_to_note[e] for e in generated_ids_multinomial]
pred_midi_multinomial = create_midi(generated_seq_multinomial)

demo_wav_path, demo_midi_path = _render_audio(pred_midi_multinomial)


# %% Gradio front-end
def process_input(midi_file=None, text="", n_notes=100, option=100):
    """Gradio callback: generate music and return ``(wav_path, midi_path)``.

    The interface below passes four values (file, text, slider, radio) and
    expects two outputs (audio, file); the original zero-argument ``pass``
    stub could satisfy neither. All parameters have defaults, so a bare
    ``process_input()`` call still works.

    Arguments:
    midi_file - uploaded file or None. TODO: not used yet; generation is
                always primed with ``input_melody``
    text - free text. TODO: not used yet
    n_notes - slider value
              # NOTE(review): assumed to be the number of notes to generate
              # (the slider default 100 matches predict_len above) - confirm
    option - radio value. TODO: meaning not established by the notebook; unused

    # NOTE(review): output file names are fixed, so concurrent requests would
    # overwrite each other's results.
    """
    note_ids = predict_multimomial(model, input_melody, predict_len=int(n_notes))
    midi_stream = create_midi([int_to_note[i] for i in note_ids])
    return _render_audio(midi_stream)


# Label typo fixed ("Please entUpload" -> "Upload").
midi_file_desc = """Upload your own MIDI file here (try to keep it small).
If you do not have a MIDI file, add some text and we will turn it into music!
"""

# NOTE(review): this text describes a transformer model, sentiment scoring and
# text-to-melody extraction, none of which appear in this notebook (the code
# samples from a GRU). Kept verbatim - confirm whether it should be rewritten.
article = """# Pop Music Transformer
We are using a language model to create music by treating a musical standard MIDI a simple text, with tokens for note values, note duration, and separations to denote movement forward in time.

This is all following the great work you can find [at this repo](https://github.com/bearpelican/musicautobot). Moreover check out [their full web app](http://musicautobot.com/). We use the pretrained model they created as well as the utilities for converting between MIDI, audio streams, numpy encodings, and WAV files.

## Sonification

This is the process of turning something not inherently musical into music. Here we do something pretty simple. We take your input text "pretty cool", get a sentiment score (hard coded right now, model TODO), and use a major progression if it's positive and a minor progression if it's negative, and then factor the score into the randomness of the generated music. We also take the text and extract a melody by taking any of the letters from A to G, which in the example is just "E C". With the simple "E C" melody and a major progression a musical idea is generated.
"""

# NOTE(review): `gr.inputs.*` is left exactly as written; whether the
# installed Gradio version still provides this namespace is not visible here.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.inputs.File(optional=True, label=midi_file_desc),
        "text",
        gr.inputs.Slider(0, 250, default=100, step=50),
        gr.inputs.Radio([100, 200, 500], type="value", default=100),
    ],
    outputs=["audio", "file"],
    article=article,
)

iface.launch()