"""
Find the inspiration for this project as well as the pretrained model
we used here: https://github.com/bearpelican/musicautobot
"""
import os
from pathlib import Path

import gradio as gr
import music21
from music21 import note
from midi2audio import FluidSynth
from musicautobot.config import default_config
from musicautobot.music_transformer import *
from musicautobot.utils.midifile import *

print(os.getcwd())
# Load the stored data. This is needed to generate the vocab.
print('Loading data to build vocabulary.')
data_dir = Path('.')
data = load_data(data_dir, 'data.pkl')
from huggingface_hub import hf_hub_download
print('Downloading model.')
model_cache_path = hf_hub_download(repo_id="psistolar/musicautobot-fine1", filename="model.pth")
from transformers import pipeline
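# The "sentiment-analysis" pipeline defaults to a DistilBERT model fine-tuned on
# SST-2; it returns a POSITIVE/NEGATIVE label plus a confidence score, e.g. roughly
# classifier("pretty cool") -> [{'label': 'POSITIVE', 'score': 0.99...}]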
classifier = pipeline("sentiment-analysis")
# Default config options
config = default_config()
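# encode_position adds beat-position information to the token encoding; the
# fine-tuned checkpoint appears to expect it, so we enable it before loading.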
config['encode_position'] = True
print("Building model.")
# Load our fine-tuned model
learner = music_model_learner(
data,
config=config.copy(),
pretrained_path=model_cache_path
)
print("Ready to use.")
musical_letters = 'abcdefg'
def sonify_text(text, sentiment):
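    # Pull a melody out of the text: every letter A-G becomes a note name in
    # the 4th octave, e.g. "pretty cool" -> ["E4", "C4"].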
note_names = [f"{letter.upper()}4" for letter in text.lower() if letter in musical_letters]
p = music21.stream.Part()
if sentiment == 'NEGATIVE':
        # If negative, use a partial minor progression i-VI-III-VII in A minor.
p.append(music21.chord.Chord('A3 C4 E4', type='half')) # i
        p.append(music21.chord.Chord('F3 A3 C4', type='half')) # VI
p.append(music21.chord.Chord('C3 E3 G3', type='half')) # III
p.append(music21.chord.Chord('G3 B3 D4', type='half')) # VII
else:
# If positive, use a partial progression I-V-vi in C Major.
p.append(music21.chord.Chord('C4 E4 G4', type='half')) # I
p.append(music21.chord.Chord('G3 B3 D4', type='half')) # V
p.append(music21.chord.Chord('A3 C4 E4', type='half')) # vi
for note_name in note_names:
note_obj = note.Note(note_name)
note_obj.duration.type = "quarter"
p.append(note_obj)
s = music21.stream.Score([p])
musical_seed = MusicItem.from_stream(s, data.vocab)
return musical_seed
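# For instance, sonify_text("pretty cool", "POSITIVE") seeds generation with the
# I-V-vi progression plus the melody E4 C4.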
def process_midi(MIDI_File, Text_to_Sonify, Randomness, Amount_of_Music_to_Add):
if MIDI_File is not None:
name = Path(MIDI_File.name)
else:
name = Path('C Major Scale.midi')
sonification = False
    # Gradio passes an empty string (not None) when the textbox is blank.
    if MIDI_File is None and Text_to_Sonify:
sonification = True
# create the model input object
if sonification:
sentiment_analysis = classifier(Text_to_Sonify)[0]
sentiment = sentiment_analysis['label']
score = sentiment_analysis['score']
item = sonify_text(Text_to_Sonify, sentiment)
# the lower our confidence in the sentiment, the more randomness we inject
score = max(0.25, score)
temp = Randomness / (100 * score)
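        # e.g. Randomness=100 with a clamped score of 0.5 gives temp=2.0, while
        # a fully confident score of 1.0 gives temp=1.0.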
else:
item = MusicItem.from_file(name, data.vocab)
temp = Randomness / 100
# full is the prediction appended to the input
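    # temperatures appears to be a (pitch, duration) pair in musicautobot; we
    # use the same value for both.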
pred, full = learner.predict(
item,
n_words=Amount_of_Music_to_Add,
temperatures=(temp, temp)
)
# convert to stream and then MIDI file
if sonification:
# do not replay the musical seed if sonifying
stream = pred.to_stream()
else:
stream = full.to_stream()
out = music21.midi.translate.streamToMidiFile(stream)
# save MIDI file
out.open('result.midi', 'wb')
out.write()
out.close()
# use fluidsynth to convert MIDI to WAV so the user can hear the output
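    # FluidR3_GM ships with the fluid-soundfont-gm package on Debian/Ubuntu.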
sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
# TODO: if we can personalize the file names, let's do that with the text
return 'result.wav', 'result.midi'
midi_file_desc = """Upload your own MIDI file here (try to keep it small, without any fun time signatures).
If you do not have a MIDI file, add some text and we will turn it into music!
"""
article = """# Pop Music Transformer
We are using a language model to create music by treating standard MIDI as simple text, with tokens for note values, note durations, and separators that denote movement forward in time.
This all follows the great work you can find [at this repo](https://github.com/bearpelican/musicautobot). Moreover, check out [their full web app](http://musicautobot.com/). We use the pretrained model they created, as well as their utilities for converting between MIDI, audio streams, numpy encodings, and WAV files.
## Sonification
This is the process of turning something not inherently musical into music. Here we do something pretty simple. We take your input text, say "pretty cool", and run it through a sentiment model to get a label and a confidence score. We use a major progression if the sentiment is positive and a minor progression if it is negative, and we factor the confidence score into the randomness of the generated music. We also extract a melody from the text by taking any of the letters from A to G, which in the example is just "E C". With the simple "E C" melody and a major progression, a musical idea is generated.
"""
iface = gr.Interface(
fn=process_midi,
inputs=[
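        # These map positionally onto process_midi's parameters:
        # MIDI_File, Text_to_Sonify, Randomness, Amount_of_Music_to_Add.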
gr.inputs.File(optional=True, label=midi_file_desc),
"text",
gr.inputs.Slider(0, 250, default=100, step=50),
gr.inputs.Radio([100, 200, 500], type="value", default=100)
],
outputs=["audio", "file"],
article=article
# examples=['C major scale.midi']
)
iface.launch()