Spaces:

psistolar
/

pop-music-transformer

Runtime error

File size: 5,917 Bytes

71d673c
 
 
 
 
a931541
 
ff647b0
 
1de73a8
789718b
ff647b0
84a8d96
2dcf4b6
e49932c
 
84a8d96
e49932c
 
 
3f41985
a973402
3f41985
a973402
f19aeec
e49932c
d02a40d
84a8d96
e49932c
776a79b
c90d42e
 
d02a40d
c90d42e
 
544159d
c90d42e
544159d
c90d42e
e49932c
 
 
50efdde
c90d42e
d02a40d
e49932c
 
 
c90d42e
 
e49932c
a1f6dfc
d02a40d
 
71d673c
 
 
 
 
a1f6dfc
71d673c
7e691d1
 
71d673c
 
 
544159d
 
71d673c
 
 
8907f55
 
 
 
71d673c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9764fc
1f0a89c
7a02029
 
 
 
7e691d1
 
 
 
 
e49932c
 
7e691d1
544159d
 
 
71d673c
544159d
 
 
7e691d1
 
544159d
e49932c
 
1f0a89c
 
 
 
 
e49932c
 
71d673c
 
 
 
 
 
e49932c
 
 
 
 
 
 
 
 
 
1637cee
 
c7cb9a5
0fd1c62
71d673c
 
 
0fd1c62
 
 
 
 
 
 
 
 
 
c7cb9a5
 
7a02029
71d673c
7a02029
1f0a89c
 
7a02029
1637cee
0fd1c62
7a02029
c7cb9a5
a931541

"""
Find the inspiration for this project as well as the pretrained model
we used here: https://github.com/bearpelican/musicautobot
"""

import gradio as gr

from musicautobot.utils.setup_musescore import play_wav
from music21.midi.translate import midiFileToStream
from pathlib import Path
from midi2audio import FluidSynth

# from musicautobot.numpy_encode import *
from musicautobot.config import default_config
from musicautobot.music_transformer import *
from musicautobot.utils.midifile import *
# from musicautobot.utils.file_processing import process_all

import pickle

import subprocess
import os

# Log the working directory: the relative paths used below ('.', 'data.pkl')
# are resolved against it.
print(os.getcwd())

# Load the stored data. Its vocab (data.vocab) is what the functions below
# use to encode MIDI files and streams into model input.
print('Loading data to build vocabulary.')
data_dir = Path('.')
data = load_data(data_dir, 'data.pkl')

from huggingface_hub import hf_hub_download

# Fetch the fine-tuned weights from the Hugging Face Hub. The returned value
# is handed to the learner below as its pretrained_path.
print('Downloading model.')
model_cache_path = hf_hub_download(repo_id="psistolar/musicautobot-fine1", filename="model.pth")

from transformers import pipeline

# Sentiment classifier used when sonifying text. No model name is passed
# here, so the library's choice for the "sentiment-analysis" task is used.
# process_midi reads the 'label' and 'score' keys of its first result.
classifier = pipeline("sentiment-analysis")

# Start from the library's default config options and turn on
# 'encode_position'.
# NOTE(review): presumably this must match how the checkpoint was trained -
# confirm against the fine-tuning setup.
config = default_config()
config['encode_position'] = True


print("Building model.")
# Load our fine-tuned model. A copy of the config is passed so the
# module-level `config` dict is not shared with the learner.
learner = music_model_learner(
    data, 
    config=config.copy(), 
    pretrained_path=model_cache_path
)

print("Ready to use.")

# Letters that sonify_text keeps from the input text and turns into notes.
musical_letters = 'abcdefg'



from music21 import note

def sonify_text(text, sentiment):
    """Build a musical seed from free text and its sentiment label.

    A short chord progression (chosen by ``sentiment``) is laid down first,
    followed by a quarter-note melody made from every character of ``text``
    that appears in ``musical_letters``, each placed in octave 4.

    Args:
        text: The text to sonify. Matching is case-insensitive.
        sentiment: Sentiment label. ``'NEGATIVE'`` selects the minor
            progression; any other value selects the major one.

    Returns:
        The ``MusicItem`` built from the resulting score with ``data.vocab``.
    """
    part = music21.stream.Part()
    if sentiment == 'NEGATIVE':
        # If negative, use a i-VI-III-VII progression in A minor.
        part.append(music21.chord.Chord('A3 C4 E4', type='half'))  # i
        # NOTE(review): 'A4' sits above the C4 here, unlike the close
        # voicing of the other chords - possibly meant to be 'A3'; confirm.
        part.append(music21.chord.Chord('F3 A4 C4', type='half'))  # VI
        part.append(music21.chord.Chord('C3 E3 G3', type='half'))  # III
        part.append(music21.chord.Chord('G3 B3 D4', type='half'))  # VII
    else:
        # If positive, use a partial progression I-V-vi in C Major.
        part.append(music21.chord.Chord('C4 E4 G4', type='half'))  # I
        part.append(music21.chord.Chord('G3 B3 D4', type='half'))  # V
        part.append(music21.chord.Chord('A3 C4 E4', type='half'))  # vi

    # Melody: one quarter note per musical letter found in the text, in the
    # order the letters appear.
    for letter in text.lower():
        if letter not in musical_letters:
            continue
        melody_note = note.Note(f"{letter.upper()}4")
        melody_note.duration.type = "quarter"
        part.append(melody_note)

    score = music21.stream.Score([part])
    return MusicItem.from_stream(score, data.vocab)

def process_midi(MIDI_File, Text_to_Sonify, Randomness, Amount_of_Music_to_Add):
    """Generate music from an uploaded MIDI file or from sonified text.

    The seed is chosen as follows:
      * an uploaded MIDI file, when one is given;
      * otherwise the sonified text, when the text is not blank;
      * otherwise the bundled 'C Major Scale.midi'.

    Args:
        MIDI_File: Uploaded file object (its ``.name`` is the path on disk),
            or None when nothing was uploaded.
        Text_to_Sonify: Text to turn into a musical seed; only used when no
            MIDI file was uploaded.
        Randomness: Slider value; divided by 100 to obtain the sampling
            temperature.
        Amount_of_Music_to_Add: Passed to the learner as ``n_words``.

    Returns:
        A ``(wav_path, midi_path)`` tuple: ``('result.wav', 'result.midi')``.
    """
    # A blank text box may arrive as an empty (or whitespace-only) string
    # rather than None, so test for real content. Otherwise the default-MIDI
    # fallback below could never be reached.
    has_text = bool(Text_to_Sonify and Text_to_Sonify.strip())
    sonification = MIDI_File is None and has_text

    # create the model input object
    if sonification:
        sentiment_analysis = classifier(Text_to_Sonify)[0]
        item = sonify_text(Text_to_Sonify, sentiment_analysis['label'])
        # the lower our confidence in the sentiment, the more randomness we
        # inject; flooring the score at 0.25 caps that boost at 4x
        score = max(0.25, sentiment_analysis['score'])
        temp = Randomness / (100 * score)
    else:
        if MIDI_File is not None:
            name = Path(MIDI_File.name)
        else:
            name = Path('C Major Scale.midi')
        item = MusicItem.from_file(name, data.vocab)
        temp = Randomness / 100

    # NOTE(review): a Randomness of 0 yields a temperature of 0 - confirm
    # that learner.predict tolerates it.
    # full is the prediction appended to the input
    pred, full = learner.predict(
        item,
        n_words=Amount_of_Music_to_Add,
        temperatures=(temp, temp)
    )

    # convert to stream and then MIDI file;
    # do not replay the musical seed if sonifying
    stream = pred.to_stream() if sonification else full.to_stream()
    out = music21.midi.translate.streamToMidiFile(stream)

    # save MIDI file, making sure the handle is released even if the write
    # fails part-way
    out.open('result.midi', 'wb')
    try:
        out.write()
    finally:
        out.close()

    # use fluidsynth to convert MIDI to WAV so the user can hear the output
    sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
    # TODO: if we can personalize the file names, let's do that with the text
    return 'result.wav', 'result.midi'

# Label text for the MIDI upload widget; it also tells users about the
# text-to-music alternative.
midi_file_desc = (
    "Upload your own MIDI file here (try to keep it small without any fun time signatures).\n"
    "If you do not have a MIDI file, add some text and we will turn it into music!\n"
)

# Markdown shown below the interface. Keep the Sonification section in step
# with the code: the sentiment comes from a pretrained sentiment-analysis
# pipeline, and the melody uses only the letters A to G found in the text.
article = """# Pop Music Transformer
We are using a language model to create music by treating a standard MIDI file as simple text, with tokens for note values, note duration, and separations to denote movement forward in time.

This is all following the great work you can find [at this repo](https://github.com/bearpelican/musicautobot). Moreover check out [their full web app](http://musicautobot.com/). We use the pretrained model they created as well as the utilities for converting between MIDI, audio streams, numpy encodings, and WAV files.

## Sonification

This is the process of turning something not inherently musical into music. Here we do something pretty simple. We take your input text "pretty cool", get a sentiment score from a pretrained sentiment-analysis model, and use a major progression if it's positive and a minor progression if it's negative, and then factor the score into the randomness of the generated music. We also take the text and extract a melody by taking any of the letters from A to G, which in the example is just "E C". With the simple "E C" melody and a major progression a musical idea is generated.
"""

# Input widgets, listed in the positional order of process_midi's parameters:
# MIDI upload, text to sonify, randomness slider, amount of music to add.
demo_inputs = [
    gr.inputs.File(optional=True, label=midi_file_desc),
    "text",
    gr.inputs.Slider(0, 250, default=100, step=50),
    gr.inputs.Radio([100, 200, 500], type="value", default=100),
]

# process_midi returns (wav path, midi path), matching these two outputs.
demo_outputs = ["audio", "file"]

iface = gr.Interface(
    fn=process_midi,
    inputs=demo_inputs,
    outputs=demo_outputs,
    article=article,
    # examples=['C major scale.midi']
)

# Start serving the interface.
iface.launch()