import gradio as gr
from transformers import AutoTokenizer, TFGPT2LMHeadModel
from transformers import pipeline
import note_seq
from utils import token_sequence_to_note_sequence, create_image_from_note_sequence
# Audio sample rate (Hz) used when synthesizing the generated piece
SAMPLE_RATE = 44100
# Feel free to change this list. I am using only three notes here because the
# model works better this way.
notes = ["D3", "G3", "E4"]
notes_to_midi = {"D3": 50, "G3": 55, "E4": 64}
time_signatures = ["4/4", "3/4", "2/4", "6/8"]
time_signature_to_tokens = {"4/4": "4_4", "3/4": "3_4", "2/4": "2_4", "6/8": "6_8"}
# Mutopia model for music generation
mutopia_model = TFGPT2LMHeadModel.from_pretrained("juancopi81/mutopia_guitar_mmm")
mutopia_tokenizer = AutoTokenizer.from_pretrained("juancopi81/mutopia_guitar_mmm")
pipe = pipeline(
    "text-generation", model=mutopia_model, tokenizer=mutopia_tokenizer, device=0
)
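# Note: device=0 assumes a GPU is available; pass device=-1 to run the pipeline on CPU.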
# Content for your demo:
title = "Mutopia Guitar Composer"
# The description includes an image generated using DALL-E
description = """
The bot was trained to compose guitar music using the
[Mutopia Guitar Dataset](https://huggingface.co/datasets/juancopi81/mutopia_guitar_dataset).
Adjust the controls and receive a new guitar piece!
<figure>
<center>
<img src="https://drive.google.com/uc?export=view&id=1F22ofTCeJAHqVag4lJvBZugAE1OyabVA"
width=200px alt="Robot playing the guitar">
<figcaption>Image generated using DALL-E</figcaption>
</center>
</figure>
"""
article = """
For a complete tutorial on how to create this demo from scratch, check out this
[GitHub Repo](https://github.com/juancopi81/MMM_Mutopia_Guitar).
"""
# Helper function to create the string seed
def create_seed(time_signature: str,
                note: str,
                bpm: int,
                density: int) -> str:
    seed = (f"PIECE_START TIME_SIGNATURE={time_signature_to_tokens[time_signature]} "
            f"BPM={bpm} TRACK_START INST=0 DENSITY={density} "
            f"BAR_START NOTE_ON={notes_to_midi[note]} ")
    return seed
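# Example seed (for illustration): with the UI defaults below,
# create_seed("4/4", "G3", 90, 2) produces
# "PIECE_START TIME_SIGNATURE=4_4 BPM=90 TRACK_START INST=0 DENSITY=2 BAR_START NOTE_ON=55 "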
def generate_guitar_piece(time_signature: str,
                          note: str,
                          bpm: int,
                          density: int):
    seed = create_seed(time_signature, note, bpm, density)
    piece = pipe(seed, max_length=250)[0]["generated_text"]
    # Convert the generated token sequence to a NoteSequence and synthesize audio
    note_sequence = token_sequence_to_note_sequence(piece)
    synth = note_seq.midi_synth.synthesize
    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
    # Render a piano-roll image of the generated piece
    piano_roll = create_image_from_note_sequence(note_sequence)
    return (SAMPLE_RATE, int16_data), piano_roll
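# Quick sanity check (illustrative): calling the function directly, outside Gradio,
# returns a (sample_rate, int16 samples) tuple plus a piano-roll image:
# (rate, samples), image = generate_guitar_piece("4/4", "G3", bpm=90, density=2)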
# Create a Blocks object
demo = gr.Blocks()
# Use the Blocks object as a context manager
with demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    gr.Markdown(description)
    # UI for the inputs of the model
    gr.Markdown("Select the generation parameters.")
    with gr.Row():
        time_signature = gr.Dropdown(time_signatures, value="4/4", label="Time signature")
        note = gr.Dropdown(notes, value="G3", label="First note")
        bpm = gr.Slider(minimum=60, maximum=140, step=10, value=90, label="Tempo")
        density = gr.Slider(minimum=0, maximum=4, step=1, value=2, label="Density")
    with gr.Row():
        btn = gr.Button("Compose")
    with gr.Row():
        audio_output = gr.Audio()
        image_output = gr.Image()
    btn.click(generate_guitar_piece,
              inputs=[
                  time_signature,
                  note,
                  bpm,
                  density
              ],
              outputs=[audio_output, image_output])
    gr.Markdown(article)
# Launch your demo
demo.launch(debug=False)