"""Gradio Space: transcribe music from YouTube videos with MT3 transformers."""

import os

# NOTE(review): installing the package at import time is a Hugging Face Spaces
# bootstrap hack; it runs on every restart. Prefer the Space's build step
# (requirements.txt) if possible.
os.system("python3 -m pip install -e .")

import gradio as gr
import nest_asyncio
import note_seq
from music21 import converter, environment
from pydub import AudioSegment  # kept: may be relied on by transitive setup
from pytube import YouTube

from inferencemodel import InferenceModel
from utils import upload_audio, create_image_from_note_sequence

nest_asyncio.apply()

SAMPLE_RATE = 16000
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"

# Set up music21 with MuseScore so scores can be rendered headlessly to PNG.
us = environment.UserSettings()
us["musescoreDirectPNGPath"] = "/usr/bin/mscore3"
# BUG FIX: os.putenv does not update os.environ, so the current process (and
# libraries reading the environment through Python) may never see the values.
# Also coerce getRootTempDir() (a path object) to str explicitly.
os.environ["QT_QPA_PLATFORM"] = "offscreen"
os.environ["XDG_RUNTIME_DIR"] = str(environment.Environment().getRootTempDir())


def load_model(model: str = "mt3") -> InferenceModel:
    """Build an InferenceModel for the named checkpoint directory.

    BUG FIX: the original signature was ``def load_model(model=str):`` which
    made the *type* ``str`` the default argument instead of annotating the
    parameter; a no-arg call would have produced the path
    ``/home/user/app/checkpoints/<class 'str'>/``.

    Args:
        model: Checkpoint name, e.g. "mt3" or "ismir2021".

    Returns:
        An InferenceModel bound to that checkpoint.
    """
    checkpoint_path = f"/home/user/app/checkpoints/{model}/"
    return InferenceModel(checkpoint_path, model)


# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
def get_audio(url: str) -> str:
    """Download a YouTube video's audio stream and return the local file path.

    NOTE(review): the file is only *renamed* to ``.wav`` — the actual container
    is whatever pytube downloaded (typically mp4/webm audio). Downstream
    readers happen to cope; re-encode with pydub if a real WAV is required.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    downloaded = stream.download(output_path=".")
    base, _ext = os.path.splitext(downloaded)
    wav_path = base + ".wav"
    os.rename(downloaded, wav_path)
    return wav_path


# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
def populate_metadata(link: str):
    """Return (thumbnail_url, title, audio_path, audio_path) for a YouTube link.

    The audio path is returned twice: once for the visible audio player and
    once for the hidden textbox that feeds the transcription step.
    """
    yt = YouTube(link)
    audio = get_audio(link)
    return yt.thumbnail_url, yt.title, audio, audio


def inference(yt_audio_path: str, model: str):
    """Transcribe an audio file to MIDI and render previews.

    Args:
        yt_audio_path: Path of the downloaded audio file.
        model: Checkpoint name passed through to load_model.

    Returns:
        Tuple of (midi file path, (sample_rate, int16 synthesized audio),
        piano-roll image, rendered score image path).
    """
    with open(yt_audio_path, "rb") as fd:
        contents = fd.read()

    audio = upload_audio(contents, sample_rate=SAMPLE_RATE)

    inference_model = load_model(model)
    est_ns = inference_model(audio)

    note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")

    # Synthesize the transcription so users can listen to the result.
    synth = note_seq.midi_synth.fluidsynth
    array_of_floats = synth(est_ns, sample_rate=SAMPLE_RATE, sf2_path=SF2_PATH)
    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)

    piano_roll = create_image_from_note_sequence(est_ns)

    # Render engraved sheet music via music21 + MuseScore.
    parsed = converter.parse("./transcribed.mid")
    score = parsed.write("musicxml.png")

    return "./transcribed.mid", (SAMPLE_RATE, int16_data), piano_roll, score


title = "Transcribe music from YouTube videos using Transformers."
description = """
Gradio demo for Music Transcription with Transformers. Read more in the links below.
To use this demo, just add a YouTube link with the music you want to transcribe.
"""
# NOTE(review): the original HTML markup of `article` and the heading below was
# stripped when this file was mangled; reconstructed with the same visible text.
article = (
    "<p style='text-align: center'>"
    "<a href='https://magenta.tensorflow.org/transcription-with-transformers'"
    " target='_blank'>Blog: Music Transcription with Transformers</a> | "
    "<a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a>"
    "</p>"
)

# Create a block object
demo = gr.Blocks()

# Use your Block object as a context
with demo:
    gr.Markdown("<h1 style='text-align: center'>" + title + "</h1>")
    gr.Markdown(description)
    with gr.Box():
        with gr.Box():
            model_label = """
            What kind of model you want to use?
            The ismir2021 model transcribes piano only, with note velocities.
            The mt3 model transcribes multiple simultaneous instruments, but without velocities.
            """
            model = gr.Radio(["mt3"], label=model_label, value="mt3")
        with gr.Row():
            link = gr.Textbox(label="YouTube Link")
        with gr.Row():
            preview_btn = gr.Button("Preview")
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # BUG FIX: the component was named `title`, shadowing the
                # module-level page title string defined above.
                video_title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            with gr.Row():
                yt_audio = gr.Audio()
                yt_audio_path = gr.Textbox(visible=False)
            preview_btn.click(
                fn=populate_metadata,
                inputs=[link],
                outputs=[img, video_title, yt_audio, yt_audio_path],
            )
        with gr.Row():
            btn = gr.Button("Transcribe music")
        with gr.Row():
            midi_file = gr.File()
            midi_audio = gr.Audio()
        with gr.Row():
            piano_roll = gr.Image()
            score = gr.Image()
        btn.click(
            inference,
            inputs=[yt_audio_path, model],
            outputs=[midi_file, midi_audio, piano_roll, score],
            api_name="transcribe_wav_to_midi",
        )
    gr.Markdown('''
    [![Twitter Follow](https://img.shields.io/twitter/follow/juancopi81?style=social)](https://twitter.com/juancopi81)
    ![visitors](https://visitor-badge.glitch.me/badge?page_id=Juancopi81.YoutubeMusicTranscribe)
    ''')
    gr.Markdown(article)

demo.launch()