Spaces:
Build error
Build error
File size: 3,563 Bytes
ef98e8e 2daf15a 0760318 cf24f3c 2ab00ef 0760318 13af1af c970deb 13af1af 85250f0 ed28ae4 85250f0 ed28ae4 2ab00ef 85250f0 cf24f3c 2ab00ef cf24f3c 85250f0 ed28ae4 0f3083a ed28ae4 1a06f79 ed28ae4 85250f0 ed28ae4 cf24f3c ed28ae4 85250f0 ed28ae4 85250f0 ed28ae4 cf24f3c 2ab00ef cf24f3c 2ab00ef ed28ae4 85250f0 ed28ae4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os
# NOTE(review): installing the package at import time with os.system is fragile
# (exit status ignored); presumably required by the hosting environment — confirm.
os.system("python3 -m pip install -e .")
import gradio as gr
import note_seq
from pytube import YouTube
from pydub import AudioSegment
from inferencemodel import InferenceModel
from utils import upload_audio
# Audio sample rate in Hz; also hard-coded in inference() below.
SAMPLE_RATE = 16000
# SoundFont path; not referenced elsewhere in this view — TODO confirm it is used.
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"
# Start inference model
# Load the default "mt3" checkpoint at startup.
inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", "mt3")
# Name of the checkpoint currently loaded; change_model() uses it to skip reloads.
current_model = "mt3"
def change_model(model):
    """Swap the active transcription checkpoint.

    Reloads the inference model only when *model* differs from the checkpoint
    that is already loaded, to avoid redundant (expensive) reloads.

    Args:
        model: Checkpoint name selected in the UI, e.g. "mt3" or "ismir2021".
    """
    global inference_model, current_model
    # Already loaded — nothing to do.
    if model == current_model:
        return
    inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
    current_model = model
    # Fixed typo in the original log message ("Inferece").
    print("Inference model", inference_model)
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
def get_audio(url):
    """Download a YouTube video's audio track and return its first 10 seconds.

    Args:
        url: YouTube video URL.

    Returns:
        pydub.AudioSegment holding the first 10 seconds of the audio.
    """
    yt = YouTube(url)
    video = yt.streams.filter(only_audio=True).first()
    out_file = video.download(output_path=".")
    base, ext = os.path.splitext(out_file)
    print("the extension is", ext)
    # NOTE(review): renaming only changes the file extension — the container is
    # NOT transcoded to WAV. AudioSegment.from_wav appears to cope in practice,
    # but confirm the downloaded stream format actually decodes this way.
    new_file = base + ".wav"
    os.rename(out_file, new_file)
    wav_to_cut = AudioSegment.from_wav(new_file)
    # pydub does things in milliseconds
    ten_seconds = 10 * 1000
    return wav_to_cut[:ten_seconds]
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
def populate_metadata(link):
    """Return (thumbnail_url, title, 10-second audio preview) for a YouTube link."""
    video = YouTube(link)
    preview = get_audio(link)
    return video.thumbnail_url, video.title, preview
def inference(audio):
    """Transcribe an audio file to MIDI using the currently loaded model.

    Args:
        audio: Path to an audio file on disk.

    Returns:
        Path to the written MIDI file ("./transcribed.mid").
    """
    with open(audio, "rb") as fd:
        contents = fd.read()
    # Use the module-level SAMPLE_RATE constant instead of repeating the 16000
    # literal, and avoid shadowing the `audio` path parameter.
    uploaded = upload_audio(contents, sample_rate=SAMPLE_RATE)
    est_ns = inference_model(uploaded)
    note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
    return "./transcribed.mid"
# UI copy shown in the page header.
title = "Transcribe music from YouTube videos using Transformers."
description = """
Gradio demo for Music Transcription with Transformers. Read more in the links below.
"""
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.03017' target='_blank'>MT3: Multi-Task Multitrack Music Transcription</a> | <a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a></p>"

# Create a block object
demo = gr.Blocks()

# Use your Block object as a context
with demo:
    gr.Markdown(
        "<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>"
    )
    gr.Markdown(description)
    with gr.Box():
        gr.Markdown("<h2>Select your model</h2>")
        # Fixed: removed a stray trailing double quote from the original help text.
        gr.Markdown("""
        The ismir2021 model transcribes piano only, with note velocities.
        The mt3 model transcribes multiple simultaneous instruments, but without velocities.
        """)
        model = gr.Radio(
            ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
        )
        # Reload the checkpoint whenever the radio selection changes.
        model.change(fn=change_model, inputs=model, outputs=[])

    link = gr.Textbox(label="YouTube Link")
    with gr.Row().style(mobile_collapse=False, equal_height=True):
        title = gr.Label(label="Video Title", placeholder="Title")
        img = gr.Image(label="Thumbnail")
    with gr.Row():
        yt_audio = gr.Audio()
    # Fill in thumbnail, title, and audio preview when a link is entered.
    link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])

demo.launch()
# Removed: a dead commented-out gr.Interface(...) variant that referenced an
# undefined `examples` variable.