Spaces:
Build error
Build error
import os | |
# Install this repository in editable mode before the remaining imports run.
# The command is passed as an argument list (no shell involved) and uses the
# interpreter that is executing this script instead of whatever "python3"
# resolves to on PATH.
import subprocess
import sys

# check=False: as with the previous os.system call, a failed install does not
# abort start-up here.
subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], check=False)
import gradio as gr | |
import note_seq | |
from pytube import YouTube | |
from pydub import AudioSegment | |
from inferencemodel import InferenceModel | |
from utils import upload_audio | |
# Sample-rate constant. inference() passes the same value (16000) to
# upload_audio as a literal.
SAMPLE_RATE = 16000
# Name of an .sf2 file; not referenced in the visible part of this script.
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"

# Start inference model
# `current_model` names the checkpoint that `inference_model` was built from;
# change_model() rebinds both when a different model is requested.
current_model = "mt3"
inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", current_model)
def change_model(model):
    """Make ``model`` the active inference model.

    Nothing happens when ``model`` already equals ``current_model``.
    Otherwise a new ``InferenceModel`` is built from
    ``/home/user/app/checkpoints/<model>/``, stored in the module-level
    ``inference_model``, ``current_model`` is updated to match, and the new
    model object is printed.

    Args:
        model: Name of the model to activate (the UI radio offers "mt3" and
            "ismir2021").

    Returns:
        None.
    """
    global current_model, inference_model

    if model != current_model:
        inference_model = InferenceModel(f"/home/user/app/checkpoints/{model}/", model)
        current_model = model
        print("Inferece model", inference_model)
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper | |
def get_audio(url):
    """Download a YouTube video's audio and return its first ten seconds.

    The first audio-only stream reported by pytube is downloaded into the
    current directory, renamed so that it carries a ``.wav`` extension,
    loaded with pydub and cut to ten seconds. The downloaded file is removed
    again before returning, including when loading or slicing fails.

    Args:
        url: URL of the YouTube video.

    Returns:
        A pydub ``AudioSegment`` sliced to the first 10 000 ms.
        NOTE(review): populate_metadata() forwards this object to a
        ``gr.Audio`` output -- confirm that component accepts an
        ``AudioSegment``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # Without this guard the failure surfaced as an AttributeError on None.
        raise ValueError(f"No audio-only stream available for {url!r}")

    out_file = stream.download(output_path=".")
    # Only the file name changes here; os.rename does not convert the data.
    # Presumably pydub detects the real container format -- TODO confirm.
    base, _ = os.path.splitext(out_file)
    new_file = base + ".wav"
    os.rename(out_file, new_file)
    print("file a is:", new_file)

    # pydub does things in milliseconds
    ten_seconds = 10 * 1000
    try:
        wav_to_cut = AudioSegment.from_file(new_file)
        first_10_seconds = wav_to_cut[:ten_seconds]
    finally:
        # Always clean up the download, even if decoding raised.
        os.remove(new_file)
    return first_10_seconds
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer | |
def populate_metadata(link):
    """Look up thumbnail URL, title and audio preview for a YouTube link.

    This is wired to the change event of the "YouTube Link" textbox, so it
    can be called with an empty value (for example when the box is cleared).

    Args:
        link: Current text of the link box; may be empty or ``None``.

    Returns:
        A ``(thumbnail_url, title, audio)`` tuple, where ``audio`` is
        whatever ``get_audio(link)`` returns. For an empty or
        whitespace-only link the tuple is ``(None, None, None)`` and neither
        pytube nor ``get_audio`` is called.
        NOTE(review): the ``None`` values are expected to clear the three
        output components -- confirm for the installed Gradio version.
    """
    if not link or not link.strip():
        return None, None, None

    yt = YouTube(link)
    audio = get_audio(link)
    return yt.thumbnail_url, yt.title, audio
def inference(audio):
    """Transcribe an audio file and return the path of the written MIDI file.

    The file at ``audio`` is read as bytes, passed through ``upload_audio``,
    fed to the module-level ``inference_model``, and the result is written
    out with ``note_seq.sequence_proto_to_midi_file``.

    Args:
        audio: Path of the audio file to transcribe.

    Returns:
        The string "./transcribed.mid"; the same path is written on every
        call.
    """
    midi_path = "./transcribed.mid"

    with open(audio, "rb") as audio_fd:
        raw_bytes = audio_fd.read()

    # 16000 is the same value as the module-level SAMPLE_RATE.
    prepared_audio = upload_audio(raw_bytes, sample_rate=16000)
    estimated_ns = inference_model(prepared_audio)
    note_seq.sequence_proto_to_midi_file(estimated_ns, midi_path)
    return midi_path
# Static text for the page: heading, introduction, and footer links.
title = "Transcribe music from YouTube videos using Transformers."

description = (
    "\n"
    "Gradio demo for Music Transcription with Transformers. Read more in the links below.\n"
)

article = (
    "<p style='text-align: center'>"
    "<a href='https://magenta.tensorflow.org/transcription-with-transformers' target='_blank'>"
    "Blog: Music Transcription with Transformers</a>"
    " | "
    "<a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a>"
    "</p>"
)
# Create a block object | |
demo = gr.Blocks()

# Use your Block object as a context
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting of rows/boxes below is reconstructed -- confirm the layout.
with demo:
    # Page heading; `title` is still the module-level string at this point.
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    gr.Markdown(description)

    with gr.Box():
        model_label = (
            "\n"
            "What kind of model you want to use?\n"
            "The ismir2021 model transcribes piano only, with note velocities.\n"
            "The mt3 model transcribes multiple simultaneous instruments, but without velocities.\n"
        )
        model = gr.Radio(
            ["mt3", "ismir2021"],
            label=model_label,
            value="mt3",
        )
        # Rebuild the inference model whenever another radio option is chosen.
        model.change(fn=change_model, inputs=model, outputs=[])

        link = gr.Textbox(label="YouTube Link")
        with gr.Row().style(mobile_collapse=False, equal_height=True):
            # From here on the name `title` refers to this Label component,
            # no longer to the heading string used above.
            title = gr.Label(label="Video Title", placeholder="Title")
            img = gr.Image(label="Thumbnail")
        with gr.Row():
            # inference() calls open() on the value it receives, so the
            # component must deliver a file path rather than its default
            # (sample_rate, array) representation.
            yt_audio = gr.Audio(type="filepath")

        # Fill thumbnail, title and audio preview whenever the link changes.
        link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])

        with gr.Row():
            btn = gr.Button("Transcribe music")
            audio_file = gr.File()
            btn.click(inference,
                      inputs=[yt_audio],
                      outputs=audio_file)

    gr.Markdown(article)

demo.launch()

# Earlier gr.Interface-based entry point, disabled and kept for reference only.
# It refers to an `examples` name that is not defined in the visible code.
#
# gr.Interface(
#     inference,
#     gr.inputs.Audio(type="filepath", label="Input"),
#     [gr.outputs.File(label="Output")],
#     title=title,
#     description=description,
#     article=article,
#     examples=examples,
# ).launch().queue()