Spaces:
Build error
Build error
File size: 3,563 Bytes
ef98e8e 2daf15a 0760318 cf24f3c 2ab00ef 0760318 13af1af c970deb 13af1af 85250f0 ed28ae4 85250f0 ed28ae4 2ab00ef 85250f0 cf24f3c 2ab00ef cf24f3c 85250f0 ed28ae4 0f3083a ed28ae4 1a06f79 ed28ae4 85250f0 ed28ae4 cf24f3c ed28ae4 85250f0 ed28ae4 85250f0 ed28ae4 cf24f3c 2ab00ef cf24f3c 2ab00ef ed28ae4 85250f0 ed28ae4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os
# NOTE(review): installing the package at import time with os.system is fragile
# (exit status ignored); presumably required by the hosting environment — confirm.
os.system("python3 -m pip install -e .")
import gradio as gr
import note_seq
from pytube import YouTube
from pydub import AudioSegment
from inferencemodel import InferenceModel
from utils import upload_audio
# Audio sample rate in Hz; also hard-coded in inference() below.
SAMPLE_RATE = 16000
# SoundFont path; not referenced elsewhere in this view — TODO confirm it is used.
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"
# Start inference model
# Load the default "mt3" checkpoint at startup.
inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", "mt3")
# Name of the checkpoint currently loaded; change_model() uses it to skip reloads.
current_model = "mt3"
def change_model(model):
    """Swap the active transcription checkpoint.

    Reloads the inference model only when *model* differs from the checkpoint
    that is already loaded, to avoid redundant (expensive) reloads.

    Args:
        model: Checkpoint name selected in the UI, e.g. "mt3" or "ismir2021".
    """
    global inference_model, current_model
    # Already loaded — nothing to do.
    if model == current_model:
        return
    inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
    current_model = model
    # Fixed typo in the original log message ("Inferece").
    print("Inference model", inference_model)
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
def get_audio(url):
    """Download a YouTube video's audio track and return its first 10 seconds.

    Args:
        url: YouTube video URL.

    Returns:
        pydub.AudioSegment holding the first 10 seconds of the audio.
    """
    yt = YouTube(url)
    video = yt.streams.filter(only_audio=True).first()
    out_file = video.download(output_path=".")
    base, ext = os.path.splitext(out_file)
    print("the extension is", ext)
    # NOTE(review): renaming only changes the file extension — the container is
    # NOT transcoded to WAV. AudioSegment.from_wav appears to cope in practice,
    # but confirm the downloaded stream format actually decodes this way.
    new_file = base + ".wav"
    os.rename(out_file, new_file)
    wav_to_cut = AudioSegment.from_wav(new_file)
    # pydub does things in milliseconds
    ten_seconds = 10 * 1000
    return wav_to_cut[:ten_seconds]
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
def populate_metadata(link):
    """Return (thumbnail_url, title, 10-second audio preview) for a YouTube link."""
    video = YouTube(link)
    preview = get_audio(link)
    return video.thumbnail_url, video.title, preview
def inference(audio):
    """Transcribe an audio file to MIDI using the currently loaded model.

    Args:
        audio: Path to an audio file on disk.

    Returns:
        Path to the written MIDI file ("./transcribed.mid").
    """
    with open(audio, "rb") as fd:
        contents = fd.read()
    # Use the module-level SAMPLE_RATE constant instead of repeating the 16000
    # literal, and avoid shadowing the `audio` path parameter.
    uploaded = upload_audio(contents, sample_rate=SAMPLE_RATE)
    est_ns = inference_model(uploaded)
    note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
    return "./transcribed.mid"
# UI copy shown in the page header.
title = "Transcribe music from YouTube videos using Transformers."
description = """
Gradio demo for Music Transcription with Transformers. Read more in the links below.
"""
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.03017' target='_blank'>MT3: Multi-Task Multitrack Music Transcription</a> | <a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a></p>"

# Create a block object
demo = gr.Blocks()

# Use your Block object as a context
with demo:
    gr.Markdown(
        "<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>"
    )
    gr.Markdown(description)
    with gr.Box():
        gr.Markdown("<h2>Select your model</h2>")
        # Fixed: removed a stray trailing double quote from the original help text.
        gr.Markdown("""
        The ismir2021 model transcribes piano only, with note velocities.
        The mt3 model transcribes multiple simultaneous instruments, but without velocities.
        """)
        model = gr.Radio(
            ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
        )
        # Reload the checkpoint whenever the radio selection changes.
        model.change(fn=change_model, inputs=model, outputs=[])

    link = gr.Textbox(label="YouTube Link")
    with gr.Row().style(mobile_collapse=False, equal_height=True):
        title = gr.Label(label="Video Title", placeholder="Title")
        img = gr.Image(label="Thumbnail")
    with gr.Row():
        yt_audio = gr.Audio()
    # Fill in thumbnail, title, and audio preview when a link is entered.
    link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])

demo.launch()
# Removed: a dead commented-out gr.Interface(...) variant that referenced an
# undefined `examples` variable.