juancopi81 committed
Commit 2ab00ef • Parent(s): cf24f3c

Get first 10 sec of yt video

Files changed:
- app.py +26 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
 
 import note_seq
 from pytube import YouTube
+from pydub import AudioSegment
 
 from inferencemodel import InferenceModel
 from utils import upload_audio
@@ -24,11 +25,31 @@ def change_model(model):
     global inference_model
     inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
     current_model = model
+    print("Inferece model", inference_model)
+
+# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
+def get_audio(url):
+    yt = YouTube(url)
+    video = yt.streams.filter(only_audio=True).first()
+    out_file = video.download(output_path=".")
+    base, ext = os.path.splitext(out_file)
+    print("the extension is", ext)
+    new_file = base + ".wav"
+    os.rename(out_file, new_file)
+    a = new_file
+
+    wav_to_cut = AudioSegment.from_wav(a)
+    # pydub does things in milliseconds
+    ten_seconds = 10 * 1000
+    first_10_seconds = wav_to_cut[:ten_seconds]
+
+    return first_10_seconds
 
 # Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
 def populate_metadata(link):
     yt = YouTube(link)
-    return yt.thumbnail_url, yt.title
+    audio = get_audio(link)
+    return yt.thumbnail_url, yt.title, audio
 
 def inference(audio):
     with open(audio, "rb") as fd:
@@ -65,7 +86,7 @@ with demo:
     The mt3 model transcribes multiple simultaneous instruments, but without velocities."
     """)
     model = gr.Radio(
-        ["mt3", "ismir2021"], label="What kind of model you want to use?"
+        ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
     )
     model.change(fn=change_model, inputs=model, outputs=[])
 
@@ -73,8 +94,9 @@ with demo:
     with gr.Row().style(mobile_collapse=False, equal_height=True):
         title = gr.Label(label="Video Title", placeholder="Title")
         img = gr.Image(label="Thumbnail")
-
-    link.change(fn=populate_metadata, inputs=link, outputs=[img, title])
+    with gr.Row():
+        yt_audio = gr.Audio()
+    link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])
 
 demo.launch()
 
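For reference, the sketch below is a minimal, standalone version of what the new `get_audio` helper does: download the audio-only stream with pytube and keep the first ten seconds with pydub. It is an illustration under assumptions, not code from the commit: the function name `fetch_first_ten_seconds` and the output filename are made up, and it uses `AudioSegment.from_file` plus an explicit `export` instead of the commit's rename-then-`from_wav` approach, since renaming a downloaded file to `.wav` does not change its container format.

```python
import os
from pytube import YouTube
from pydub import AudioSegment

def fetch_first_ten_seconds(url: str, out_path: str = "clip_10s.wav") -> str:
    """Download a YouTube audio stream and export only its first 10 seconds."""
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()    # audio-only stream, as in get_audio
    downloaded = stream.download(output_path=".")           # usually an .mp4/.webm container

    # Let ffmpeg sniff the real container instead of renaming the file to .wav.
    clip = AudioSegment.from_file(downloaded)

    ten_seconds = 10 * 1000                                 # pydub works in milliseconds
    clip[:ten_seconds].export(out_path, format="wav")       # slicing yields a new AudioSegment
    return out_path
```

Returning a path to an exported file (rather than the raw `AudioSegment` that `get_audio` returns) also makes the clip straightforward to hand to a Gradio `Audio` output.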
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ jax[cpu]==0.3.15 -f https://storage.googleapis.com/jax-releases/jax_releases.htm
 clu==0.0.7
 # pin Orbax to use Checkpointer
 orbax==0.0.2
-pytube
+pytube
+pydub
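Since `populate_metadata` now returns a pydub `AudioSegment`, while a Gradio `Audio` output normally expects either a file path or a `(sample_rate, numpy_array)` tuple, a small adapter along the lines of the sketch below may be needed before the clip renders in the `yt_audio` component. The helper name `segment_for_gradio` is hypothetical, not part of the commit.

```python
import numpy as np
from pydub import AudioSegment

def segment_for_gradio(segment: AudioSegment):
    """Turn a pydub AudioSegment into a (sample_rate, samples) tuple for gr.Audio."""
    samples = np.array(segment.get_array_of_samples())
    if segment.channels > 1:
        # pydub stores interleaved samples; reshape to (n_frames, n_channels)
        samples = samples.reshape((-1, segment.channels))
    return segment.frame_rate, samples
```

Alternatively, calling `segment.export("clip.wav", format="wav")` and returning that path works just as well.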