juancopi81 committed
Commit 2ab00ef • Parent(s): cf24f3c

Get first 10 sec of yt video

Files changed:
- app.py +26 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
 
 import note_seq
 from pytube import YouTube
+from pydub import AudioSegment
 
 from inferencemodel import InferenceModel
 from utils import upload_audio
@@ -24,11 +25,31 @@ def change_model(model):
     global inference_model
     inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
     current_model = model
+    print("Inferece model", inference_model)
+
+# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
+def get_audio(url):
+    yt = YouTube(url)
+    video = yt.streams.filter(only_audio=True).first()
+    out_file = video.download(output_path=".")
+    base, ext = os.path.splitext(out_file)
+    print("the extension is", ext)
+    new_file = base + ".wav"
+    os.rename(out_file, new_file)
+    a = new_file
+
+    wav_to_cut = AudioSegment.from_wav(a)
+    # pydub does things in milliseconds
+    ten_seconds = 10 * 1000
+    first_10_seconds = wav_to_cut[:ten_seconds]
+
+    return first_10_seconds
 
 # Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
 def populate_metadata(link):
     yt = YouTube(link)
-    return yt.thumbnail_url, yt.title
+    audio = get_audio(link)
+    return yt.thumbnail_url, yt.title, audio
 
 def inference(audio):
     with open(audio, "rb") as fd:
@@ -65,7 +86,7 @@ with demo:
     The mt3 model transcribes multiple simultaneous instruments, but without velocities."
     """)
     model = gr.Radio(
-        ["mt3", "ismir2021"], label="What kind of model you want to use?"
+        ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
     )
     model.change(fn=change_model, inputs=model, outputs=[])
 
@@ -73,8 +94,9 @@ with demo:
     with gr.Row().style(mobile_collapse=False, equal_height=True):
         title = gr.Label(label="Video Title", placeholder="Title")
         img = gr.Image(label="Thumbnail")
-
-    link.change(fn=populate_metadata, inputs=link, outputs=[img, title])
+    with gr.Row():
+        yt_audio = gr.Audio()
+    link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])
 
 demo.launch()
 
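For reference, the sketch below is a minimal, standalone version of what the new `get_audio` helper does: download the audio-only stream with pytube and keep the first ten seconds with pydub. It is an illustration under assumptions, not code from the commit: the function name `fetch_first_ten_seconds` and the output filename are made up, and it uses `AudioSegment.from_file` plus an explicit `export` instead of the commit's rename-then-`from_wav` approach, since renaming a downloaded file to `.wav` does not change its container format.

```python
import os
from pytube import YouTube
from pydub import AudioSegment

def fetch_first_ten_seconds(url: str, out_path: str = "clip_10s.wav") -> str:
    """Download a YouTube audio stream and export only its first 10 seconds."""
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()    # audio-only stream, as in get_audio
    downloaded = stream.download(output_path=".")           # usually an .mp4/.webm container

    # Let ffmpeg sniff the real container instead of renaming the file to .wav.
    clip = AudioSegment.from_file(downloaded)

    ten_seconds = 10 * 1000                                 # pydub works in milliseconds
    clip[:ten_seconds].export(out_path, format="wav")       # slicing yields a new AudioSegment
    return out_path
```

Returning a path to an exported file (rather than the raw `AudioSegment` that `get_audio` returns) also makes the clip straightforward to hand to a Gradio `Audio` output.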
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ jax[cpu]==0.3.15 -f https://storage.googleapis.com/jax-releases/jax_releases.htm
 clu==0.0.7
 # pin Orbax to use Checkpointer
 orbax==0.0.2
-pytube
+pytube
+pydub
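Since `populate_metadata` now returns a pydub `AudioSegment`, while a Gradio `Audio` output normally expects either a file path or a `(sample_rate, numpy_array)` tuple, a small adapter along the lines of the sketch below may be needed before the clip renders in the `yt_audio` component. The helper name `segment_for_gradio` is hypothetical, not part of the commit.

```python
import numpy as np
from pydub import AudioSegment

def segment_for_gradio(segment: AudioSegment):
    """Turn a pydub AudioSegment into a (sample_rate, samples) tuple for gr.Audio."""
    samples = np.array(segment.get_array_of_samples())
    if segment.channels > 1:
        # pydub stores interleaved samples; reshape to (n_frames, n_channels)
        samples = samples.reshape((-1, segment.channels))
    return segment.frame_rate, samples
```

Alternatively, calling `segment.export("clip.wav", format="wav")` and returning that path works just as well.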