juancopi81 commited on
Commit
2ab00ef
1 Parent(s): cf24f3c

Get first 10 sec of yt video

Browse files
Files changed (2) hide show
  1. app.py +26 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
6
 
7
  import note_seq
8
  from pytube import YouTube
 
9
 
10
  from inferencemodel import InferenceModel
11
  from utils import upload_audio
@@ -24,11 +25,31 @@ def change_model(model):
24
  global inference_model
25
  inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
26
  current_model = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
29
  def populate_metadata(link):
30
  yt = YouTube(link)
31
- return yt.thumbnail_url, yt.title
 
32
 
33
  def inference(audio):
34
  with open(audio, "rb") as fd:
@@ -65,7 +86,7 @@ with demo:
65
  The mt3 model transcribes multiple simultaneous instruments, but without velocities."
66
  """)
67
  model = gr.Radio(
68
- ["mt3", "ismir2021"], label="What kind of model you want to use?"
69
  )
70
  model.change(fn=change_model, inputs=model, outputs=[])
71
 
@@ -73,8 +94,9 @@ with demo:
73
  with gr.Row().style(mobile_collapse=False, equal_height=True):
74
  title = gr.Label(label="Video Title", placeholder="Title")
75
  img = gr.Image(label="Thumbnail")
76
-
77
- link.change(fn=populate_metadata, inputs=link, outputs=[img, title])
 
78
 
79
  demo.launch()
80
 
 
6
 
7
  import note_seq
8
  from pytube import YouTube
9
+ from pydub import AudioSegment
10
 
11
  from inferencemodel import InferenceModel
12
  from utils import upload_audio
 
25
  global inference_model
26
  inference_model = InferenceModel("/home/user/app/checkpoints/mt3/", model)
27
  current_model = model
28
+ print("Inferece model", inference_model)
29
+
30
+ # Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
31
+ def get_audio(url):
32
+ yt = YouTube(url)
33
+ video = yt.streams.filter(only_audio=True).first()
34
+ out_file = video.download(output_path=".")
35
+ base, ext = os.path.splitext(out_file)
36
+ print("the extension is", ext)
37
+ new_file = base + ".wav"
38
+ os.rename(out_file, new_file)
39
+ a = new_file
40
+
41
+ wav_to_cut = AudioSegment.from_wav(a)
42
+ # pydub does things in milliseconds
43
+ ten_seconds = 10 * 1000
44
+ first_10_seconds = wav_to_cut[:ten_seconds]
45
+
46
+ return first_10_seconds
47
 
48
  # Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
49
  def populate_metadata(link):
50
  yt = YouTube(link)
51
+ audio = get_audio(link)
52
+ return yt.thumbnail_url, yt.title, audio
53
 
54
  def inference(audio):
55
  with open(audio, "rb") as fd:
 
86
  The mt3 model transcribes multiple simultaneous instruments, but without velocities."
87
  """)
88
  model = gr.Radio(
89
+ ["mt3", "ismir2021"], label="What kind of model you want to use?", value="mt3"
90
  )
91
  model.change(fn=change_model, inputs=model, outputs=[])
92
 
 
94
  with gr.Row().style(mobile_collapse=False, equal_height=True):
95
  title = gr.Label(label="Video Title", placeholder="Title")
96
  img = gr.Image(label="Thumbnail")
97
+ with gr.Row():
98
+ yt_audio = gr.Audio()
99
+ link.change(fn=populate_metadata, inputs=link, outputs=[img, title, yt_audio])
100
 
101
  demo.launch()
102
 
requirements.txt CHANGED
@@ -8,4 +8,5 @@ jax[cpu]==0.3.15 -f https://storage.googleapis.com/jax-releases/jax_releases.htm
8
  clu==0.0.7
9
  # pin Orbax to use Checkpointer
10
  orbax==0.0.2
11
- pytube
 
 
8
  clu==0.0.7
9
  # pin Orbax to use Checkpointer
10
  orbax==0.0.2
11
+ pytube
12
+ pydub