Spaces:
Runtime error
Runtime error
danilotpnta
committed on
Commit
·
59fe5e1
1
Parent(s):
051ee03
feat: transcribe works now (2min base -> 9sec)
Browse files
app.py
CHANGED
@@ -3,8 +3,8 @@ import whisper
|
|
3 |
import gradio as gr
|
4 |
import os
|
5 |
|
6 |
-
# Function to download the audio from YouTube
|
7 |
-
def
|
8 |
ydl_opts = {
|
9 |
'format': 'bestaudio/best',
|
10 |
'outtmpl': 'audio.%(ext)s',
|
@@ -17,35 +17,59 @@ def download_audio(url):
|
|
17 |
|
18 |
try:
|
19 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
20 |
-
ydl.
|
|
|
|
|
|
|
21 |
audio_file = "audio.mp3"
|
22 |
-
return audio_file
|
23 |
except Exception as e:
|
24 |
-
return str(e)
|
25 |
|
26 |
# Function to transcribe the downloaded audio using Whisper
|
27 |
-
def transcribe_audio(audio_path):
|
28 |
-
model = whisper.load_model(
|
29 |
result = model.transcribe(audio_path)
|
30 |
return result['text']
|
31 |
|
32 |
-
#
|
33 |
-
def
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
# Gradio interface setup using gradio.components
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Launch the app
|
50 |
if __name__ == "__main__":
|
51 |
-
interface.launch(share=True)
|
|
|
3 |
import gradio as gr
|
4 |
import os
|
5 |
|
6 |
+
# Function to download the audio and extract metadata from YouTube
|
7 |
+
def download_video_info(url):
|
8 |
ydl_opts = {
|
9 |
'format': 'bestaudio/best',
|
10 |
'outtmpl': 'audio.%(ext)s',
|
|
|
17 |
|
18 |
try:
|
19 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
20 |
+
info = ydl.extract_info(url, download=False) # Extract video info
|
21 |
+
title = info.get('title', 'Unknown Title')
|
22 |
+
thumbnail_url = info.get('thumbnail', '')
|
23 |
+
ydl.download([url]) # Download the audio
|
24 |
audio_file = "audio.mp3"
|
25 |
+
return audio_file, title, thumbnail_url
|
26 |
except Exception as e:
|
27 |
+
return None, None, str(e)
|
28 |
|
29 |
# Function to transcribe the downloaded audio using Whisper
|
30 |
+
def transcribe_audio(audio_path, model_size="base"):
    """Transcribe an audio file with Whisper and return the recognised text.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.
        model_size: Whisper model name passed to ``whisper.load_model``
            (defaults to ``"base"``).

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    # The model is loaded on every call, for whichever size was requested.
    whisper_model = whisper.load_model(model_size)
    return whisper_model.transcribe(audio_path)['text']
|
34 |
|
35 |
+
# Split logic: First fetch title and thumbnail, then transcribe
|
36 |
+
def get_video_info_and_transcribe(youtube_url, model_size="base"):
    """Download a video's audio, then transcribe it.

    Wired to the "Transcribe" button, whose outputs are (in order) the title
    textbox, the thumbnail image and the transcription textbox. Every return
    path therefore yields exactly three values.

    Args:
        youtube_url: URL passed to ``download_video_info``.
        model_size: Whisper model name forwarded to ``transcribe_audio``.

    Returns:
        A 3-tuple ``(title_update, thumbnail_update, transcription_update)``.
        On failure the title slot carries the error message and the other
        two slots are ``None``.
    """
    audio_path, title, thumbnail_url = download_video_info(youtube_url)

    # On failure download_video_info returns (None, None, error_message), so
    # the third slot holds the error text rather than a thumbnail URL.
    if not audio_path or not os.path.exists(audio_path):
        # Three values, matching the three wired output components (the
        # previous four-value return did not match the handler's outputs).
        return gr.update(value=f"Error fetching video: {thumbnail_url}"), None, None

    title_output = gr.update(value=title)
    # A missing thumbnail comes back as '', which is not a loadable image;
    # pass None so the image component is simply left empty.
    thumbnail_output = gr.update(value=thumbnail_url or None)

    # NOTE: this function returns once, so title and thumbnail only appear
    # together with the finished transcription, not while it is running.
    transcription = transcribe_audio(audio_path, model_size)

    return title_output, thumbnail_output, gr.update(value=transcription)
|
52 |
|
53 |
# Gradio interface setup using gradio.components
|
54 |
+
with gr.Blocks() as interface:
    # Top row: wide URL field next to a narrow model-size selector.
    with gr.Row():
        youtube_url = gr.Textbox(
            label="YouTube Link",
            elem_id="yt_link",
            scale=5,
        )
        model_size = gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large"],
            label="Model Size",
            value="base",
            scale=1,
        )

    # Read-only field that receives the video title (or an error message).
    title_output = gr.Textbox(label="Video Title", interactive=False)

    # Bottom row: thumbnail and transcription side by side, equal width.
    with gr.Row():
        thumbnail_output = gr.Image(
            label="Thumbnail",
            interactive=False,
            scale=1,
        )
        transcription_output = gr.Textbox(
            label="Transcription",
            interactive=False,
            scale=1,
        )

    transcribe_button = gr.Button("Transcribe")

    # The handler's return values map onto `outputs` in this order.
    transcribe_button.click(
        get_video_info_and_transcribe,
        inputs=[youtube_url, model_size],
        outputs=[title_output, thumbnail_output, transcription_output],
    )

# Launch the app
if __name__ == "__main__":
    interface.launch(share=True)
|