danilotpnta committed on
Commit
59fe5e1
·
1 Parent(s): 051ee03

feat: transcribe works now (2min base -> 9sec)

Browse files
Files changed (1) hide show
  1. app.py +46 -22
app.py CHANGED
@@ -3,8 +3,8 @@ import whisper
3
  import gradio as gr
4
  import os
5
 
6
- # Function to download the audio from YouTube using yt-dlp
7
- def download_audio(url):
8
  ydl_opts = {
9
  'format': 'bestaudio/best',
10
  'outtmpl': 'audio.%(ext)s',
@@ -17,35 +17,59 @@ def download_audio(url):
17
 
18
  try:
19
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
20
- ydl.download([url])
 
 
 
21
  audio_file = "audio.mp3"
22
- return audio_file
23
  except Exception as e:
24
- return str(e) # Return the error message for debugging
25
 
26
  # Function to transcribe the downloaded audio using Whisper
27
# Whisper models already loaded in this process, keyed by size name, so a
# model is loaded once and reused by later transcription calls.
_WHISPER_MODELS = {}


def transcribe_audio(audio_path, model_size="base"):
    """Transcribe an audio file with Whisper and return the recognised text.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.
        model_size: Name passed to ``whisper.load_model``; defaults to
            ``"base"``, the value that used to be hard-coded here. Other
            names such as "small", "medium" or "large" may be supplied.

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        # Loading happens once per size; subsequent calls reuse the instance.
        model = _WHISPER_MODELS[model_size] = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]
31
 
32
- # Main function to integrate download and transcription
33
def transcribe_youtube_video(youtube_url):
    """Download the audio for a YouTube URL and return its transcription.

    ``download_audio`` returns either the path of the downloaded file or, on
    failure, the error text; the two cases are told apart by checking whether
    the returned string names an existing file.

    Returns:
        The transcription text, or ``"Error: <message>"`` when the download
        did not produce a file.
    """
    download_result = download_audio(youtube_url)
    if os.path.exists(download_result):
        return transcribe_audio(download_result)
    # Not a file on disk, so the string is the error message from the download.
    return f"Error: {download_result}"
 
 
 
 
 
 
 
 
 
 
39
 
40
  # Gradio interface setup using gradio.components
41
# One textbox in (the video link), one textbox out (the transcript).
_url_input = gr.components.Textbox(label="YouTube URL")
_transcript_output = gr.components.Textbox(label="Transcription")

interface = gr.Interface(
    title="YouTube Video Transcription",
    description="Paste a YouTube video link to get the audio transcribed using Whisper.",
    fn=transcribe_youtube_video,
    inputs=_url_input,
    outputs=_transcript_output,
)

# Launch the app
if __name__ == "__main__":
    # share=True enables sharing with a public link.
    interface.launch(share=True)
 
3
  import gradio as gr
4
  import os
5
 
6
+ # Function to download the audio and extract metadata from YouTube
7
+ def download_video_info(url):
8
  ydl_opts = {
9
  'format': 'bestaudio/best',
10
  'outtmpl': 'audio.%(ext)s',
 
17
 
18
  try:
19
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
20
+ info = ydl.extract_info(url, download=False) # Extract video info
21
+ title = info.get('title', 'Unknown Title')
22
+ thumbnail_url = info.get('thumbnail', '')
23
+ ydl.download([url]) # Download the audio
24
  audio_file = "audio.mp3"
25
+ return audio_file, title, thumbnail_url
26
  except Exception as e:
27
+ return None, None, str(e)
28
 
29
  # Function to transcribe the downloaded audio using Whisper
30
# Whisper models already loaded in this process, keyed by size name, so a
# model is loaded once and reused by later transcription requests.
_WHISPER_MODELS = {}


def transcribe_audio(audio_path, model_size="base"):
    """Transcribe an audio file with Whisper and return the recognised text.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.
        model_size: Name passed to ``whisper.load_model`` (defaults to
            ``"base"``).

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        # Loading happens once per size; subsequent calls reuse the instance.
        model = _WHISPER_MODELS[model_size] = whisper.load_model(model_size)
    result = model.transcribe(audio_path)
    return result["text"]
34
 
35
+ # Split logic: First fetch title and thumbnail, then transcribe
36
def get_video_info_and_transcribe(youtube_url, model_size="base"):
    """Download a video's audio and return its title, thumbnail and transcript.

    Args:
        youtube_url: Link passed to ``download_video_info``.
        model_size: Whisper model name forwarded to ``transcribe_audio``.

    Returns:
        A 3-tuple of values for the click handler's three outputs (title,
        thumbnail, transcription). On failure the title slot carries the
        error message and the other two slots are ``None``.
    """
    audio_path, title, thumbnail_url = download_video_info(youtube_url)

    if not audio_path:
        # download_video_info reports failures as (None, None, <error text>),
        # so here the third slot holds the message, not a thumbnail URL.
        error_text = thumbnail_url
        return gr.update(value=f"Error fetching video: {error_text}"), None, None

    if not os.path.exists(audio_path):
        # The download reported success but the expected file is absent;
        # say so instead of printing the thumbnail URL as if it were an error.
        return (
            gr.update(value=f"Error fetching video: audio file not found: {audio_path}"),
            None,
            None,
        )

    # All three values are returned together once transcription has finished;
    # nothing is pushed to the UI before this function returns.
    transcription = transcribe_audio(audio_path, model_size)

    return (
        gr.update(value=title),
        # The thumbnail defaults to '' when missing; presumably an empty string
        # is not a usable image value, so clear the image instead.
        gr.update(value=thumbnail_url or None),
        gr.update(value=transcription),
    )
52
 
53
  # Gradio interface setup using gr.Blocks
54
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# (blank lines taken as the end of each Row) — confirm against the real file.
with gr.Blocks() as interface:
    # Top row: wide link box next to a narrow model picker.
    with gr.Row():
        youtube_url = gr.Textbox(
            label="YouTube Link",
            elem_id="yt_link",
            scale=5,
        )
        model_size = gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large"],
            label="Model Size",
            value="base",
            scale=1,
        )

    title_output = gr.Textbox(label="Video Title", interactive=False)

    # Result row: thumbnail and transcript side by side with equal weight.
    with gr.Row():
        thumbnail_output = gr.Image(
            label="Thumbnail",
            interactive=False,
            scale=1,
        )
        transcription_output = gr.Textbox(
            label="Transcription",
            interactive=False,
            scale=1,
        )

    transcribe_button = gr.Button("Transcribe")

    # The handler's return values fill these three outputs in order.
    transcribe_button.click(
        fn=get_video_info_and_transcribe,
        inputs=[youtube_url, model_size],
        outputs=[title_output, thumbnail_output, transcription_output],
    )

# Launch the app
if __name__ == "__main__":
    # share=True enables sharing with a public link.
    interface.launch(share=True)