Spaces:

danilotpnta
/

Youtube-Whisper

Runtime error

App Files Files Community

Youtube-Whisper / app.py

danilotpnta

feat: transcribe works now (2min base -> 9sec)

59fe5e1 5 months ago

raw

history blame

2.75 kB

	import yt_dlp
	import whisper
	import gradio as gr
	import os

	# Function to download the audio and extract metadata from YouTube
	def download_video_info(url):
	    """Download a video's audio track as MP3 and return its basic metadata.

	    Args:
	        url: Address of the video to fetch; handed to yt-dlp unchanged.

	    Returns:
	        A 3-tuple. On success it is ``(audio_file, title, thumbnail_url)``:
	        ``audio_file`` is always the relative path ``"audio.mp3"``,
	        ``title`` falls back to ``"Unknown Title"`` and ``thumbnail_url``
	        falls back to ``""`` when yt-dlp reports none. On any failure it
	        is ``(None, None, error_message)`` -- the third slot then carries
	        the exception text instead of a thumbnail URL.
	    """
	    ydl_opts = {
	        'format': 'bestaudio/best',
	        'outtmpl': 'audio.%(ext)s',
	        'postprocessors': [{
	            'key': 'FFmpegExtractAudio',
	            'preferredcodec': 'mp3',
	            'preferredquality': '192',
	        }],
	    }

	    try:
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            # A single call resolves the metadata and downloads the audio.
	            # Calling extract_info(download=False) followed by download()
	            # makes yt-dlp resolve the same URL twice.
	            info = ydl.extract_info(url, download=True)
	            title = info.get('title', 'Unknown Title')
	            thumbnail_url = info.get('thumbnail', '')
	        # With preferredcodec 'mp3' and the 'audio.%(ext)s' template the
	        # extracted audio is expected under this fixed name.
	        audio_file = "audio.mp3"
	        return audio_file, title, thumbnail_url
	    except Exception as e:
	        # Callers rely on getting the error text back in the third slot
	        # rather than on an exception propagating.
	        return None, None, str(e)

	# Function to transcribe the downloaded audio using Whisper
	def transcribe_audio(audio_path, model_size="base"):
	    """Return the text Whisper produces for the audio file at ``audio_path``.

	    Args:
	        audio_path: Path of the audio file to transcribe.
	        model_size: Name passed to ``whisper.load_model``; defaults to
	            ``"base"``.

	    Returns:
	        The value stored under the ``'text'`` key of the transcription
	        result.
	    """
	    # The model is loaded on every call, exactly once per transcription.
	    return whisper.load_model(model_size).transcribe(audio_path)['text']

	# Split logic: First fetch title and thumbnail, then transcribe
	def get_video_info_and_transcribe(youtube_url, model_size="base"):
	    """Download a video's audio and transcribe it for the Gradio UI.

	    Args:
	        youtube_url: Link to the video, forwarded to ``download_video_info``.
	        model_size: Whisper model name forwarded to ``transcribe_audio``.

	    Returns:
	        Exactly three values in the order title, thumbnail, transcription,
	        one per component in the click handler's ``outputs`` list. When
	        the download fails, the title slot carries the error message and
	        the other two are ``None``.
	    """
	    audio_path, title, thumbnail_url = download_video_info(youtube_url)

	    # On failure download_video_info returns (None, None, error_text), so
	    # the third element holds the error message here, not a thumbnail URL.
	    if not audio_path or not os.path.exists(audio_path):
	        error_text = thumbnail_url
	        # Return one value per output component; the handler is wired to
	        # three outputs, so a fourth value would not match.
	        return gr.update(value=f"Error fetching video: {error_text}"), None, None

	    title_update = gr.update(value=title)
	    # download_video_info falls back to '' when no thumbnail is reported;
	    # pass None so the image component is left empty instead of being
	    # handed an empty string.
	    thumbnail_update = gr.update(value=thumbnail_url or None)

	    transcription = transcribe_audio(audio_path, model_size)

	    return title_update, thumbnail_update, gr.update(value=transcription)

	# Gradio interface setup using gradio.components
	with gr.Blocks() as interface:
	    # NOTE(review): the nesting below is reconstructed from a flattened
	    # source; confirm the title box and the button sit outside the rows.

	    # Input row: the link field is given five times the width share of
	    # the model picker.
	    with gr.Row():
	        youtube_url = gr.Textbox(
	            label="YouTube Link",
	            elem_id="yt_link",
	            scale=5,
	        )
	        model_size = gr.Dropdown(
	            choices=["tiny", "base", "small", "medium", "large"],
	            label="Model Size",
	            value="base",
	            scale=1,
	        )

	    title_output = gr.Textbox(label="Video Title", interactive=False)

	    # Result row: thumbnail and transcription share the width equally.
	    with gr.Row():
	        thumbnail_output = gr.Image(
	            label="Thumbnail",
	            interactive=False,
	            scale=1,
	        )
	        transcription_output = gr.Textbox(
	            label="Transcription",
	            interactive=False,
	            scale=1,
	        )

	    transcribe_button = gr.Button("Transcribe")

	    # The handler's three return values map onto these three outputs,
	    # in this order.
	    transcribe_button.click(
	        get_video_info_and_transcribe,
	        inputs=[youtube_url, model_size],
	        outputs=[title_output, thumbnail_output, transcription_output],
	    )

	# Start the server only when the file is run as a script
	if __name__ == "__main__":
	    interface.launch(share=True)