# whisperdemo/app.py — Whisper transcription demo (Hugging Face Space).
import gradio as gr
import whisper
import yt_dlp
import os
import traceback
from pydub import AudioSegment
from threading import Thread
from queue import Queue
# Module-level handle to the currently loaded Whisper model.
# Set by load_whisper_model() and read by the transcription worker thread.
selected_model = None
def load_whisper_model(model_name):
    """Load the named Whisper model into the module-level ``selected_model``.

    Parameters
    ----------
    model_name : str
        One of the Whisper model sizes ("tiny", "base", "small", "medium", "large").

    Returns
    -------
    str
        A human-readable confirmation message.
    """
    global selected_model
    selected_model = whisper.load_model(model_name)
    confirmation = f"Loaded {model_name} model"
    return confirmation
def chunk_audio(audio_file, chunk_size_ms=30000):
    """Split an audio file into consecutive pieces of at most ``chunk_size_ms`` ms.

    Parameters
    ----------
    audio_file : str
        Path to any file format pydub/ffmpeg can decode.
    chunk_size_ms : int
        Length of each piece in milliseconds (the last piece may be shorter).

    Returns
    -------
    list[AudioSegment]
        The pieces, in playback order.
    """
    recording = AudioSegment.from_file(audio_file)
    total_ms = len(recording)
    pieces = []
    start = 0
    while start < total_ms:
        pieces.append(recording[start:start + chunk_size_ms])
        start += chunk_size_ms
    return pieces
def stream_transcription(audio_file, chunk_size_ms=30000):
    """Transcribe ``audio_file`` chunk by chunk, yielding the growing transcript.

    A background thread splits the audio, transcribes each chunk with the
    globally loaded ``selected_model``, and pushes formatted segment lines
    onto a queue; this generator drains the queue so the Gradio UI can
    stream partial results.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to transcribe.
    chunk_size_ms : int
        Chunk length in milliseconds (default 30000 = 30 s). Segment
        timestamps are offset by the chunk's position using this value.

    Yields
    ------
    str
        The full transcript so far, or a single "Error: ..." message.
    """
    segment_queue = Queue()
    # Seconds spanned by one chunk — derived from chunk_size_ms instead of
    # the original hard-coded 30, so a different chunk size stays correct.
    chunk_span_s = chunk_size_ms / 1000.0

    def transcribe_worker():
        try:
            chunks = chunk_audio(audio_file, chunk_size_ms)
            for i, chunk in enumerate(chunks):
                chunk_file = f"temp_chunk_{i}.wav"
                chunk.export(chunk_file, format="wav")
                try:
                    result = selected_model.transcribe(chunk_file)
                finally:
                    # Remove the temp file even if transcription raised,
                    # so failed runs don't leak temp_chunk_*.wav files.
                    os.remove(chunk_file)
                offset = i * chunk_span_s
                for segment in result['segments']:
                    segment_queue.put(
                        f"[{segment['start'] + offset:.2f}s -> "
                        f"{segment['end'] + offset:.2f}s] {segment['text']}\n"
                    )
            segment_queue.put(None)  # Signal end of transcription
        except Exception as e:
            segment_queue.put(f"Error: {str(e)}")
            segment_queue.put(None)

    # daemon=True: an abandoned generator must not keep the process alive.
    Thread(target=transcribe_worker, daemon=True).start()
    full_transcript = ""
    while True:
        segment_text = segment_queue.get()
        if segment_text is None:
            break
        if segment_text.startswith("Error"):
            yield segment_text
            break
        full_transcript += segment_text
        yield full_transcript
def download_youtube_audio(youtube_url):
    """Download the best audio stream of a YouTube video and convert it to MP3.

    Parameters
    ----------
    youtube_url : str
        URL of the video to fetch.

    Returns
    -------
    str
        Path of the converted file ("temp_audio.mp3").
    """
    # Post-processing step: hand the downloaded stream to ffmpeg for
    # extraction into a 192 kbps MP3.
    extract_audio = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [extract_audio],
        'outtmpl': 'temp_audio.%(ext)s',
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([youtube_url])
    # The outtmpl plus the mp3 postprocessor yields this fixed path.
    return "temp_audio.mp3"
def process_input(model, input_type, youtube_url=None, audio_file=None):
    """Generator driving the UI: load the model, resolve the audio source,
    then stream transcription progress.

    Parameters
    ----------
    model : str
        Whisper model size passed to load_whisper_model().
    input_type : str
        Either "YouTube URL" or "Audio File".
    youtube_url : str | None
        URL to download when input_type is "YouTube URL".
    audio_file : str | None
        Path of an uploaded file when input_type is "Audio File".

    Yields
    ------
    str
        Status messages followed by the growing transcript (or an error).
    """
    downloaded = False  # True only once *we* created the temp YouTube file
    try:
        yield "Loading Whisper model..."
        load_whisper_model(model)
        yield f"Loaded {model} model. "
        if input_type == "YouTube URL":
            if not youtube_url:
                yield "Please provide a valid YouTube URL."
                return
            yield "Downloading audio from YouTube..."
            audio_file = download_youtube_audio(youtube_url)
            downloaded = True
            yield "Download complete. Starting transcription...\n"
        elif input_type == "Audio File":
            if not audio_file:
                yield "Please upload an audio file."
                return
            yield "Starting transcription...\n"
        yield from stream_transcription(audio_file)
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}\n"
        error_msg += traceback.format_exc()
        print(error_msg)
        yield f"Error: {str(e)}"
    finally:
        # Clean up only the file we downloaded ourselves, and only if it
        # actually exists — the original unconditional os.remove() could
        # raise FileNotFoundError inside finally when the download failed,
        # masking the real error.
        if downloaded and audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
# Define the Gradio interface.
# NOTE: component creation order and context-manager nesting determine the
# on-page layout, so this block is left structurally untouched.
with gr.Blocks() as iface:
    gr.Markdown("# Whisper Transcription App")
    gr.Markdown("Transcribe YouTube videos or audio files using OpenAI's Whisper model. Large files and long videos can take a very long time to process.")
    with gr.Row():
        with gr.Column():
            # Left column: all inputs. Model names match whisper.load_model sizes.
            model = gr.Radio(
                choices=["tiny", "base", "small", "medium", "large"],
                label="Whisper Model",
                value="base"
            )
            # Quick guidance on each size's speed/accuracy trade-off.
            gr.Markdown("""
    - tiny: very fast, less accurate
    - base: medium speed and accuracy
    - small: balanced speed and accuracy
    - medium: more accurate, slower
    - large: most accurate, very slow
    """)
            input_type = gr.Radio(["YouTube URL", "Audio File"], label="Input Type")
            youtube_url = gr.Textbox(label="YouTube URL")
            audio_file = gr.Audio(label="Audio File", type="filepath")
            with gr.Row():
                submit_button = gr.Button("Submit")
                clear_button = gr.Button("Clear")
        with gr.Column():
            # Right column: streaming transcription output.
            output = gr.Textbox(label="Transcription", lines=25)

    # process_input is a generator, so the output textbox updates
    # incrementally as transcript chunks arrive.
    submit_button.click(
        fn=process_input,
        inputs=[model, input_type, youtube_url, audio_file],
        outputs=output,
        api_name="transcribe"
    )

    def clear_outputs():
        # Reset both input widgets and the transcript display.
        return {youtube_url: "", audio_file: None, output: ""}

    clear_button.click(
        fn=clear_outputs,
        inputs=[],
        outputs=[youtube_url, audio_file, output],
        api_name="clear"
    )

# Launch the interface; queue() is required for streaming generator outputs.
iface.queue().launch(share=True)