Spaces:

chinhon
/

whisper_transcribe

Running

whisper_transcribe / app.py

ver1.1

8832bce almost 2 years ago

No virus

1.77 kB

	import gradio as gr
	from pytube import YouTube
	import whisper

	# define function for transcription
	def whisper_transcript(model_size, url, audio_file):
	    """Transcribe a YouTube audio track or an uploaded audio file with Whisper.

	    Args:
	        model_size: Whisper model name handed to ``whisper.load_model``
	            (for example ``"base"`` or ``"tiny.en"``).
	        url: YouTube link. When non-empty it takes precedence over
	            ``audio_file``.
	        audio_file: Path to an audio file, used when no URL is given.

	    Returns:
	        The ``"text"`` entry of the transcription result.

	    Raises:
	        ValueError: If neither a URL nor an audio file is supplied.
	    """
	    # Treat a missing or whitespace-only textbox value as "no URL".
	    url = url.strip() if url else ""

	    # Fail early with a clear message instead of passing None to Whisper.
	    if not url and not audio_file:
	        raise ValueError("Please paste a YouTube link or upload an audio clip.")

	    if url:
	        # Download the first audio-only stream to a local file and
	        # transcribe whatever ``download`` returns.
	        audio_streams = YouTube(url).streams.filter(only_audio=True)
	        source = audio_streams[0].download(filename="audio.mp4")
	    else:
	        source = audio_file

	    # Model names ending in ".en" get the language pinned to English;
	    # for every other model None is passed through to ``transcribe``.
	    language = "english" if model_size.endswith(".en") else None

	    loaded_model = whisper.load_model(model_size)
	    transcript = loaded_model.transcribe(source, language=language)

	    return transcript["text"]

	# define Gradio app interface
	gradio_ui = gr.Interface(
	    fn=whisper_transcript,
	    title="Transcribe multi-lingual audio clips with Whisper",
	    description="How to use: Select a model, paste in a Youtube link or upload an audio clip, then click submit. Select models ending in '.en' if your clip is in English. For clips in other languages, select models without '.en'",
	    article="Note: The larger the model size selected or the longer the audio clip, the more time it would take to process the transcript.",
	    # Input order must match the parameters of ``whisper_transcript``:
	    # model_size, url, audio_file.
	    inputs=[
	        gr.Dropdown(
	            label="Select Model",
	            choices=[
	                "tiny.en",
	                "base.en",
	                "small.en",
	                "medium.en",
	                "tiny",
	                "base",
	                "small",
	                "medium",
	                "large",
	            ],
	            value="base",
	        ),
	        gr.Textbox(label="Paste YouTube link here"),
	        gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
	    ],
	    # Use the top-level component, consistent with the inputs above,
	    # instead of the deprecated ``gr.outputs`` namespace.
	    outputs=gr.Textbox(label="Whisper Transcript"),
	)

	# Enable request queuing, then start the app.
	gradio_ui.queue().launch()