ksoky
/

whisper-small-km

Automatic Speech Recognition

hf-asr-leaderboard

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

whisper-small-km / app.py

ksoky's picture

Create app.py

168b854 over 1 year ago

history blame contribute delete

No virus

2.17 kB

	import gradio as gr
	import whisper
	from pytube import YouTube

	# Name of the model size currently loaded; change_model() compares the
	# requested size against this value.
	current_size = "small"
	# Whisper model shared by every transcription request (see inference()).
	# NOTE(review): the argument looks like a Hub repo id rather than a built-in
	# model name or a local checkpoint path -- confirm that load_model resolves it.
	loaded_model = whisper.load_model("ksoky/whisper-small-km")
	def inference(link):
	    """Transcribe the audio track of a YouTube video.

	    Downloads the first audio-only stream of the video at ``link`` and
	    runs the module-level ``loaded_model`` on the downloaded file.

	    Args:
	        link: URL of the YouTube video.

	    Returns:
	        The value stored under the ``'text'`` key of the transcription
	        result.
	    """
	    yt = YouTube(link)
	    # Every call downloads to the same file name.
	    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
	    results = loaded_model.transcribe(path)
	    return results['text']

	def change_model(size):
	    """Replace the module-level Whisper model with the one named ``size``.

	    Does nothing when ``size`` is already the loaded size.

	    Args:
	        size: Model name passed to ``whisper.load_model``.

	    Returns:
	        None.
	    """
	    # Both names are rebound below. Without this declaration they would be
	    # function locals, and the comparison would raise UnboundLocalError.
	    global loaded_model, current_size
	    if size == current_size:
	        return
	    loaded_model = whisper.load_model(size)
	    current_size = size

	def populate_metadata(link):
	    """Fetch display metadata for a YouTube video.

	    Args:
	        link: URL of the YouTube video.

	    Returns:
	        A ``(thumbnail_url, title)`` tuple read from the video object.
	    """
	    video = YouTube(link)
	    thumbnail_url = video.thumbnail_url
	    video_title = video.title
	    return thumbnail_url, video_title

	# Page metadata strings. `title` is rebound to a gr.Label component further
	# down, and `description` is not referenced again in the visible code.
	title="Youtube Whisperer"
	description="Speech to text transcription of Youtube videos using fine-tuned OpenAI's Whisper on Khmer"
	# Top-level Gradio container that holds the whole interface.
	block = gr.Blocks()

	# NOTE(review): the bodies of the `with` statements below carry no extra
	# indentation in this copy of the file; the intended nesting of the layout
	# must be confirmed and restored before the script can run.
	with block:
	# Static HTML header rendered at the top of the page.
	gr.HTML(
	"""
	<div style="text-align: center; max-width: 500px; margin: 0 auto;">
	<div>
	<h1>Youtube Whisperer</h1>
	</div>
	<p style="margin-bottom: 10px; font-size: 94%">
	Speech to text transcription of Youtube videos using OpenAI's Whisper
	</p>
	</div>
	"""
	)
	with gr.Group():
	with gr.Box():
	# The dropdown currently offers only 'small'; the commented-out line keeps
	# the wider list of sizes.
	# sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
	sz = gr.Dropdown(label="Model Size", choices=['small'], value='small')
	# Text input for the URL of the video to transcribe.
	link = gr.Textbox(label="YouTube Link")

	# Video title and thumbnail, filled in by populate_metadata().
	with gr.Row().style(mobile_collapse=False, equal_height=True):
	title = gr.Label(label="Video Title", placeholder="Title")
	img = gr.Image(label="Thumbnail")
	# Output box that receives the text returned by inference().
	text = gr.Textbox(
	label="Transcription",
	placeholder="Transcription Output",
	lines=5)
	with gr.Row().style(mobile_collapse=False, equal_height=True):
	btn = gr.Button("Transcribe")

	# Events
	# Clicking the button sends the link to inference() and shows its result.
	btn.click(inference, inputs=[link], outputs=[text])
	# Editing the link refreshes the thumbnail and title.
	link.change(populate_metadata, inputs=[link], outputs=[img, title])
	# Changing the dropdown calls change_model(); no component is updated.
	sz.change(change_model, inputs=[sz], outputs=[])

	# Start the Gradio app with debug mode enabled.
	block.launch(debug=True)