Spaces:

michakomo
/

youtube-sum

Sleeping

App Files Files Community

youtube-sum / app.py

michakomo

Update app.py

6fd27e6 about 1 year ago

raw

history blame contribute delete

2.26 kB

	import gradio as gr
	import whisper
	from pytube import YouTube
	from typing import List
	from transformers import pipeline

	def transcribe(
	    url: str,
	    model_size: str
	) -> tuple:
	    """Transcribe a YouTube video and summarize the transcript.

	    :param url: URL of the YouTube video.
	    :param model_size: Name of the Whisper model to load (e.g. ``"tiny"``).
	    :return: A ``(transcription, summary)`` pair of strings: the formatted
	        per-segment transcription and the summary of the full text.
	    :raises ValueError: If the video exposes no audio-only stream.
	    """
	    import tempfile

	    # Get audio from the video.
	    yt_client = YouTube(url=url)
	    audio_stream = yt_client.streams.filter(only_audio=True).first()
	    if audio_stream is None:
	        raise ValueError(f"No audio-only stream available for {url!r}")

	    # Download into a private temporary directory so that concurrent
	    # requests do not overwrite each other's file, and so the download is
	    # removed once the audio has been decoded into memory.
	    with tempfile.TemporaryDirectory() as workdir:
	        audio_file = audio_stream.download(output_path=workdir, filename="file.mp4")
	        # Load the audio while the file still exists.
	        audio = whisper.load_audio(audio_file)

	    # Load the model
	    model = whisper.load_model(model_size)

	    # Get results
	    result = model.transcribe(audio)
	    return format_result(result), summarize(result["text"])


	def summarize(text: str) -> str:
	    """Summarize *text* with the ``facebook/bart-large-cnn`` model.

	    :param text: Text to condense.
	    :return: The generated summary, or an empty string when *text* is blank.
	    """
	    # Nothing to summarize: skip loading and running the model.
	    if not text or not text.strip():
	        return ""

	    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	    # truncation=True clips inputs that exceed the model's maximum input
	    # length, so long transcripts are summarized from their leading part
	    # rather than being passed to the model over-length.
	    outputs = summarizer(
	        text,
	        max_length=150,
	        min_length=30,
	        do_sample=False,
	        truncation=True,
	    )
	    return outputs[0]["summary_text"]


	def format_result(result: dict) -> str:
	    """Render transcription segments as one line of text per segment.

	    :param result: Mapping with a ``"segments"`` list; each segment provides
	        ``"start"``, ``"end"`` and ``"text"``.
	    :return: Lines of the form ``from <start> to <end> <text>`` joined by
	        newlines; an empty string when there are no segments.
	    """
	    return "\n".join(
	        f"from {item['start']:6.2f} to {item['end']:6.2f} {item['text']}"
	        for item in result["segments"]
	    )


	def get_model_sizes() -> List[str]:
	    """
	    :rtype: list
	    :return: List of possible sizes of the Whisper model.
	    """
	    # Use the public accessor instead of reaching into the private
	    # ``whisper._MODELS`` mapping.
	    return whisper.available_models()


	title = "YouTube transcribe + summarization"
	desc = "Transcribe YouTube videos using OpenAI Whisper."

	# UI: model-size dropdown and URL box as inputs, transcription and
	# summarization text boxes as outputs, and a button wired to transcribe().
	with gr.Blocks() as demo:
	    gr.HTML(title)
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown(desc)
	    with gr.Row():
	        model_size = gr.Dropdown(
	            label="Model size",
	            choices=get_model_sizes(),
	            value="tiny"
	        )
	        url = gr.Textbox(label="YouTube URL")
	    with gr.Row():
	        text = gr.Textbox(
	            label="Transcription",
	            lines=10
	        )
	    with gr.Row():
	        summarization = gr.Textbox(
	            label="Summarization",
	            lines=5
	        )
	    # equal_height is passed to the constructor; the chained .style()
	    # call is deprecated in Gradio 3 and removed in Gradio 4.
	    with gr.Row(equal_height=True):
	        submit_button = gr.Button("Submit")

	    # The two outputs receive the (transcription, summary) pair returned
	    # by transcribe().
	    submit_button.click(
	        transcribe,
	        inputs=[
	            url,
	            model_size
	        ],
	        outputs=[
	            text,
	            summarization
	        ]
	    )

	demo.launch()