# youtube-sum / app.py
# michakomo - "Update app.py" (commit 6fd27e6)
import functools
import tempfile
from typing import List, Tuple

import gradio as gr
import whisper
from pytube import YouTube
from transformers import pipeline
def transcribe(
    url: str,
    model_size: str
) -> Tuple[str, str]:
    """
    Transcribe and summarize the audio track of a YouTube video.

    :param url: Address of the YouTube video.
    :param model_size: Name of the Whisper model to load (e.g. ``"tiny"``).
    :return: A ``(transcription, summary)`` pair: the timestamped segments
        rendered by ``format_result`` and the result of ``summarize`` applied
        to the full transcript text.
    :raises ValueError: If ``url`` is blank or the video exposes no
        audio-only stream.
    """
    if not url or not url.strip():
        raise ValueError("Please provide a YouTube URL.")

    # Get audio from the video.
    yt_client = YouTube(url=url.strip())
    audio_stream = yt_client.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    # Download into a per-call scratch directory: a fixed path in the working
    # directory would be overwritten by concurrent requests and never removed.
    with tempfile.TemporaryDirectory() as workdir:
        audio_file = audio_stream.download(output_path=workdir, filename="file.mp4")
        # Decode while the file still exists; only the decoded audio is
        # needed once the directory is cleaned up.
        audio = whisper.load_audio(audio_file)

    # Load the model and run the transcription.
    model = whisper.load_model(model_size)
    result = model.transcribe(audio)
    return format_result(result), summarize(result["text"])
@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """
    Build the ``facebook/bart-large-cnn`` summarization pipeline.

    Cached so the model is loaded once per process instead of on every call.

    :return: The summarization pipeline object.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize(text: str) -> str:
    """
    Produce a short abstractive summary of ``text``.

    :param text: Text to summarize (here, the full transcript).
    :return: The summary text, or an empty string when ``text`` is blank.
    """
    if not text or not text.strip():
        # Nothing to summarize; avoid loading the model for empty input.
        return ""

    summarizer = _get_summarizer()
    # truncation=True clips inputs longer than the model accepts instead of
    # failing; for long transcripts the summary then covers only the
    # beginning of the text.
    out = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return out[0]["summary_text"]
def format_result(result: dict) -> str:
    """
    Render transcription segments as one timestamped line per segment.

    :param result: Mapping with a ``"segments"`` entry; each segment must
        provide numeric ``"start"`` and ``"end"`` values and a ``"text"``
        string.
    :return: Lines of the form ``from <start> to <end> <text>`` joined with
        newlines; an empty string when there are no segments.
    """
    return "\n".join(
        f"from {item['start']:6.2f} to {item['end']:6.2f} {item['text']}"
        for item in result["segments"]
    )
def get_model_sizes() -> List[str]:
    """
    :rtype: list
    :return: List of possible sizes of the Whisper model.
    """
    # Use the public accessor rather than the private ``_MODELS`` mapping.
    return list(whisper.available_models())
# Static text shown at the top of the page.
title = "YouTube transcribe + summarization"
desc = "Transcribe YouTube videos using OpenAI Whisper."

# Build the Gradio UI: model-size dropdown and URL box as inputs, two text
# boxes as outputs, and a button that runs `transcribe`.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped - confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                f"""
{desc}
"""
            )
            with gr.Row():
                # Choices come from get_model_sizes(); "tiny" is preselected.
                model_size = gr.Dropdown(
                    label="Model size",
                    choices=get_model_sizes(),
                    value="tiny"
                )
                url = gr.Textbox(label="YouTube URL")
            with gr.Row():
                text = gr.Textbox(
                    label="Transcription",
                    lines=10
                )
            with gr.Row():
                summarization = gr.Textbox(
                    label="Summarization",
                    lines=5
                )
            # NOTE(review): `.style()` may be deprecated in newer Gradio
            # releases - confirm against the version this app is pinned to.
            with gr.Row().style(equal_height=True):
                submit_button = gr.Button("Submit")
    # `transcribe` returns two values; they are assigned to the outputs in
    # order: first the transcription box, then the summarization box.
    submit_button.click(
        transcribe,
        inputs=[
            url,
            model_size
        ],
        outputs=[
            text,
            summarization
        ]
    )
# Start the app; this runs whenever the module is executed or imported.
demo.launch()