Spaces:
Sleeping
Sleeping
import functools
import tempfile
from typing import List, Tuple

import gradio as gr
import whisper
from pytube import YouTube
from transformers import pipeline
def transcribe(
    url: str,
    model_size: str
) -> Tuple[str, str]:
    """Transcribe and summarize the audio track of a YouTube video.

    :param url: Address of the YouTube video to process.
    :param model_size: Name of the Whisper model to load (for example ``"tiny"``).
    :return: A pair ``(transcript, summary)``: the timestamped transcript built
        by ``format_result`` and the summary built by ``summarize``.
    :raises ValueError: If ``url`` is empty or the video exposes no
        audio-only stream.
    """
    url = (url or "").strip()
    if not url:
        raise ValueError("A YouTube URL is required.")

    # Get audio from the video.
    yt_client = YouTube(url=url)
    audio_stream = yt_client.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    # Download into a private temporary directory so that concurrent requests
    # do not overwrite each other's "file.mp4" and nothing is left on disk.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_file = audio_stream.download(output_path=work_dir, filename="file.mp4")
        # Decode the audio while the downloaded file still exists; the decoded
        # waveform is what gets passed to the model below.
        audio = whisper.load_audio(audio_file)

    # Load the model
    model = whisper.load_model(model_size)
    # Get results
    result = model.transcribe(audio)
    return format_result(result), summarize(result["text"])
@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize(text: str) -> str:
    """Return a short summary of ``text``.

    :param text: Text to summarize (here, the full transcript).
    :return: The generated summary, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to summarize; skip loading and running the model.
        return ""
    summarizer = _get_summarizer()
    # truncation=True asks the pipeline to cut inputs that exceed the model's
    # maximum input length instead of failing on long transcripts.
    out = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]
    return out
def format_result(result: dict) -> str:
    """Render transcription segments as one timestamped line per segment.

    :param result: Mapping with a ``"segments"`` entry; each segment provides
        ``"start"``, ``"end"`` and ``"text"``.
    :return: The lines ``from <start> to <end> <text>`` joined by newlines, or
        an empty string when there are no segments.
    """
    return "\n".join(
        f"from {item['start']:6.2f} to {item['end']:6.2f} {item['text']}"
        for item in result["segments"]
    )
def get_model_sizes() -> List[str]:
    """
    :rtype: list
    :return: List of possible sizes of the Whisper model.
    """
    # Use the public helper instead of reaching into the private
    # ``whisper._MODELS`` mapping.
    return list(whisper.available_models())
# Static texts shown at the top of the page.
title = "YouTube transcribe + summarization"
desc = "Transcribe YouTube videos using OpenAI Whisper."

# Page layout: heading, description, inputs (model size and URL), the two
# output boxes, and a submit button wired to ``transcribe``.
with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                f"""
                {desc}
                """
            )
            with gr.Row():
                model_size = gr.Dropdown(
                    label="Model size",
                    choices=get_model_sizes(),
                    value="tiny"
                )
                url = gr.Textbox(label="YouTube URL")
            with gr.Row():
                text = gr.Textbox(
                    label="Transcription",
                    lines=10
                )
            with gr.Row():
                summarization = gr.Textbox(
                    label="Summarization",
                    lines=5
                )
            # NOTE(review): ``.style()`` is reported as deprecated in newer
            # Gradio releases; confirm against the pinned Gradio version
            # before changing this call.
            with gr.Row().style(equal_height=True):
                submit_button = gr.Button("Submit")
            # ``transcribe`` returns (transcript, summary), matching the two
            # output components in order.
            submit_button.click(
                transcribe,
                inputs=[
                    url,
                    model_size
                ],
                outputs=[
                    text,
                    summarization
                ]
            )

# Downloading, transcribing and summarizing can take a long time; route
# requests through the queue so that long-running calls are not cut off.
demo.queue()
demo.launch()