Spaces:
Sleeping
Sleeping
File size: 2,261 Bytes
b9cf626 6fd27e6 b9cf626 6fd27e6 b9cf626 6fd27e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import functools
import tempfile
from typing import List, Tuple

import gradio as gr
import whisper
from pytube import YouTube
from transformers import pipeline
def transcribe(
    url: str,
    model_size: str
) -> Tuple[str, str]:
    """Transcribe the audio of a YouTube video and summarize the transcript.

    :param url: Address of the YouTube video to process.
    :param model_size: Name of the Whisper model passed to ``whisper.load_model``.
    :return: A ``(transcription, summary)`` pair: the per-segment transcript
        produced by ``format_result`` and the output of ``summarize`` for the
        full transcript text.
    :raises IndexError: If the video exposes no audio-only stream.
    """
    audio_streams = YouTube(url=url).streams.filter(only_audio=True)
    try:
        audio_stream = audio_streams[0]
    except IndexError as err:
        raise IndexError(f"No audio-only stream found for {url!r}") from err

    # Download into a private temporary directory: a fixed path in the working
    # directory would be overwritten by concurrent requests and never removed.
    with tempfile.TemporaryDirectory() as workdir:
        audio_file = audio_stream.download(output_path=workdir, filename="file.mp4")
        # Decode while the file still exists; only the decoded audio is
        # needed after this point.
        audio = whisper.load_audio(audio_file)

    model = whisper.load_model(model_size)
    result = model.transcribe(audio)
    return format_result(result), summarize(result["text"])
@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize(text: str) -> str:
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` model.

    :param text: Text to summarize.
    :return: The generated summary, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to summarize; skip loading and running the model.
        return ""

    summarizer = _get_summarizer()
    # truncation=True keeps inputs longer than the model's limit from failing;
    # only the leading part of such inputs is summarized.
    outputs = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]
def format_result(result: dict) -> str:
    """Render transcription segments as one line of text per segment.

    :param result: Mapping with a ``"segments"`` entry; each segment provides
        numeric ``"start"`` and ``"end"`` values and a ``"text"`` value.
    :return: Lines of the form ``from <start> to <end> <text>`` joined by
        newlines, or an empty string when there are no segments.
    """
    return "\n".join(
        f"from {segment['start']:6.2f} to {segment['end']:6.2f} {segment['text']}"
        for segment in result["segments"]
    )
def get_model_sizes() -> List[str]:
    """
    Return the names of the Whisper models that can be selected.

    :rtype: list
    :return: List of possible sizes of the Whisper model.
    """
    # Use the public accessor instead of the private ``whisper._MODELS`` table.
    return list(whisper.available_models())
# Static texts shown at the top of the page.
title = "YouTube transcribe + summarization"
desc = "Transcribe YouTube videos using OpenAI Whisper."

# Page layout: inputs first, then the two result boxes, then the submit button.
with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Row():
        with gr.Column():
            gr.Markdown(desc)
            with gr.Row():
                model_size = gr.Dropdown(
                    label="Model size",
                    choices=get_model_sizes(),
                    value="tiny"
                )
                url = gr.Textbox(label="YouTube URL")
            with gr.Row():
                text = gr.Textbox(
                    label="Transcription",
                    lines=10
                )
            with gr.Row():
                summarization = gr.Textbox(
                    label="Summarization",
                    lines=5
                )
            # equal_height is passed to the constructor; the chained
            # ``.style()`` call is deprecated in newer Gradio releases.
            with gr.Row(equal_height=True):
                submit_button = gr.Button("Submit")
                # transcribe returns (transcription, summary), matching the
                # order of the two output boxes.
                submit_button.click(
                    transcribe,
                    inputs=[
                        url,
                        model_size
                    ],
                    outputs=[
                        text,
                        summarization
                    ]
                )

demo.launch()
|