"""Gradio app that summarizes a YouTube video.

The pipeline downloads the video's audio track with pytube, transcribes it
with Whisper, and summarizes the transcription with a Hugging Face
summarization pipeline.
"""

import os
from tempfile import TemporaryDirectory
from typing import List

from pytube import YouTube
import whisper
from transformers import pipeline
import gradio as gr

# File name given to the downloaded audio inside the temporary directory.
# Shared by download_audio() and execute_pipeline() so they cannot drift apart.
AUDIO_FILENAME = "a.mp4"

# Default number of characters per transcription chunk passed to the summarizer.
CHUNK_SIZE = 1000


def get_title(url: str) -> str:
    """Return the title of the YouTube video at *url*, wrapped in Markdown bold markers."""
    yt = YouTube(url)
    return f"**{yt.title}**"


def download_audio(url: str, path: str) -> None:
    """Download an audio-only stream of the video at *url* into the directory *path*.

    The first audio-only stream reported by pytube is used, and the file is
    saved under the name ``AUDIO_FILENAME``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio = yt.streams.filter(only_audio=True).first()
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download.
    if audio is None:
        raise ValueError(f"No audio-only stream found for {url!r}")
    audio.download(output_path=path, filename=AUDIO_FILENAME)


def transcribe(path: str, chunk_size: int = CHUNK_SIZE) -> List[str]:
    """Transcribe the audio file at *path* and return the text split into chunks.

    Args:
        path: Path of the audio file to transcribe.
        chunk_size: Maximum number of characters per returned chunk.

    Returns:
        Consecutive slices of the transcription, each at most *chunk_size*
        characters long. The list is empty when the transcription is empty.

    Raises:
        ValueError: If *chunk_size* is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    model = whisper.load_model("tiny")
    transcription = model.transcribe(path, fp16=False)["text"]
    return [
        transcription[start : start + chunk_size]
        for start in range(0, len(transcription), chunk_size)
    ]


def summarize(transcription: List[str]) -> str:
    """Summarize each chunk of *transcription* and return the joined summary.

    Returns an empty string when *transcription* is empty, without loading the
    summarization model.
    """
    if not transcription:
        return ""
    model = pipeline("summarization", model="facebook/bart-large-cnn")
    summary_chunks = model(transcription, max_length=80, min_length=30)
    joined = " ".join(chunk["summary_text"] for chunk in summary_chunks)
    # Tidy up sentence breaks that come out as " . " in the joined text.
    return joined.strip().replace(" . ", ". ")


def execute_pipeline(url: str) -> str:
    """Download, transcribe and summarize the video at *url*.

    The audio is stored in a temporary directory created under the current
    working directory; the directory is removed once transcription is done.
    """
    with TemporaryDirectory(dir=".") as tmp_dir:
        download_audio(url, tmp_dir)
        chunks = transcribe(os.path.join(tmp_dir, AUDIO_FILENAME))
    # Summarization only needs the text, so the audio file is already gone here.
    return summarize(chunks)


def main() -> None:
    """Build the Gradio interface and launch it."""
    with gr.Blocks(analytics_enabled=True, title="یک ویدیو را خلاصه کنید") as page:
        # Heading text; the surrounding line breaks are kept from the original literal.
        gr.HTML("\n\nخلاصه گر یوتیوب ویدیو\n\n")
        url = gr.Textbox(label="لینک صفحه ویدیو در یوتیوب را وارد کنید:")
        title = gr.Markdown()
        output = gr.Textbox(label="خلاصه")
        # NOTE(review): `.style()` may not exist in newer Gradio releases —
        # confirm against the pinned Gradio version.
        summarize_btn = gr.Button("برو!").style(full_width=False)
        # Two handlers on the same button: one fills in the title, the other
        # runs the full pipeline and fills in the summary.
        summarize_btn.click(fn=get_title, inputs=url, outputs=title)
        summarize_btn.click(fn=execute_pipeline, inputs=url, outputs=output)
        gr.Markdown("*با ویدیوهای کمتر از 10 دقیقه بهترین عملکرد را دارد. معمولا 2-3 دقیقه طول می کشد تا اجرا شود.*")
    page.launch()


if __name__ == "__main__":
    main()