# Requires: transformers, torch, gradio, yt-dlp, openai-whisper, moviepy, graphviz
# (plus the ffmpeg and Graphviz system binaries for audio extraction and rendering).

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import os
import torch
import uuid
import moviepy.editor as mp
import yt_dlp
import whisper
from graphviz import Digraph

print("Starting the program...")

# Load BART model for summarization
model_path = "facebook/bart-large-cnn"
print(f"Loading model {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)
model.eval()
print("Model loaded.")


def generate_unique_filename(extension):
    """Return a collision-free filename with the given extension."""
    return f"{uuid.uuid4()}{extension}"


def cleanup_files(*files):
    """Delete any temporary files that still exist on disk."""
    for file in files:
        if file and os.path.exists(file):
            os.remove(file)
            print(f"Removed file: {file}")


def download_youtube_audio(url):
    """Download a YouTube video's audio track and convert it to WAV."""
    print(f"Downloading audio from YouTube: {url}")
    # yt-dlp appends the postprocessor's extension itself, so pass a base
    # name without one and return the final ".wav" path ourselves.
    base_path = str(uuid.uuid4())
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'outtmpl': base_path,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return base_path + ".wav"


def transcribe_audio(file_path):
    """Transcribe an audio or video file with Whisper."""
    print(f"Transcribing with Whisper: {file_path}")
    temp_audio = None
    if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
        print("Extracting audio from video...")
        video = mp.VideoFileClip(file_path)
        temp_audio = generate_unique_filename(".wav")
        video.audio.write_audiofile(temp_audio)
        file_path = temp_audio
    # Named whisper_model so it does not shadow the global BART model.
    whisper_model = whisper.load_model("large")  # use "base" or "medium" if slow
    result = whisper_model.transcribe(file_path)
    cleanup_files(temp_audio)  # remove the extracted WAV, if we created one
    print("Transcription done.")
    return result["text"]


def generate_summary_stream(transcription):
    """Summarize the transcription with BART and format it as bullet points."""
    print("Generating bullet-point summary...")
    inputs = tokenizer(transcription, return_tensors="pt", max_length=1024, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    summary_ids = model.generate(**inputs, max_length=300, num_beams=4, early_stopping=True)
    raw_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    bullet_summary = "\n".join(
        f"• {sentence.strip()}" for sentence in raw_summary.split('.') if sentence.strip()
    )
    return bullet_summary


def generate_mindmap_from_summary(summary_text):
    """Render a one-level mind map: a root node linked to each bullet point."""
    dot = Digraph(comment='Mind Map')
    dot.node('A', 'Summary')
    for idx, line in enumerate(summary_text.split('\n')):
        node_id = f'B{idx}'
        dot.node(node_id, line.replace("• ", "").strip())
        dot.edge('A', node_id)
    # Graphviz appends the format extension on render, so start from a bare
    # base name to avoid producing "<uuid>.png.png".
    base_path = str(uuid.uuid4())
    dot.render(base_path, format='png', cleanup=True)
    return base_path + ".png"


def process_youtube(url):
    if not url:
        return "No URL", None
    audio_file = None  # defined up front so the finally block can't raise NameError
    try:
        audio_file = download_youtube_audio(url)
        transcription = transcribe_audio(audio_file)
        return transcription, None
    except Exception as e:
        return f"Error: {e}", None
    finally:
        cleanup_files(audio_file)


def process_uploaded_video(video_path):
    try:
        transcription = transcribe_audio(video_path)
        return transcription, None
    except Exception as e:
        return f"Error: {e}", None


# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Video Transcription and Summary")
    with gr.Tabs():
        with gr.TabItem("📤 Upload Video"):
            video_input = gr.Video(label="Upload video")
            video_button = gr.Button("🚀 Process Video")
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(label="YouTube URL")
            url_button = gr.Button("🚀 Process URL")
    with gr.Row():
        transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
        summary_output = gr.Textbox(label="📊 Summary Points", lines=10, show_copy_button=True)
    mindmap_output = gr.Image(label="🧠 Mind Map")
    summary_button = gr.Button("📝 Generate Summary")
    mindmap_button = gr.Button("🧠 Generate Mind Map")

    def process_video_and_update(video):
        if video is None:
            return "No video uploaded", "Please upload a video"
        transcription, _ = process_uploaded_video(video)
        return transcription or "Transcription error", ""

    video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
    mindmap_button.click(generate_mindmap_from_summary, inputs=[summary_output], outputs=[mindmap_output])

demo.launch(share=True)