File size: 4,982 Bytes
463e52e
 
 
 
 
 
 
 
 
 
ce47ffe
463e52e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2460f07
463e52e
 
 
2460f07
 
 
463e52e
ce47ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
463e52e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9c2826
ce47ffe
463e52e
 
ce47ffe
463e52e
 
 
 
 
 
 
 
 
 
ce47ffe
463e52e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import os
import torch
import json
import uuid
import langdetect
import moviepy.editor as mp
import yt_dlp
import whisper
from graphviz import Digraph

print("Starting the program...")

# Summarization backbone: the tokenizer and seq2seq model are created once,
# at import time, and reused by generate_summary_stream for every request.
model_path = "facebook/bart-large-cnn"
print(f"Loading model {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Run on the GPU when torch can see one, otherwise stay on the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model = model.to(_device)
model.eval()  # inference only
print("Model loaded.")

def generate_unique_filename(extension):
    """Return a random UUID4-based file name with ``extension`` appended.

    ``extension`` is appended verbatim, so pass it with its leading dot
    (for example ``".wav"``).
    """
    return "{}{}".format(uuid.uuid4(), extension)

def cleanup_files(*files):
    """Delete every given path that exists, logging each removal.

    Falsy entries (``None``, ``""``) and paths that do not exist are
    skipped silently.
    """
    for path in files:
        if not path or not os.path.exists(path):
            continue
        os.remove(path)
        print(f"Removed file: {path}")

def download_youtube_audio(url):
    """Download the audio of ``url`` with yt-dlp and return the WAV file path.

    The stream is fetched under a unique base name and converted to WAV by
    the ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the resulting ``.wav`` file, relative to the working
        directory. The caller is responsible for deleting it.

    Raises:
        Whatever yt-dlp raises when the download or conversion fails.
    """
    print(f"Downloading audio from YouTube: {url}")
    # An empty extension yields a bare unique stem; yt-dlp fills in the
    # container extension itself through the %(ext)s template field.
    base_name = generate_unique_filename("")
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # Name the raw download after its real container instead of
        # pretending it is already a .wav file.
        'outtmpl': f"{base_name}.%(ext)s",
        # Callers only delete the returned WAV, so the intermediate download
        # must not be kept or it would pile up on disk.
        'keepvideo': False,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    # FFmpegExtractAudio writes its output next to the download with the
    # preferred codec's extension.
    return f"{base_name}.wav"

# Whisper checkpoints already loaded by transcribe_audio, keyed by model name.
_WHISPER_MODELS = {}


def _get_whisper_model(name):
    """Return the Whisper model ``name``, loading it only on first use."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_audio(file_path):
    """Transcribe an audio or video file with Whisper and return the text.

    Files ending in ``.mp4``, ``.avi``, ``.mov`` or ``.flv`` (any letter
    case) first have their audio track written to a temporary WAV file,
    which is deleted again before returning. Any other path is handed to
    Whisper unchanged.

    Args:
        file_path: Path of the media file to transcribe.

    Returns:
        The ``"text"`` field of the Whisper transcription result.

    Raises:
        ValueError: If a video file has no audio track.
    """
    print(f"Transcribing with Whisper: {file_path}")
    temp_audio = None
    try:
        if file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv')):
            print("Extracting audio from video...")
            video = mp.VideoFileClip(file_path)
            try:
                if video.audio is None:
                    raise ValueError(f"No audio track found in {file_path}")
                temp_audio = generate_unique_filename(".wav")
                video.audio.write_audiofile(temp_audio)
            finally:
                # Release the reader resources held by the clip.
                video.close()
            file_path = temp_audio

        # Reuse the checkpoint across calls instead of reloading it for
        # every request; use "base" or "medium" if "large" is too slow.
        whisper_model = _get_whisper_model("large")
        result = whisper_model.transcribe(file_path)
        print("Transcription done.")
        return result["text"]
    finally:
        # The extracted WAV is an implementation detail; never leave it behind.
        if temp_audio and os.path.exists(temp_audio):
            os.remove(temp_audio)

def generate_summary_stream(transcription):
    """Summarize ``transcription`` and return it as a bullet list.

    The text is truncated to 1024 tokens, summarized with the module-level
    ``model``/``tokenizer`` using 4-beam search (at most 300 output tokens),
    and split into one ``"• "``-prefixed line per sentence.

    Args:
        transcription: Text to summarize; may be empty or ``None``.

    Returns:
        Newline-separated bullet lines, or ``""`` when there is nothing to
        summarize.
    """
    import re  # local import: only this function needs it

    print("Generating Bullet point summary...")
    # Nothing to summarize: skip the model rather than summarizing nothing.
    if not transcription or not transcription.strip():
        return ""

    # No padding is needed for a single sequence; the attention mask is
    # passed explicitly so generate() never has to guess it.
    inputs = tokenizer(transcription, return_tensors="pt", max_length=1024, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    summary_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=300,
        num_beams=4,
        early_stopping=True,
    )
    raw_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    # Split only on a period that ends a sentence (followed by whitespace or
    # the end of the text) so decimals such as "3.5" stay in one bullet.
    sentences = re.split(r'\.(?:\s+|$)', raw_summary)
    bullet_summary = "\n".join(f"• {sentence.strip()}" for sentence in sentences if sentence.strip())
    return bullet_summary

def generate_mindmap_from_summary(summary_text):
    """Render the summary as a star-shaped mind map and return the PNG path.

    A central ``Summary`` node is connected to one child node per non-blank
    line of ``summary_text``; a leading ``"• "`` bullet marker is removed
    from each label.

    Args:
        summary_text: Newline-separated summary lines; may be empty or
            ``None``, in which case only the central node is drawn.

    Returns:
        Path of the rendered PNG file as reported by Graphviz.
    """
    dot = Digraph(comment='Mind Map')
    dot.node('A', 'Summary')

    labels = (line.replace("• ", "").strip() for line in (summary_text or "").split('\n'))
    # Blank lines would otherwise show up as empty boxes in the picture.
    for idx, label in enumerate(label for label in labels if label):
        node_id = f'B{idx}'
        dot.node(node_id, label)
        dot.edge('A', node_id)

    # render() appends the format extension itself, so pass a bare stem and
    # use the path it returns instead of rebuilding the name by hand.
    base_path = generate_unique_filename("")
    return dot.render(base_path, format='png', cleanup=True)

def process_youtube(url):
    """Download a video's audio, transcribe it and clean up the download.

    Args:
        url: Address of the video; a falsy value short-circuits.

    Returns:
        A ``(text, None)`` pair. ``text`` is the transcription, ``"No URL"``
        when no address was given, or ``"Error: <reason>"`` when the
        download or transcription fails.
    """
    if not url:
        return "No URL", None
    # Bound before the try block: when the download itself fails there is no
    # file yet, and referencing an unbound name in ``finally`` would raise
    # and replace the error message returned below.
    audio_file = None
    try:
        audio_file = download_youtube_audio(url)
        transcription = transcribe_audio(audio_file)
        return transcription, None
    except Exception as e:
        return f"Error: {e}", None
    finally:
        if audio_file is not None:
            cleanup_files(audio_file)

def process_uploaded_video(video_path):
    """Transcribe an uploaded video file.

    Returns a ``(text, None)`` pair in which ``text`` is the transcription,
    or ``"Error: <reason>"`` when transcription raises.
    """
    try:
        text = transcribe_audio(video_path)
    except Exception as exc:
        text = f"Error: {exc}"
    return text, None

# Gradio UI
# Layout: two input tabs (file upload / YouTube URL), one row holding the
# three outputs, and two buttons that post-process the current output boxes.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Video Transcription and Summary")

    # Input sources: each tab has its own input widget and trigger button.
    with gr.Tabs():
        with gr.TabItem("📤 Upload Video"):
            video_input = gr.Video(label="Upload video")
            video_button = gr.Button("🚀 Process Video")
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(label="YouTube URL")
            url_button = gr.Button("🚀 Process URL")

    # Outputs shared by both input tabs.
    with gr.Row():
        transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
        summary_output = gr.Textbox(label="📊 Summary Points", lines=10, show_copy_button=True)
        mindmap_output = gr.Image(label="🧠 Mind Map")

    summary_button = gr.Button("📝 Generate Summary")
    mindmap_button = gr.Button("🧠 Generate Mind Map")

    def process_video_and_update(video):
        """Click handler for the upload tab.

        Returns a ``(transcription, summary)`` pair for the two text boxes:
        a prompt pair when no video was uploaded, otherwise the
        transcription (or "Transcription error" if it came back empty) and
        an empty summary.
        """
        if video is None:
            return "No video uploaded", "Please upload a video"
        transcription, _ = process_uploaded_video(video)
        return transcription or "Transcription error", ""

    # Both processing handlers return two values, one per listed output.
    video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
    # The summary reads the transcription box; the mind map reads the summary box.
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
    mindmap_button.click(generate_mindmap_from_summary, inputs=[summary_output], outputs=[mindmap_output])

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)