File size: 2,199 Bytes
0a0970f
 
 
 
 
 
 
 
 
4b14c04
0a0970f
26797c8
0a0970f
4b14c04
 
26797c8
0a0970f
4b14c04
 
0a0970f
 
4b14c04
 
 
 
 
 
 
26797c8
4b14c04
0a0970f
4b14c04
 
0a0970f
26797c8
0a0970f
 
4b14c04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a0970f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1KEjET5alwy21xLIkUvgYZmTYjsSYpSh1
"""

import os
import textwrap

import ffmpeg
import gradio as gr
import whisper
from fpdf import FPDF
#from notion_client import Client
from transformers import pipeline

# Initialize models once at import time; the functions below use these
# module-level objects instead of reloading a model on every call.
# Speech-to-text model: Whisper "base" checkpoint.
transcriber = whisper.load_model("base")
# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Notion config (optional)
# NOTION_TOKEN = os.getenv("NOTION_TOKEN")  # Set this in your env if deploying
# NOTION_DB_ID = os.getenv("NOTION_DB_ID")  # Also from your Notion integration
# notion = Client(auth=NOTION_TOKEN) if NOTION_TOKEN else None


def extract_audio(video_path, audio_path="temp.wav"):
    """Run ffmpeg to convert *video_path* into an audio file.

    Args:
        video_path: Path of the input media file handed to ffmpeg.
        audio_path: Path of the file ffmpeg writes; an existing file at
            this path is overwritten.

    Returns:
        ``audio_path``, so the call can be used inline.
    """
    source_stream = ffmpeg.input(video_path)
    conversion = source_stream.output(audio_path)
    conversion.run(overwrite_output=True)
    return audio_path


def summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is cut into chunks of at most 1000 characters, each chunk is
    passed to the module-level ``summarizer`` pipeline, and the resulting
    summaries are joined with a blank line between them.

    Args:
        text: The text to summarize.

    Returns:
        The chunk summaries separated by blank lines, or an empty string
        when *text* is empty or contains only whitespace.
    """
    # textwrap.wrap breaks on whitespace, so chunks end between words rather
    # than in the middle of one, and blank input yields no chunks at all
    # (the model is never called on empty text).
    chunks = textwrap.wrap(text, width=1000)
    summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=40,
            do_sample=False,
            # Guard against a chunk that tokenizes to more tokens than the
            # model accepts (possible for text that is not space-delimited).
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    return "\n\n".join(summaries)


def save_summary_to_pdf(summary_text, output_file="summary.pdf"):
    """Render *summary_text* into a one-font PDF document.

    Args:
        summary_text: Text to write; it is split on newlines and every
            resulting line is emitted as its own multi-line cell.
        output_file: Path the PDF is written to.

    Returns:
        ``output_file``, so callers can hand the path on directly.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split("\n"):
        # The built-in Arial font is limited to Latin-1. Characters outside
        # that range (curly quotes, dashes, non-Latin scripts) would raise an
        # encoding error, so they are replaced with "?" instead.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start each cell at the left margin. This is a no-op when the cursor
        # is already there and avoids the "not enough horizontal space"
        # failure seen with fpdf2, which can leave the cursor at the right
        # edge after a multi_cell -- NOTE(review): depends on the installed
        # fpdf flavour.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)
    pdf.output(output_file)
    return output_file


def process_video(video_path):
    """Transcribe an uploaded video, summarize it and export the summary.

    Pipeline: extract the audio track, transcribe it with the module-level
    ``transcriber``, summarize the transcript and write the summary to a PDF.

    The previous version also called ``export_to_notion``, which is not
    defined in this file (the Notion integration is commented out), so every
    call failed with ``NameError``. The call is removed and the function now
    returns exactly the two values the Gradio interface declares as outputs.

    Args:
        video_path: Path of the uploaded video, or ``None``/empty when the
            form was submitted without a file.

    Returns:
        A ``(summary, pdf_path)`` tuple. When no video was supplied the
        first element is a short hint for the user and ``pdf_path`` is
        ``None``.
    """
    if not video_path:
        # Submitting without an upload passes None; answer with a hint
        # instead of letting ffmpeg fail on a missing input.
        return "Please upload a video before submitting.", None

    audio_path = extract_audio(video_path)
    try:
        transcript = transcriber.transcribe(audio_path)["text"]
    finally:
        # Always remove the temporary audio file, even if transcription
        # raised; the existence check keeps cleanup from masking that error.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    summary = summarize_text(transcript)
    pdf_path = save_summary_to_pdf(summary)

    return summary, pdf_path


# Gradio UI definition: a single video upload is passed to `process_video`.
# The number of components listed in `outputs` has to match the number of
# values the callback returns.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload MP4 Lecture"),
    outputs=[
        gr.Textbox(label="Summarized Notes"),
        gr.File(label="Download Summary PDF"),
       # Disabled output for the Notion export result:
       # gr.Textbox(label="Notion Export Result")
    ],
    title="📘 ML Lecture Video Summarizer",
    description="Upload a lecture video to get summarized notes, downloadable as PDF, and sent to Notion (if configured)."
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()