Spaces:
Sleeping
Sleeping
File size: 2,199 Bytes
0a0970f 4b14c04 0a0970f 26797c8 0a0970f 4b14c04 26797c8 0a0970f 4b14c04 0a0970f 4b14c04 26797c8 4b14c04 0a0970f 4b14c04 0a0970f 26797c8 0a0970f 4b14c04 0a0970f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# -*- coding: utf-8 -*-
"""app.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1KEjET5alwy21xLIkUvgYZmTYjsSYpSh1
"""
import os
import whisper
import ffmpeg
from transformers import pipeline
from fpdf import FPDF
#from notion_client import Client
import gradio as gr
# Initialize models once at import time; the functions below reuse these
# module-level objects on every call.
# Whisper "base" checkpoint, used by process_video for transcription.
transcriber = whisper.load_model("base")
# Hugging Face summarization pipeline backed by facebook/bart-large-cnn.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Notion config (optional)
# NOTION_TOKEN = os.getenv("NOTION_TOKEN") # Set this in your env if deploying
# NOTION_DB_ID = os.getenv("NOTION_DB_ID") # Also from your Notion integration
# notion = Client(auth=NOTION_TOKEN) if NOTION_TOKEN else None
def extract_audio(video_path, audio_path="temp.wav"):
    """Run ffmpeg to convert ``video_path`` into the file ``audio_path``.

    Args:
        video_path: Path of the input media file handed to ffmpeg.
        audio_path: Destination path of the converted file. An existing
            file at this path is overwritten.

    Returns:
        ``audio_path``, so the caller can use the result directly.
    """
    source_stream = ffmpeg.input(video_path)
    output_stream = source_stream.output(audio_path)
    # overwrite_output=True lets repeated runs reuse the same destination.
    output_stream.run(overwrite_output=True)
    return audio_path
def summarize_text(text, chunk_size=1000):
    """Summarize ``text`` slice by slice and join the partial summaries.

    The text is cut into consecutive slices of at most ``chunk_size``
    characters. Every non-blank slice is summarized independently with the
    module-level ``summarizer`` pipeline, and the partial summaries are
    joined with a blank line between them.

    Args:
        text: Text to summarize. ``None``, empty, or whitespace-only text
            yields an empty string without invoking the model.
        chunk_size: Maximum number of characters per slice. Must be
            positive. Defaults to 1000.

    Returns:
        The partial summaries separated by a blank line, or ``""`` when
        there was nothing to summarize.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not text:
        return ""

    chunks = (text[i:i + chunk_size] for i in range(0, len(text), chunk_size))
    # Whitespace-only slices carry nothing to summarize; because of
    # min_length=40 the model would still be made to emit text for them,
    # so they are skipped instead of being sent to the pipeline.
    summaries = [
        summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
        for chunk in chunks
        if chunk.strip()
    ]
    return "\n\n".join(summaries)
def save_summary_to_pdf(summary_text, output_file="summary.pdf"):
    """Render ``summary_text`` into a PDF file and return the file's path.

    Each newline-separated line of the summary is written as its own
    wrapped paragraph in 12pt Arial on a single ``FPDF`` document.

    Args:
        summary_text: Text to render; split on ``"\\n"``.
        output_file: Path the PDF is written to.

    Returns:
        ``output_file``, unchanged.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split("\n"):
        # The built-in (core) Arial font only covers Latin-1. Characters
        # outside it (curly quotes, dashes, non-Latin scripts) would make
        # FPDF raise an encoding error, so they are replaced with "?".
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every paragraph at the left margin. NOTE(review): newer
        # fpdf2 releases appear to leave the cursor at the right edge after
        # multi_cell, which leaves no width for the next w=0 cell - confirm
        # against the installed FPDF version.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)
    pdf.output(output_file)
    return output_file
def process_video(video_path):
    """Turn an uploaded lecture video into summarized notes and a PDF.

    Pipeline: extract the audio with ``extract_audio``, transcribe it with
    the module-level ``transcriber``, summarize the transcript with
    ``summarize_text``, and write the summary to a PDF with
    ``save_summary_to_pdf``.

    Args:
        video_path: Path of the uploaded video file.

    Returns:
        A ``(summary, pdf_path)`` tuple, one value per output component of
        the Gradio interface defined in this file.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video_path:
        raise gr.Error("Please upload a video before submitting.")

    audio_path = extract_audio(video_path)
    try:
        transcript = transcriber.transcribe(audio_path)["text"]
    finally:
        # Remove the temporary audio file even when transcription fails,
        # so failed requests do not leave it behind.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    summary = summarize_text(transcript)
    pdf_path = save_summary_to_pdf(summary)
    # The Notion export is disabled in this file (its client setup is
    # commented out and no export_to_notion function exists), so only the
    # two values the interface displays are returned.
    return summary, pdf_path
# Gradio UI: a single video upload as input; the handler must return one
# value per component listed in `outputs`, in the same order.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload MP4 Lecture"),
    outputs=[
        gr.Textbox(label="Summarized Notes"),
        gr.File(label="Download Summary PDF"),
        # gr.Textbox(label="Notion Export Result")  # re-enable together with the Notion export
    ],
    title="📘 ML Lecture Video Summarizer",
    # The description no longer promises a Notion export, because that
    # feature is disabled in this file.
    description="Upload a lecture video to get summarized notes, downloadable as PDF.",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|