Spaces:
Sleeping
Sleeping
File size: 4,982 Bytes
463e52e ce47ffe 463e52e 2460f07 463e52e 2460f07 463e52e ce47ffe 463e52e a9c2826 ce47ffe 463e52e ce47ffe 463e52e ce47ffe 463e52e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import os
import torch
import json
import uuid
import langdetect
import moviepy.editor as mp
import yt_dlp
import whisper
from graphviz import Digraph
print("Starting the program...")
# Load BART model for summarization
model_path = "facebook/bart-large-cnn"
print(f"Loading model {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Move the model to GPU when one is available; otherwise fall back to CPU.
model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to("cuda" if torch.cuda.is_available() else "cpu")
# Inference only: disable dropout etc. (no training happens in this app).
model.eval()
print("Model loaded.")
def generate_unique_filename(extension):
    """Return a collision-resistant random filename ending in *extension*."""
    unique_stem = uuid.uuid4()
    return "{}{}".format(unique_stem, extension)
def cleanup_files(*files):
    """Delete every given path that exists on disk; falsy entries are skipped."""
    for path in files:
        if not path:
            continue
        if os.path.exists(path):
            os.remove(path)
            print(f"Removed file: {path}")
def download_youtube_audio(url):
    """Download the audio track of a YouTube video and return the .wav path.

    yt-dlp fetches the best audio stream and the FFmpegExtractAudio
    postprocessor converts it to WAV.

    Raises:
        yt_dlp.utils.DownloadError: if the URL cannot be downloaded.
    """
    print(f"Downloading audio from YouTube: {url}")
    # Use an extension-free output template: FFmpegExtractAudio appends
    # ".wav" itself.  The original template already ended in ".wav", so the
    # raw download and the converted file could collide, and keepvideo=True
    # additionally leaked the pre-conversion file on disk.
    base = str(uuid.uuid4())
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'outtmpl': base + '.%(ext)s',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return f"{base}.wav"
def transcribe_audio(file_path):
    """Transcribe an audio or video file with Whisper and return the text.

    Video containers (.mp4/.avi/.mov/.flv) get their audio extracted to a
    temporary WAV first; that temporary file is removed afterwards.
    """
    print(f"Transcribing with Whisper: {file_path}")
    temp_audio = None
    if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
        print("Extracting audio from video...")
        clip = mp.VideoFileClip(file_path)
        try:
            temp_audio = generate_unique_filename(".wav")
            clip.audio.write_audiofile(temp_audio)
        finally:
            clip.close()  # release ffmpeg readers promptly
        file_path = temp_audio
    try:
        # Cache the Whisper model on the function object so repeated calls
        # do not reload the multi-GB checkpoint every time.  Named
        # whisper_model (not `model`) to avoid shadowing the module-level
        # BART model.
        whisper_model = getattr(transcribe_audio, "_model", None)
        if whisper_model is None:
            whisper_model = whisper.load_model("large")  # use "base"/"medium" if slow
            transcribe_audio._model = whisper_model
        result = whisper_model.transcribe(file_path)
    finally:
        # The original leaked the extracted WAV; remove it if we created one.
        cleanup_files(temp_audio)
    print("Transcription done.")
    return result["text"]
def generate_summary_stream(transcription):
    """Summarize *transcription* with BART and return bullet-point text.

    Returns one "• ..." line per sentence of the generated summary.
    """
    print("Generating Bullet point summary...")
    # Truncate to BART's 1024-token window.  No max_length padding: the
    # original padded to 1024 but passed only input_ids to generate(),
    # so the pad tokens were attended to (no attention mask).  Forwarding
    # the full tokenizer output keeps the mask with the ids.
    inputs = tokenizer(transcription, return_tensors="pt", max_length=1024, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    summary_ids = model.generate(**inputs, max_length=300, num_beams=4, early_stopping=True)
    raw_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    # One bullet per sentence; drop empty fragments from the trailing period.
    bullet_summary = "\n".join(f"• {sentence.strip()}" for sentence in raw_summary.split('.') if sentence.strip())
    return bullet_summary
def generate_mindmap_from_summary(summary_text):
    """Render a one-level mind map PNG from bullet-point summary text.

    Each non-empty bullet line becomes a child node of a central
    "Summary" node.  Returns the path of the rendered PNG file.
    """
    dot = Digraph(comment='Mind Map')
    dot.node('A', 'Summary')
    for idx, line in enumerate(summary_text.split('\n')):
        label = line.replace("• ", "").strip()
        if not label:
            continue  # skip blank lines so we don't add empty nodes
        node_id = f'B{idx}'
        dot.node(node_id, label)
        dot.edge('A', node_id)
    # Render with an extension-free stem: graphviz appends ".png" itself,
    # so the original '<uuid>.png' stem produced '<uuid>.png.png'.
    # render() returns the path of the file it wrote.
    base = str(uuid.uuid4())
    return dot.render(base, format='png', cleanup=True)
def process_youtube(url):
    """Download and transcribe a YouTube URL.

    Returns a (transcription, None) pair for the Gradio outputs; on failure
    the first element is an "Error: ..." message.  The downloaded audio file
    is always cleaned up.
    """
    if not url:
        return "No URL", None
    # Defined before the try so the finally clause can't raise NameError
    # when download_youtube_audio itself fails (the original left
    # audio_file unbound in that case, masking the real exception).
    audio_file = None
    try:
        audio_file = download_youtube_audio(url)
        transcription = transcribe_audio(audio_file)
        return transcription, None
    except Exception as e:
        return f"Error: {e}", None
    finally:
        cleanup_files(audio_file)
def process_uploaded_video(video_path):
    """Transcribe an uploaded video file.

    Returns a (transcription, None) pair for the Gradio outputs; any
    failure is reported as an "Error: ..." string in the first slot.
    """
    try:
        return transcribe_audio(video_path), None
    except Exception as exc:
        return f"Error: {exc}", None
# Gradio UI: two input tabs (file upload / YouTube URL), shared output
# widgets, and on-demand summary + mind-map generation.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Video Transcription and Summary")
    with gr.Tabs():
        with gr.TabItem("📤 Upload Video"):
            video_input = gr.Video(label="Upload video")
            video_button = gr.Button("🚀 Process Video")
        with gr.TabItem("🔗 YouTube Link"):
            url_input = gr.Textbox(label="YouTube URL")
            url_button = gr.Button("🚀 Process URL")
    with gr.Row():
        transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
        summary_output = gr.Textbox(label="📊 Summary Points", lines=10, show_copy_button=True)
    mindmap_output = gr.Image(label="🧠 Mind Map")
    summary_button = gr.Button("📝 Generate Summary")
    mindmap_button = gr.Button("🧠 Generate Mind Map")
    def process_video_and_update(video):
        """Upload-tab handler: transcribe the video and clear the summary box."""
        if video is None:
            return "No video uploaded", "Please upload a video"
        transcription, _ = process_uploaded_video(video)
        return transcription or "Transcription error", ""
    # Wiring: both input tabs fill (transcription, summary); the summary and
    # mind-map buttons then read from the text boxes they transform.
    video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
    mindmap_button.click(generate_mindmap_from_summary, inputs=[summary_output], outputs=[mindmap_output])
# share=True additionally exposes a temporary public gradio.live URL.
demo.launch(share=True)