"""Lazy G Lecture Agent.

Gradio app that takes a lecture video (upload or URL), transcribes it with
faster-whisper, asks Mistral (via the HF Inference API) for a summary,
flashcards and a quiz, and OCRs board/slide text from sampled frames.
"""

import os
import uuid

import cv2
import gradio as gr
import pytesseract
import requests
import yt_dlp
from faster_whisper import WhisperModel
from PIL import Image

# Hugging Face Inference API endpoint for Mistral-7B-Instruct.
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"


def query_mistral(prompt):
    """Send *prompt* to the HF Inference API and return the generated text.

    Never raises: API/JSON failures are returned as human-readable warning
    strings so the Gradio UI can display them directly.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50,
        },
    }
    # Explicit timeout: requests has none by default and would hang the UI.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    try:
        json_data = response.json()
        # Successful responses look like [{"generated_text": "..."}].
        # Guard against an empty list before indexing element 0.
        if isinstance(json_data, list) and json_data and "generated_text" in json_data[0]:
            return json_data[0]["generated_text"]
        return "⚠️ Unexpected response: " + str(json_data)
    except Exception as e:
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"


# Whisper model for transcription (int8 weights: CPU-friendly).
whisper_model = WhisperModel("base", compute_type="int8")


def download_video(url):
    """Download *url* with yt-dlp to a unique mp4 in /tmp and return its path.

    Raises RuntimeError with a user-facing message on any download failure.
    """
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
    }
    # Only forward a cookie jar when it actually exists; yt-dlp errors out
    # on a missing cookiefile, which would break every cookie-less deploy.
    if os.path.isfile('cookies.txt'):
        ydl_opts['cookiefile'] = 'cookies.txt'
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return filename
    except Exception as e:
        raise RuntimeError(f"YouTube download failed: {str(e)}")


def extract_board_notes(video_path):
    """OCR every 90th frame of *video_path* and return the text chunks.

    Chunks shorter than 10 characters (after stripping) are discarded as
    noise; surviving chunks are joined with '---' separators.
    """
    cap = cv2.VideoCapture(video_path)
    board_texts = []
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % 90 == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # OCR directly from memory: the previous temp-PNG round trip
                # leaked the file whenever pytesseract raised before cleanup.
                text = pytesseract.image_to_string(Image.fromarray(gray))
                if len(text.strip()) > 10:
                    board_texts.append(text.strip())
            frame_count += 1
    finally:
        # Release the capture even if OCR raises mid-loop.
        cap.release()
    return "\n---\n".join(board_texts)


def lazy_agent(video_file, video_url):
    """Run the full pipeline: fetch video, transcribe, summarize, OCR.

    Returns a 4-tuple (transcript, summary, board_notes, video_path) whose
    first element carries an error message (and the rest are empty/None)
    on any failure, matching the four Gradio output components.
    """
    video_path = None
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                return f"❌ YouTube download failed: {str(e)}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None

        # transcribe() yields segments lazily; materialize them, otherwise
        # `if not segments` can never fire (a generator is always truthy).
        segments, _ = whisper_model.transcribe(video_path)
        segments = list(segments)
        if not segments:
            return "❌ Transcription failed. No audio found.", "", "", None

        transcript = " ".join(seg.text for seg in segments)
        transcript = transcript[:2000]  # keep the LLM prompt within budget

        prompt = f"""
Transcript:
{transcript}

Now:
1. Summarize this lecture in one paragraph.
2. Generate 5 flashcards in Q&A format.
3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).
"""
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)

        return (
            transcript,
            summary,
            board_notes,
            video_path if os.path.isfile(video_path) else None,
        )
    except Exception as e:
        return f"⚠️ Unexpected error: {str(e)}", "", "", None


with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## 🎓 Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")
    with gr.Row():
        video_file = gr.Video(label="🎥 Upload Video")
        video_url = gr.Textbox(label="🌐 Or Paste URL")
    with gr.Row():
        run_btn = gr.Button("🚀 Run Agent")
    with gr.Row():
        transcript_out = gr.Textbox(label="📝 Transcript", lines=12)
        summary_out = gr.Textbox(label="📚 Summary + Flashcards + Quiz", lines=12)
    with gr.Row():
        board_notes_out = gr.Textbox(label="🖍️ Board Notes (OCR)")
        video_path_out = gr.File(label="📁 Downloaded Video", type="filepath")

    # lazy_agent already has the (file, url) -> 4-tuple signature Gradio
    # needs, so no pass-through wrapper is required.
    run_btn.click(
        fn=lazy_agent,
        inputs=[video_file, video_url],
        outputs=[transcript_out, summary_out, board_notes_out, video_path_out],
    )

app.launch()