import gradio as gr
from faster_whisper import WhisperModel
import yt_dlp
import os
import uuid
import cv2
import pytesseract
from PIL import Image
import requests
# Hugging Face Inference API for Mistral
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
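# query_mistral sends the prompt to the hosted Mistral-7B-Instruct endpoint and
# returns the generated text, or a readable error string if the request or JSON
# parsing fails. HF_TOKEN must be available in the environment (e.g. as a Space
# secret), otherwise the Inference API will likely reject the call.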
def query_mistral(prompt):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    try:
        json_data = response.json()
        if isinstance(json_data, list) and "generated_text" in json_data[0]:
            return json_data[0]["generated_text"]
        else:
            return "⚠️ Unexpected response: " + str(json_data)
    except Exception as e:
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"
# Whisper model for transcription
whisper_model = WhisperModel("base", compute_type="int8")
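# The "base" model with int8 quantization keeps transcription fast enough for
# CPU-only environments at the cost of some accuracy.

# download_video fetches the lecture with yt-dlp into a unique /tmp path,
# merging the best video+audio streams into an mp4 (the merge step requires
# ffmpeg on the host). 'cookiefile' points at a cookies.txt expected to sit
# next to the app, for age- or login-restricted videos.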
def download_video(url):
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
        'cookiefile': 'cookies.txt'
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return filename
    except Exception as e:
        raise RuntimeError(f"YouTube download failed: {str(e)}")
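# extract_board_notes samples one frame out of every 90 (roughly every 3
# seconds at 30 fps), converts it to grayscale, and runs Tesseract OCR on it,
# keeping any result longer than 10 characters as a "board note". The
# tesseract binary must be installed on the host for pytesseract to work.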
def extract_board_notes(video_path):
    cap = cv2.VideoCapture(video_path)
    board_texts = []
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % 90 == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            temp_path = f"/tmp/frame_{uuid.uuid4().hex}.png"
            cv2.imwrite(temp_path, gray)
            text = pytesseract.image_to_string(Image.open(temp_path))
            if len(text.strip()) > 10:
                board_texts.append(text.strip())
            os.remove(temp_path)
        frame_count += 1
    cap.release()
    return "\n---\n".join(board_texts)
def lazy_agent(video_file, video_url):
    video_path = None
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                return f"❌ YouTube download failed: {str(e)}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None
        # faster-whisper returns a lazy generator; materialize it so the
        # emptiness check and the join below actually work.
        segments, _ = whisper_model.transcribe(video_path)
        segments = list(segments)
        if not segments:
            return "❌ Transcription failed. No audio found.", "", "", None
        transcript = " ".join([seg.text for seg in segments])
        transcript = transcript[:2000]
        prompt = f"""
Transcript:
{transcript}
Now:
1. Summarize this lecture in one paragraph.
2. Generate 5 flashcards in Q&A format.
3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).
"""
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)
        return transcript, summary, board_notes, video_path if os.path.isfile(video_path) else None
    except Exception as e:
        return f"⚠️ Unexpected error: {str(e)}", "", "", None
with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")
    with gr.Row():
        video_file = gr.Video(label="Upload Video")
        video_url = gr.Textbox(label="Or Paste URL")
    with gr.Row():
        run_btn = gr.Button("Run Agent")
    with gr.Row():
        transcript_out = gr.Textbox(label="Transcript", lines=12)
        summary_out = gr.Textbox(label="Summary + Flashcards + Quiz", lines=12)
    with gr.Row():
        board_notes_out = gr.Textbox(label="Board Notes (OCR)")
        video_path_out = gr.File(label="Downloaded Video", type="filepath")

    def run_pipeline(file, url):
        return lazy_agent(file, url)

    run_btn.click(fn=run_pipeline, inputs=[video_file, video_url],
                  outputs=[transcript_out, summary_out, board_notes_out, video_path_out])
app.launch()
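# Note: this app assumes ffmpeg and tesseract-ocr are installed as system
# packages (e.g. via a Space's packages.txt) and that HF_TOKEN is configured;
# without them the download/merge, OCR, or LLM steps will fail at runtime.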