File size: 4,414 Bytes
99f174a
9cd109e
 
7ba9001
9cd109e
 
 
 
 
99f174a
947bee3
 
9cd109e
947bee3
 
 
 
 
 
 
 
 
 
 
9cd109e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947bee3
9cd109e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947bee3
9cd109e
 
947bee3
9cd109e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a4a712
 
 
99f174a
6a4a712
 
 
 
 
947bee3
9cd109e
99f174a
9cd109e
a011f9d
99f174a
9cd109e
99f174a
947bee3
9cd109e
 
 
 
 
 
 
 
947bee3
9cd109e
 
 
947bee3
9cd109e
 
 
947bee3
9cd109e
 
947bee3
9cd109e
947bee3
99f174a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
from faster_whisper import WhisperModel
import yt_dlp
import os
import uuid
import cv2
import pytesseract
from PIL import Image
import requests

# Hugging Face Inference API for Mistral
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"

def query_mistral(prompt):
    """Send *prompt* to Mistral-7B-Instruct via the HF Inference API.

    Returns the generated text on success. On any failure (network error,
    timeout, non-JSON body, unexpected payload shape) returns a
    human-readable warning string — callers never see an exception.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50
        }
    }
    try:
        # Timeout keeps a stalled/unreachable endpoint from hanging the
        # Gradio worker forever; the post now lives inside the try so
        # network errors follow the same "return a warning" contract.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        json_data = response.json()
        # Success shape is a non-empty list whose first item carries the text.
        if isinstance(json_data, list) and json_data and "generated_text" in json_data[0]:
            return json_data[0]["generated_text"]
        return "⚠️ Unexpected response: " + str(json_data)
    except requests.RequestException as e:
        # Request never completed — there is no response body to show.
        return f"⚠️ LLM API error: {e}"
    except Exception as e:
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"

# Whisper model for transcription — loaded once at import time (module-level
# singleton shared by all requests). int8 quantization trades a little
# accuracy for CPU-friendly memory use on free-tier hardware.
whisper_model = WhisperModel("base", compute_type="int8")

def download_video(url):
    """Download *url* with yt-dlp into a unique /tmp mp4 and return its path.

    Raises:
        RuntimeError: if yt-dlp fails for any reason (bad URL, geo/age
            restriction, missing cookies, ...), chaining the original
            exception so the yt-dlp traceback is preserved.
    """
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
        # NOTE(review): assumes a cookies.txt is deployed alongside the app
        # (needed for age/region-restricted videos) — confirm it exists.
        'cookiefile': 'cookies.txt'
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Chain the cause (`from e`) so debugging keeps the real failure.
        raise RuntimeError(f"YouTube download failed: {str(e)}") from e
    return filename

def extract_board_notes(video_path):
    """OCR board/slide text from roughly every 90th frame of the video.

    Returns the recognized snippets (only those longer than 10 characters
    after stripping) joined by "\n---\n"; empty string if nothing found.
    """
    cap = cv2.VideoCapture(video_path)
    board_texts = []
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Sample one frame every 90 (~3 s at 30 fps) to keep OCR cheap.
            if frame_count % 90 == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Feed the frame to tesseract in memory via PIL instead of a
                # temp PNG round-trip — nothing to leak if OCR raises.
                text = pytesseract.image_to_string(Image.fromarray(gray))
                if len(text.strip()) > 10:
                    board_texts.append(text.strip())
            frame_count += 1
    finally:
        # Release the capture even if decoding/OCR raises mid-loop.
        cap.release()
    return "\n---\n".join(board_texts)

def lazy_agent(video_file, video_url):
    """Run the full pipeline: fetch video, transcribe, summarize, OCR notes.

    Exactly one of *video_file* (local path from the upload widget) or
    *video_url* (URL string) should be provided; the URL wins if both are.

    Returns a 4-tuple matching the Gradio outputs:
        (transcript, summary/flashcards/quiz, board notes, video path or None).
    On any failure the first element is an error message and the rest are
    empty/None — this function never raises.
    """
    video_path = None
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                return f"❌ YouTube download failed: {str(e)}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None

        # faster-whisper returns a *generator* of segments. Materialize it:
        # a generator is always truthy, so the original emptiness check could
        # never fire, and a list can be iterated for the join below.
        segments, _ = whisper_model.transcribe(video_path)
        segments = list(segments)
        if not segments:
            return "❌ Transcription failed. No audio found.", "", "", None

        transcript = " ".join(seg.text for seg in segments)
        # Truncate so the prompt stays within the model's context budget.
        transcript = transcript[:2000]

        prompt = f"""
Transcript:
{transcript}

Now:
1. Summarize this lecture in one paragraph.
2. Generate 5 flashcards in Q&A format.
3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).
"""
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)

        # Only hand Gradio a file path that actually exists on disk.
        return transcript, summary, board_notes, video_path if os.path.isfile(video_path) else None

    except Exception as e:
        return f"⚠️ Unexpected error: {str(e)}", "", "", None

# --- Gradio UI wiring ------------------------------------------------------
# NOTE(review): the emoji in the labels below are mojibake ("πŸŽ“" etc. —
# UTF-8 bytes decoded under the wrong codepage). Kept byte-identical here
# because the original code points can't all be recovered with certainty;
# re-save the file as UTF-8 with the intended emoji to fix.
with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## πŸŽ“ Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")

    with gr.Row():
        video_file = gr.Video(label="πŸŽ₯ Upload Video")
        video_url = gr.Textbox(label="🌐 Or Paste URL")

    with gr.Row():
        run_btn = gr.Button("πŸš€ Run Agent")

    with gr.Row():
        transcript_out = gr.Textbox(label="πŸ“ Transcript", lines=12)
        summary_out = gr.Textbox(label="πŸ“š Summary + Flashcards + Quiz", lines=12)

    with gr.Row():
        board_notes_out = gr.Textbox(label="πŸ–οΈ Board Notes (OCR)")
        video_path_out = gr.File(label="πŸ“ Downloaded Video", type="filepath")

    # lazy_agent already has the (file, url) -> 4-tuple signature the click
    # handler needs, so wire it directly — no pass-through wrapper required.
    run_btn.click(fn=lazy_agent, inputs=[video_file, video_url],
                  outputs=[transcript_out, summary_out, board_notes_out, video_path_out])

app.launch()