"""Lazy G Lecture Agent.

Gradio app that takes a lecture video (upload or URL), transcribes it with
faster-whisper, asks Mistral (via the HF Inference API) for a summary,
flashcards and a quiz, and OCRs board/slide text from sampled frames.
"""

import os
import uuid

import cv2
import gradio as gr
import pytesseract
import requests
import yt_dlp
from faster_whisper import WhisperModel
from PIL import Image

# Hugging Face Inference API endpoint for Mistral-7B-Instruct.
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"


def query_mistral(prompt):
    """Send *prompt* to the HF Inference API and return the generated text.

    Never raises: API/JSON failures are returned as human-readable warning
    strings so the Gradio UI can display them directly.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50,
        },
    }
    # Explicit timeout: requests has none by default and would hang the UI.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    try:
        json_data = response.json()
        # Successful responses look like [{"generated_text": "..."}].
        # Guard against an empty list before indexing element 0.
        if isinstance(json_data, list) and json_data and "generated_text" in json_data[0]:
            return json_data[0]["generated_text"]
        return "⚠️ Unexpected response: " + str(json_data)
    except Exception as e:
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"


# Whisper model for transcription (int8 weights: CPU-friendly).
whisper_model = WhisperModel("base", compute_type="int8")


def download_video(url):
    """Download *url* with yt-dlp to a unique mp4 in /tmp and return its path.

    Raises RuntimeError with a user-facing message on any download failure.
    """
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
    }
    # Only forward a cookie jar when it actually exists; yt-dlp errors out
    # on a missing cookiefile, which would break every cookie-less deploy.
    if os.path.isfile('cookies.txt'):
        ydl_opts['cookiefile'] = 'cookies.txt'
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return filename
    except Exception as e:
        raise RuntimeError(f"YouTube download failed: {str(e)}")


def extract_board_notes(video_path):
    """OCR every 90th frame of *video_path* and return the text chunks.

    Chunks shorter than 10 characters (after stripping) are discarded as
    noise; surviving chunks are joined with '---' separators.
    """
    cap = cv2.VideoCapture(video_path)
    board_texts = []
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % 90 == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # OCR directly from memory: the previous temp-PNG round trip
                # leaked the file whenever pytesseract raised before cleanup.
                text = pytesseract.image_to_string(Image.fromarray(gray))
                if len(text.strip()) > 10:
                    board_texts.append(text.strip())
            frame_count += 1
    finally:
        # Release the capture even if OCR raises mid-loop.
        cap.release()
    return "\n---\n".join(board_texts)


def lazy_agent(video_file, video_url):
    """Run the full pipeline: fetch video, transcribe, summarize, OCR.

    Returns a 4-tuple (transcript, summary, board_notes, video_path) whose
    first element carries an error message (and the rest are empty/None)
    on any failure, matching the four Gradio output components.
    """
    video_path = None
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                return f"❌ YouTube download failed: {str(e)}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None

        # transcribe() yields segments lazily; materialize them, otherwise
        # `if not segments` can never fire (a generator is always truthy).
        segments, _ = whisper_model.transcribe(video_path)
        segments = list(segments)
        if not segments:
            return "❌ Transcription failed. No audio found.", "", "", None

        transcript = " ".join(seg.text for seg in segments)
        transcript = transcript[:2000]  # keep the LLM prompt within budget

        prompt = f"""
Transcript:
{transcript}

Now:
1. Summarize this lecture in one paragraph.
2. Generate 5 flashcards in Q&A format.
3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).
"""
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)

        return (
            transcript,
            summary,
            board_notes,
            video_path if os.path.isfile(video_path) else None,
        )
    except Exception as e:
        return f"⚠️ Unexpected error: {str(e)}", "", "", None


with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## 🎓 Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")
    with gr.Row():
        video_file = gr.Video(label="🎥 Upload Video")
        video_url = gr.Textbox(label="🌐 Or Paste URL")
    with gr.Row():
        run_btn = gr.Button("🚀 Run Agent")
    with gr.Row():
        transcript_out = gr.Textbox(label="📝 Transcript", lines=12)
        summary_out = gr.Textbox(label="📚 Summary + Flashcards + Quiz", lines=12)
    with gr.Row():
        board_notes_out = gr.Textbox(label="🖍️ Board Notes (OCR)")
        video_path_out = gr.File(label="📁 Downloaded Video", type="filepath")

    # lazy_agent already has the (file, url) -> 4-tuple signature Gradio
    # needs, so no pass-through wrapper is required.
    run_btn.click(
        fn=lazy_agent,
        inputs=[video_file, video_url],
        outputs=[transcript_out, summary_out, board_notes_out, video_path_out],
    )

app.launch()