# Yasinfalkean's picture
# Update app.py
# 9cd109e verified
import gradio as gr
from faster_whisper import WhisperModel
import yt_dlp
import os
import uuid
import cv2
import pytesseract
from PIL import Image
import requests
# Settings for the Hugging Face Inference API endpoint serving Mistral.
# HF_TOKEN comes from the environment and is None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "mistralai/Mistral-7B-Instruct-v0.2"
)
def query_mistral(prompt):
    """Send *prompt* to the hosted Mistral model and return its reply as text.

    The function never raises for transport or parsing problems; instead it
    returns a human-readable string starting with "⚠️" so the caller can show
    it in the UI next to whatever other results it already has.

    Args:
        prompt: Full text prompt posted as the ``inputs`` field.

    Returns:
        The ``generated_text`` of the first result on success, otherwise a
        "⚠️ ..." diagnostic string describing what went wrong.
    """
    # Seconds to wait for the API before giving up; without a timeout a
    # stalled connection would block the request indefinitely.
    request_timeout = 120

    # Only send the Authorization header when a token is configured; an unset
    # token would otherwise be sent as the literal string "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50,
            # Ask for the completion only, not the prompt echoed back.
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=request_timeout
        )
    except requests.RequestException as e:
        return f"⚠️ LLM API error: {e}"

    try:
        json_data = response.json()
    except ValueError as e:  # body was not valid JSON
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"

    # Success is a non-empty list whose first item carries "generated_text";
    # anything else (error dicts, empty lists) is reported verbatim.
    if (
        isinstance(json_data, list)
        and json_data
        and isinstance(json_data[0], dict)
        and "generated_text" in json_data[0]
    ):
        return json_data[0]["generated_text"]
    return "⚠️ Unexpected response: " + str(json_data)
# Speech-to-text model used for transcription. It is constructed once, when
# the module is imported, and reused by every request.
_WHISPER_MODEL_SIZE = "base"
_WHISPER_COMPUTE_TYPE = "int8"
whisper_model = WhisperModel(
    _WHISPER_MODEL_SIZE,
    compute_type=_WHISPER_COMPUTE_TYPE,
)
def download_video(url):
    """Download the video at *url* with yt-dlp into a uniquely named MP4.

    Args:
        url: Address of the video to fetch.

    Returns:
        Path of the downloaded file, ``/tmp/<random hex>.mp4``.

    Raises:
        RuntimeError: If yt-dlp fails for any reason or no output file was
            produced. The original exception is attached as ``__cause__``.
    """
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
        'cookiefile': 'cookies.txt',
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Chain explicitly so the underlying yt-dlp error stays inspectable.
        raise RuntimeError(f"YouTube download failed: {e}") from e

    # Fail here, with a clear message, rather than later in transcription if
    # the download finished without leaving the expected file behind.
    if not os.path.isfile(filename):
        raise RuntimeError(
            "YouTube download failed: no output file was produced"
        )
    return filename
def extract_board_notes(video_path, frame_interval=90, min_chars=10):
    """OCR periodically sampled frames of a video and return the text found.

    Frames are read sequentially; every ``frame_interval``-th frame (starting
    with the first) is converted to grayscale and passed to Tesseract. Results
    whose stripped text is longer than ``min_chars`` characters are kept.

    Args:
        video_path: Path of the video file to scan.
        frame_interval: Sample one frame out of this many. Must be >= 1.
        min_chars: OCR results with this many characters or fewer (after
            stripping whitespace) are discarded.

    Returns:
        The kept OCR texts joined by a ``---`` separator line, or an empty
        string when nothing was recognised or no frame could be read.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be >= 1")

    cap = cv2.VideoCapture(video_path)
    board_texts = []
    try:
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Hand the pixels to Tesseract in memory instead of writing a
                # temporary PNG and reading it back; nothing is left on disk
                # if OCR fails.
                text = pytesseract.image_to_string(Image.fromarray(gray)).strip()
                if len(text) > min_chars:
                    board_texts.append(text)
            frame_count += 1
    finally:
        # Release the capture handle even when conversion or OCR raises.
        cap.release()
    return "\n---\n".join(board_texts)
def lazy_agent(video_file, video_url):
    """Run the full lecture pipeline on an uploaded file or a video URL.

    A non-blank ``video_url`` takes precedence over ``video_file``. The video
    is transcribed, the transcript is sent to the LLM for a summary,
    flashcards and quiz, and the frames are scanned for on-screen text.

    Args:
        video_file: Path of an uploaded video, or a falsy value if none.
        video_url: URL of a video to download, or a falsy/blank value if none.

    Returns:
        A 4-tuple ``(transcript, summary, board_notes, video_path)``. On any
        failure the first element is an error message beginning with "❌" or
        "⚠️", the next two are empty strings and the last is ``None``.
        ``video_path`` is ``None`` when the path is not an existing file.
    """
    # Upper bound on how much transcript text is embedded in the LLM prompt.
    max_prompt_chars = 2000

    # Treat a whitespace-only URL field as "no URL given".
    video_url = (video_url or "").strip()
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                # download_video's message already starts with
                # "YouTube download failed:", so it is not prefixed again.
                return f"❌ {e}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None

        segments, _ = whisper_model.transcribe(video_path)
        # transcribe() hands back a lazily evaluated generator, which is
        # always truthy; the only reliable emptiness check is on the text
        # obtained after consuming it.
        transcript = " ".join(seg.text for seg in segments)
        if not transcript.strip():
            return "❌ Transcription failed. No audio found.", "", "", None

        # Only the prompt is capped; the caller receives the whole transcript.
        prompt = (
            "\n"
            "Transcript:\n"
            f"{transcript[:max_prompt_chars]}\n"
            "Now:\n"
            "1. Summarize this lecture in one paragraph.\n"
            "2. Generate 5 flashcards in Q&A format.\n"
            "3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).\n"
        )
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)
        downloadable = video_path if os.path.isfile(video_path) else None
        return transcript, summary, board_notes, downloadable
    except Exception as e:
        # UI boundary: report the problem in the first output box rather
        # than letting the exception propagate to the caller.
        return f"⚠️ Unexpected error: {e}", "", "", None
# Gradio front end: a video upload and a URL box feed lazy_agent, whose four
# results fill the transcript, summary, board-notes and file outputs.
# NOTE(review): the emoji in the labels were mis-encoded (UTF-8 shown through a
# legacy code page) and have been restored; 📝, 🖍️ and 📁 had lost a byte and
# are best guesses — confirm against the intended design.
with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## 🎓 Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")

    # Inputs: either an uploaded file or a URL.
    with gr.Row():
        video_file = gr.Video(label="🎥 Upload Video")
        video_url = gr.Textbox(label="🌐 Or Paste URL")

    with gr.Row():
        run_btn = gr.Button("🚀 Run Agent")

    # Outputs, in the same order as lazy_agent's return tuple.
    with gr.Row():
        transcript_out = gr.Textbox(label="📝 Transcript", lines=12)
        summary_out = gr.Textbox(label="📚 Summary + Flashcards + Quiz", lines=12)
    with gr.Row():
        board_notes_out = gr.Textbox(label="🖍️ Board Notes (OCR)")
        video_path_out = gr.File(label="📁 Downloaded Video", type="filepath")

    def run_pipeline(file, url):
        """Button callback: forward the two inputs to lazy_agent unchanged."""
        return lazy_agent(file, url)

    run_btn.click(
        fn=run_pipeline,
        inputs=[video_file, video_url],
        outputs=[transcript_out, summary_out, board_notes_out, video_path_out],
    )

app.launch()