import gradio as gr
from faster_whisper import WhisperModel
import yt_dlp
import os
import uuid
import cv2
import pytesseract
from PIL import Image
import requests
# Hugging Face Inference API for Mistral
HF_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
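# query_mistral sends the prompt to the hosted Mistral-7B-Instruct endpoint and
# returns the generated text, or a readable error string if the request or JSON
# parsing fails. HF_TOKEN must be available in the environment (e.g. as a Space
# secret), otherwise the Inference API will likely reject the call.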
def query_mistral(prompt):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_k": 50
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    try:
        json_data = response.json()
        if isinstance(json_data, list) and "generated_text" in json_data[0]:
            return json_data[0]["generated_text"]
        else:
            return "⚠️ Unexpected response: " + str(json_data)
    except Exception as e:
        return f"⚠️ LLM API error: {e} | Raw response: {response.text}"
# Whisper model for transcription
whisper_model = WhisperModel("base", compute_type="int8")
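# The "base" model with int8 quantization keeps transcription fast enough for
# CPU-only environments at the cost of some accuracy.

# download_video fetches the lecture with yt-dlp into a unique /tmp path,
# merging the best video+audio streams into an mp4 (the merge step requires
# ffmpeg on the host). 'cookiefile' points at a cookies.txt expected to sit
# next to the app, for age- or login-restricted videos.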
def download_video(url):
    filename = f"/tmp/{uuid.uuid4().hex}.mp4"
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': filename,
        'quiet': True,
        'merge_output_format': 'mp4',
        'cookiefile': 'cookies.txt'
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return filename
    except Exception as e:
        raise RuntimeError(f"YouTube download failed: {str(e)}")
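# extract_board_notes samples one frame out of every 90 (roughly every 3
# seconds at 30 fps), converts it to grayscale, and runs Tesseract OCR on it,
# keeping any result longer than 10 characters as a "board note". The
# tesseract binary must be installed on the host for pytesseract to work.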
def extract_board_notes(video_path):
    cap = cv2.VideoCapture(video_path)
    board_texts = []
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % 90 == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            temp_path = f"/tmp/frame_{uuid.uuid4().hex}.png"
            cv2.imwrite(temp_path, gray)
            text = pytesseract.image_to_string(Image.open(temp_path))
            if len(text.strip()) > 10:
                board_texts.append(text.strip())
            os.remove(temp_path)
        frame_count += 1
    cap.release()
    return "\n---\n".join(board_texts)
def lazy_agent(video_file, video_url):
    video_path = None
    try:
        if video_url:
            try:
                video_path = download_video(video_url)
            except Exception as e:
                return f"❌ YouTube download failed: {str(e)}", "", "", None
        elif video_file:
            video_path = video_file
        else:
            return "❌ No input provided", "", "", None
        # faster-whisper returns a lazy generator; materialize it so the
        # emptiness check and the join below actually work.
        segments, _ = whisper_model.transcribe(video_path)
        segments = list(segments)
        if not segments:
            return "❌ Transcription failed. No audio found.", "", "", None
        transcript = " ".join([seg.text for seg in segments])
        transcript = transcript[:2000]
        prompt = f"""
Transcript:
{transcript}
Now:
1. Summarize this lecture in one paragraph.
2. Generate 5 flashcards in Q&A format.
3. Generate 5 quiz questions with four multiple-choice answers (mark the correct one).
"""
        summary = query_mistral(prompt)
        board_notes = extract_board_notes(video_path)
        return transcript, summary, board_notes, video_path if os.path.isfile(video_path) else None
    except Exception as e:
        return f"⚠️ Unexpected error: {str(e)}", "", "", None
with gr.Blocks(title="Lazy G Lecture Agent", theme=gr.themes.Soft()) as app:
    gr.Markdown("## Lazy G Lecture Agent\nUpload or paste a lecture and get everything you need!")
    with gr.Row():
        video_file = gr.Video(label="Upload Video")
        video_url = gr.Textbox(label="Or Paste URL")
    with gr.Row():
        run_btn = gr.Button("Run Agent")
    with gr.Row():
        transcript_out = gr.Textbox(label="Transcript", lines=12)
        summary_out = gr.Textbox(label="Summary + Flashcards + Quiz", lines=12)
    with gr.Row():
        board_notes_out = gr.Textbox(label="Board Notes (OCR)")
        video_path_out = gr.File(label="Downloaded Video", type="filepath")

    def run_pipeline(file, url):
        return lazy_agent(file, url)

    run_btn.click(fn=run_pipeline, inputs=[video_file, video_url],
                  outputs=[transcript_out, summary_out, board_notes_out, video_path_out])
app.launch()
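# Note: this app assumes ffmpeg and tesseract-ocr are installed as system
# packages (e.g. via a Space's packages.txt) and that HF_TOKEN is configured;
# without them the download/merge, OCR, or LLM steps will fail at runtime.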