Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import gradio as gr
|
|
| 4 |
import numpy as np
|
| 5 |
import faiss
|
| 6 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 7 |
-
from sentence_transformers import SentenceTransformer
|
| 8 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
from groq import Groq
|
| 10 |
|
|
@@ -12,11 +12,11 @@ from groq import Groq
|
|
| 12 |
# CONFIGURATION
|
| 13 |
# ===============================
|
| 14 |
|
| 15 |
-
# Load Groq API Key from
|
| 16 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 17 |
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
|
| 18 |
|
| 19 |
-
# Load embedding model
|
| 20 |
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 21 |
|
| 22 |
# Global variables to store the "brain" of the current video
|
|
@@ -29,22 +29,24 @@ chunks_store = []
|
|
| 29 |
|
| 30 |
def extract_video_id(url):
|
| 31 |
"""Extracts the 11-character YouTube video ID."""
|
| 32 |
-
|
|
|
|
| 33 |
match = re.search(regex, url)
|
| 34 |
return match.group(1) if match else None
|
| 35 |
|
| 36 |
def get_transcript(url):
|
| 37 |
-
"""Fetches transcript
|
| 38 |
video_id = extract_video_id(url)
|
| 39 |
if not video_id:
|
| 40 |
return "ERROR: Invalid YouTube URL."
|
| 41 |
|
| 42 |
try:
|
| 43 |
-
# Correct
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
except Exception as e:
|
| 47 |
-
return f"ERROR: {str(e)}"
|
| 48 |
|
| 49 |
def build_vector_index(text):
|
| 50 |
"""Chunks text and stores it in a FAISS vector database."""
|
|
@@ -73,7 +75,9 @@ def get_ai_response(user_query):
|
|
| 73 |
D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
|
| 74 |
context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
|
| 75 |
|
| 76 |
-
prompt = f"""Use the following video transcript context to answer the question.
|
|
|
|
|
|
|
| 77 |
Context: {context}
|
| 78 |
Question: {user_query}
|
| 79 |
Answer:"""
|
|
@@ -101,7 +105,8 @@ def process_video_step(url):
|
|
| 101 |
|
| 102 |
def chat_step(message, history):
|
| 103 |
if not GROQ_API_KEY:
|
| 104 |
-
|
|
|
|
| 105 |
|
| 106 |
answer = get_ai_response(message)
|
| 107 |
history.append((message, answer))
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import faiss
|
| 6 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 7 |
+
from sentence_transformers import Transformer, SentenceTransformer
|
| 8 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
from groq import Groq
|
| 10 |
|
|
|
|
| 12 |
# CONFIGURATION
|
| 13 |
# ===============================
|
| 14 |
|
| 15 |
+
# Load Groq API Key from Hugging Face Secrets
|
| 16 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 17 |
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
|
| 18 |
|
| 19 |
+
# Load embedding model
|
| 20 |
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 21 |
|
| 22 |
# Global variables to store the "brain" of the current video
|
|
|
|
| 29 |
|
| 30 |
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognises ``watch?v=`` style query parameters, ``youtu.be`` share
    links, and the ``/embed/``, ``/shorts/``, ``/live/`` and ``/v/`` path
    forms.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string or
        contains no recognisable video ID.
    """
    if not isinstance(url, str):
        return None

    # The previous pattern accepted a bare "/" as a prefix, so any path
    # segment of 11+ ID characters (e.g. "/feed/subscriptions" or a channel
    # ID) was misread as a video ID. Anchor on the known URL forms instead,
    # and refuse runs longer than 11 characters rather than truncating them.
    regex = (
        r"(?:[?&]v=|youtu\.be/|/embed/|/shorts/|/live/|/v/)"
        r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    )
    match = re.search(regex, url)
    return match.group(1) if match else None
|
| 36 |
|
| 37 |
def get_transcript(url):
    """Fetch the transcript text for a YouTube video URL.

    Args:
        url: A YouTube URL; the video ID is taken from it with
            ``extract_video_id``.

    Returns:
        The transcript segments joined with single spaces. On failure a
        string starting with ``"ERROR:"`` is returned instead of raising:
        either the invalid-URL message or a message carrying the
        underlying exception text.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return "ERROR: Invalid YouTube URL."

    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # youtube-transcript-api >= 1.0 exposes an instance-level
            # ``fetch``; iterating the result yields snippets with ``.text``.
            # The static ``get_transcript`` is deprecated there and absent
            # from later releases, so prefer the new API when it exists.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in fetched]
        else:
            # Older releases: static call returning a list of dicts.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry["text"] for entry in transcript_list]
        return " ".join(pieces)
    except Exception as e:
        # Failures are reported as an "ERROR:" string, matching the
        # invalid-URL branch above, rather than propagated.
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
|
| 50 |
|
| 51 |
def build_vector_index(text):
|
| 52 |
"""Chunks text and stores it in a FAISS vector database."""
|
|
|
|
| 75 |
D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
|
| 76 |
context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
|
| 77 |
|
| 78 |
+
prompt = f"""Use the following video transcript context to answer the question.
|
| 79 |
+
If the answer isn't in the context, say you don't know based on the video.
|
| 80 |
+
|
| 81 |
Context: {context}
|
| 82 |
Question: {user_query}
|
| 83 |
Answer:"""
|
|
|
|
| 105 |
|
| 106 |
def chat_step(message, history):
|
| 107 |
if not GROQ_API_KEY:
|
| 108 |
+
history.append((message, "Error: Groq API Key missing in Secrets."))
|
| 109 |
+
return history, ""
|
| 110 |
|
| 111 |
answer = get_ai_response(message)
|
| 112 |
history.append((message, answer))
|