Spaces:

agnixcode
/

youtube-rag-chat

Runtime error

App Files Files Community

agnixcode committed 22 days ago

Commit

587e2e0

verified ·

1 Parent(s): 670c1c5

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import gradio as gr
 import numpy as np
 import faiss
 from youtube_transcript_api import YouTubeTranscriptApi
-from sentence_transformers import SentenceTransformer
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from groq import Groq
@@ -12,11 +12,11 @@ from groq import Groq
 # CONFIGURATION
 # ===============================
-# Load Groq API Key from environment variables (Hugging Face Secrets)
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model (runs on CPU in HF Spaces)
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 # Global variables to store the "brain" of the current video
@@ -29,22 +29,24 @@ chunks_store = []
 def extract_video_id(url):
     """Extracts the 11-character YouTube video ID."""
-    regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
     match = re.search(regex, url)
     return match.group(1) if match else None
 def get_transcript(url):
-    """Fetches transcript and handles potential library errors."""
     video_id = extract_video_id(url)
     if not video_id:
         return "ERROR: Invalid YouTube URL."
     try:
-        # Correct static method call on the YouTubeTranscriptApi class
-        transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
-        return " ".join([item['text'] for item in transcript_data])
     except Exception as e:
-        return f"ERROR: {str(e)}"
 def build_vector_index(text):
     """Chunks text and stores it in a FAISS vector database."""
@@ -73,7 +75,9 @@ def get_ai_response(user_query):
     D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
     context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
-    prompt = f"""Use the following video transcript context to answer the question.
     Context: {context}
     Question: {user_query}
     Answer:"""
@@ -101,7 +105,8 @@ def process_video_step(url):
 def chat_step(message, history):
     if not GROQ_API_KEY:
-        return history + [("Error", "Groq API Key missing in Secrets.")], ""
     answer = get_ai_response(message)
     history.append((message, answer))

 import numpy as np
 import faiss
 from youtube_transcript_api import YouTubeTranscriptApi
+from sentence_transformers import Transformer, SentenceTransformer
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from groq import Groq
 # CONFIGURATION
 # ===============================
+# Load Groq API Key from Hugging Face Secrets
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
+# Load embedding model
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 # Global variables to store the "brain" of the current video
 def extract_video_id(url):
     """Return the 11-character YouTube video ID found in *url*, or None.

     Recognised forms are ``watch?v=<id>`` (also ``&v=<id>``),
     ``youtu.be/<id>`` and the ``/embed/``, ``/shorts/``, ``/live/`` and
     ``/v/`` path styles.

     Args:
         url: URL text supplied by the user.

     Returns:
         The video ID string, or None when *url* is not a string or contains
         no recognisable ID.
     """
     if not isinstance(url, str):
         return None
     # Every alternative is tied to an explicit marker. A bare "/" alternative
     # would let any 11 ID-like characters after a slash win first, e.g. the
     # host "youtube-noc" in "https://youtube-nocookie.com/embed/<id>".
     # The trailing lookahead rejects runs longer than 11 ID characters
     # instead of silently truncating them.
     regex = (
         r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
         r"([0-9A-Za-z_-]{11})"
         r"(?![0-9A-Za-z_-])"
     )
     match = re.search(regex, url)
     return match.group(1) if match else None
 def get_transcript(url):
     """Fetch the transcript of the YouTube video at *url* as one string.

     Args:
         url: YouTube URL; the video ID is taken from it with
             ``extract_video_id``.

     Returns:
         The transcript segments joined with single spaces. On failure the
         return value is a message starting with ``"ERROR:"`` rather than an
         exception, so callers can show it to the user directly.
     """
     video_id = extract_video_id(url)
     if not video_id:
         return "ERROR: Invalid YouTube URL."
     try:
         if hasattr(YouTubeTranscriptApi, "fetch"):
             # youtube-transcript-api 1.x: instance API that yields snippet
             # objects exposing ``.text``. The static ``get_transcript`` was
             # deprecated in 1.0 and later removed, so prefer this path.
             snippets = YouTubeTranscriptApi().fetch(video_id)
             parts = [snippet.text for snippet in snippets]
         else:
             # Legacy releases: static call returning a list of dicts.
             transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
             parts = [item["text"] for item in transcript_list]
         return " ".join(parts)
     except Exception as e:
         # Deliberate catch-all at the UI boundary: network, parsing and
         # "no transcript available" failures are all reported as text.
         return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
 def build_vector_index(text):
     """Chunks text and stores it in a FAISS vector database."""
     D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
     context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
+    prompt = f"""Use the following video transcript context to answer the question.
+    If the answer isn't in the context, say you don't know based on the video.
     Context: {context}
     Question: {user_query}
     Answer:"""
 def chat_step(message, history):
     if not GROQ_API_KEY:
+        history.append((message, "Error: Groq API Key missing in Secrets."))
+        return history, ""
     answer = get_ai_response(message)
     history.append((message, answer))