agnixcode committed on
Commit
587e2e0
·
verified ·
1 Parent(s): 670c1c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
  import numpy as np
5
  import faiss
6
  from youtube_transcript_api import YouTubeTranscriptApi
7
- from sentence_transformers import SentenceTransformer
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from groq import Groq
10
 
@@ -12,11 +12,11 @@ from groq import Groq
12
  # CONFIGURATION
13
  # ===============================
14
 
15
- # Load Groq API Key from environment variables (Hugging Face Secrets)
16
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
17
  groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
18
 
19
- # Load embedding model (runs on CPU in HF Spaces)
20
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
21
 
22
  # Global variables to store the "brain" of the current video
@@ -29,22 +29,24 @@ chunks_store = []
29
 
30
  def extract_video_id(url):
31
  """Extracts the 11-character YouTube video ID."""
32
- regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
 
33
  match = re.search(regex, url)
34
  return match.group(1) if match else None
35
 
36
  def get_transcript(url):
37
- """Fetches transcript and handles potential library errors."""
38
  video_id = extract_video_id(url)
39
  if not video_id:
40
  return "ERROR: Invalid YouTube URL."
41
 
42
  try:
43
- # Correct static method call on the YouTubeTranscriptApi class
44
- transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
45
- return " ".join([item['text'] for item in transcript_data])
 
46
  except Exception as e:
47
- return f"ERROR: {str(e)}"
48
 
49
  def build_vector_index(text):
50
  """Chunks text and stores it in a FAISS vector database."""
@@ -73,7 +75,9 @@ def get_ai_response(user_query):
73
  D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
74
  context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
75
 
76
- prompt = f"""Use the following video transcript context to answer the question.
 
 
77
  Context: {context}
78
  Question: {user_query}
79
  Answer:"""
@@ -101,7 +105,8 @@ def process_video_step(url):
101
 
102
  def chat_step(message, history):
103
  if not GROQ_API_KEY:
104
- return history + [("Error", "Groq API Key missing in Secrets.")], ""
 
105
 
106
  answer = get_ai_response(message)
107
  history.append((message, answer))
 
4
  import numpy as np
5
  import faiss
6
  from youtube_transcript_api import YouTubeTranscriptApi
7
+ from sentence_transformers import Transformer, SentenceTransformer
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from groq import Groq
10
 
 
12
  # CONFIGURATION
13
  # ===============================
14
 
15
+ # Load Groq API Key from Hugging Face Secrets
16
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
17
  groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
18
 
19
+ # Load embedding model
20
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
21
 
22
  # Global variables to store the "brain" of the current video
 
29
 
30
  def extract_video_id(url):
31
  """Extracts the 11-character YouTube video ID."""
32
+ # Handles standard URLs, shorts, and shared links
33
+ regex = r"(?:v=|\/|be\/|embed\/|shorts\/)([0-9A-Za-z_-]{11})"
34
  match = re.search(regex, url)
35
  return match.group(1) if match else None
36
 
37
  def get_transcript(url):
38
+ """Fetches transcript from YouTube."""
39
  video_id = extract_video_id(url)
40
  if not video_id:
41
  return "ERROR: Invalid YouTube URL."
42
 
43
  try:
44
+ # Correct Method Call using the imported class
45
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
46
+ text = " ".join([i['text'] for i in transcript_list])
47
+ return text
48
  except Exception as e:
49
+ return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
50
 
51
  def build_vector_index(text):
52
  """Chunks text and stores it in a FAISS vector database."""
 
75
  D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
76
  context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
77
 
78
+ prompt = f"""Use the following video transcript context to answer the question.
79
+ If the answer isn't in the context, say you don't know based on the video.
80
+
81
  Context: {context}
82
  Question: {user_query}
83
  Answer:"""
 
105
 
106
  def chat_step(message, history):
107
  if not GROQ_API_KEY:
108
+ history.append((message, "Error: Groq API Key missing in Secrets."))
109
+ return history, ""
110
 
111
  answer = get_ai_response(message)
112
  history.append((message, answer))