ABAO77 committed on
Commit
cb82316
·
verified ·
1 Parent(s): b947038

Upload 57 files

Browse files
src/agents/agent_transcript/__pycache__/func.cpython-311.pyc CHANGED
Binary files a/src/agents/agent_transcript/__pycache__/func.cpython-311.pyc and b/src/agents/agent_transcript/__pycache__/func.cpython-311.pyc differ
 
src/utils/__pycache__/helper.cpython-311.pyc CHANGED
Binary files a/src/utils/__pycache__/helper.cpython-311.pyc and b/src/utils/__pycache__/helper.cpython-311.pyc differ
 
src/utils/helper.py CHANGED
@@ -11,6 +11,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
11
  from youtube_comment_downloader import YoutubeCommentDownloader
12
  from src.utils.logger import logger
13
  import requests
 
14
 
15
  State = TypeVar("State", bound=Dict[str, Any])
16
 
@@ -108,29 +109,34 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
108
  return messages
109
 
110
 
 
 
 
 
 
111
  # def extract_transcript(video_link: str):
112
  # ytt_api = YouTubeTranscriptApi()
113
  # # extract video id from video link
114
- # video_id = video_link.split("v=")[1]
 
 
115
  # transcript = ytt_api.fetch(video_id)
116
  # transcript_str = ""
117
  # for trans in transcript:
118
  # transcript_str += trans.text + " "
119
  # logger.info(f"Transcript: {transcript_str}")
120
  # return transcript_str
121
- def extract_video_id(video_link: str):
122
 
123
- return video_link.replace("\u2069", "").split("v=")[1].strip()
124
- import os
125
  def extract_transcript(video_link: str):
126
  try:
127
  # extract video id from video link
128
  video_id = extract_video_id(video_link)
129
-
130
  api_key = os.getenv("SUPADATA_API_KEY")
131
  if not api_key:
132
  raise ValueError("SUPADATA_API_KEY environment variable is not set")
133
-
134
  # Call Supadata API
135
  url = f"https://api.supadata.ai/v1/youtube/transcript"
136
  headers = {
@@ -139,20 +145,20 @@ def extract_transcript(video_link: str):
139
  params = {
140
  "videoId": video_id
141
  }
142
-
143
  response = requests.get(url, headers=headers, params=params)
144
  response.raise_for_status() # Raise exception for non-200 status codes
145
-
146
  data = response.json()
147
  logger.info(f"Data: {data}")
148
  if not data.get("content"):
149
  raise ValueError("No transcript content found in the API response")
150
-
151
  text = ""
152
  for item in data["content"]:
153
  if "text" in item:
154
  text += item["text"] + " "
155
-
156
  logger.info(f"Transcript: {text}")
157
  return text
158
  except requests.exceptions.RequestException as e:
 
11
  from youtube_comment_downloader import YoutubeCommentDownloader
12
  from src.utils.logger import logger
13
  import requests
14
+ import os
15
 
16
  State = TypeVar("State", bound=Dict[str, Any])
17
 
 
109
  return messages
110
 
111
 
112
def extract_video_id(video_link: str):
    """Return the YouTube video ID taken from the ``v=`` parameter of a link.

    The link is first stripped of U+2069 (POP DIRECTIONAL ISOLATE) characters,
    which can ride along when a URL is copied from bidirectional text. The ID
    is the text that follows the first ``v=``, cut at the next ``&`` or ``#``
    so that trailing query parameters (``&t=30s``, ``&list=...``) or a URL
    fragment do not leak into the returned ID.

    Args:
        video_link: A link containing a ``v=<id>`` query parameter,
            e.g. ``https://www.youtube.com/watch?v=<id>``.

    Returns:
        The video ID with surrounding whitespace removed.

    Raises:
        IndexError: If the link contains no ``v=`` parameter (for example a
            ``youtu.be/<id>`` short link).
    """
    cleaned_link = video_link.replace("\u2069", "")
    _, marker, remainder = cleaned_link.partition("v=")
    if not marker:
        # Same exception type the bare ``split("v=")[1]`` lookup produced,
        # but with a message that tells the caller what went wrong.
        raise IndexError(f"No 'v=' parameter found in video link: {video_link!r}")

    # Keep only the text up to the next "v=", exactly as split("v=")[1] did.
    video_id = remainder.partition("v=")[0]
    # Drop any further query parameters or fragment after the ID.
    for delimiter in ("&", "#"):
        video_id = video_id.partition(delimiter)[0]
    return video_id.strip()
115
+
116
+
117
  # def extract_transcript(video_link: str):
118
  # ytt_api = YouTubeTranscriptApi()
119
  # # extract video id from video link
120
+
121
+ # video_id = extract_video_id(video_link)
122
+ # logger.info(f"Video ID: {video_id}")
123
  # transcript = ytt_api.fetch(video_id)
124
  # transcript_str = ""
125
  # for trans in transcript:
126
  # transcript_str += trans.text + " "
127
  # logger.info(f"Transcript: {transcript_str}")
128
  # return transcript_str
 
129
 
130
+
 
131
  def extract_transcript(video_link: str):
132
  try:
133
  # extract video id from video link
134
  video_id = extract_video_id(video_link)
135
+
136
  api_key = os.getenv("SUPADATA_API_KEY")
137
  if not api_key:
138
  raise ValueError("SUPADATA_API_KEY environment variable is not set")
139
+
140
  # Call Supadata API
141
  url = f"https://api.supadata.ai/v1/youtube/transcript"
142
  headers = {
 
145
  params = {
146
  "videoId": video_id
147
  }
148
+
149
  response = requests.get(url, headers=headers, params=params)
150
  response.raise_for_status() # Raise exception for non-200 status codes
151
+
152
  data = response.json()
153
  logger.info(f"Data: {data}")
154
  if not data.get("content"):
155
  raise ValueError("No transcript content found in the API response")
156
+
157
  text = ""
158
  for item in data["content"]:
159
  if "text" in item:
160
  text += item["text"] + " "
161
+
162
  logger.info(f"Transcript: {text}")
163
  return text
164
  except requests.exceptions.RequestException as e: