ABAO77 committed on
Commit
f45ead8
·
verified ·
1 Parent(s): cb82316

Update src/utils/helper.py

Browse files
Files changed (1) hide show
  1. src/utils/helper.py +16 -10
src/utils/helper.py CHANGED
@@ -109,16 +109,26 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
109
  return messages
110
 
111
 
112
- def extract_video_id(video_link: str):
113
 
114
- return video_link.replace("\u2069", "").split("v=")[1].strip()
 
 
 
 
 
 
 
 
 
 
115
 
116
 
117
  # def extract_transcript(video_link: str):
118
  # ytt_api = YouTubeTranscriptApi()
119
  # # extract video id from video link
120
 
121
- # video_id = extract_video_id(video_link)
122
  # logger.info(f"Video ID: {video_id}")
123
  # transcript = ytt_api.fetch(video_id)
124
  # transcript_str = ""
@@ -131,7 +141,7 @@ def extract_video_id(video_link: str):
131
  def extract_transcript(video_link: str):
132
  try:
133
  # extract video id from video link
134
- video_id = extract_video_id(video_link)
135
 
136
  api_key = os.getenv("SUPADATA_API_KEY")
137
  if not api_key:
@@ -139,12 +149,8 @@ def extract_transcript(video_link: str):
139
 
140
  # Call Supadata API
141
  url = f"https://api.supadata.ai/v1/youtube/transcript"
142
- headers = {
143
- "x-api-key": api_key
144
- }
145
- params = {
146
- "videoId": video_id
147
- }
148
 
149
  response = requests.get(url, headers=headers, params=params)
150
  response.raise_for_status() # Raise exception for non-200 status codes
 
109
  return messages
110
 
111
 
112
+ import re
113
 
114
+
115
def extract_video_id_regex(url):
    """
    Extract the 11-character YouTube video ID from a link.

    Invisible Unicode directional-formatting characters (for example
    U+2069, which copy/paste can leave at the end of a link) and
    surrounding whitespace are removed before matching, so such links
    still resolve instead of silently yielding None.

    Args:
        url: The video link, e.g. a ``watch?v=<id>`` or ``youtu.be/<id>`` URL.

    Returns:
        The video ID as a string if found, otherwise None (including for
        None or empty input).
    """
    if not url:
        return None

    # Strip bidi control characters (LRM/RLM, embeddings/overrides, isolates)
    # that are invisible in pasted links but would otherwise sit between the
    # ID and the end of the string and defeat the terminator check below.
    cleaned = re.sub(r"[\u200e\u200f\u202a-\u202e\u2066-\u2069]", "", url).strip()

    # The ID follows "v=" or a "/" and must be terminated by a query/fragment
    # delimiter or the end of the string, so longer tokens are not truncated
    # into a bogus 11-character ID.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?:[?&#]|$)"
    match = re.search(pattern, cleaned)
    return match.group(1) if match else None
125
 
126
 
127
  # def extract_transcript(video_link: str):
128
  # ytt_api = YouTubeTranscriptApi()
129
  # # extract video id from video link
130
 
131
+ # video_id = extract_video_id_regex(video_link)
132
  # logger.info(f"Video ID: {video_id}")
133
  # transcript = ytt_api.fetch(video_id)
134
  # transcript_str = ""
 
141
  def extract_transcript(video_link: str):
142
  try:
143
  # extract video id from video link
144
+ video_id = extract_video_id_regex(video_link)
145
 
146
  api_key = os.getenv("SUPADATA_API_KEY")
147
  if not api_key:
 
149
 
150
  # Call Supadata API
151
  url = f"https://api.supadata.ai/v1/youtube/transcript"
152
+ headers = {"x-api-key": api_key}
153
+ params = {"videoId": video_id}
 
 
 
 
154
 
155
  response = requests.get(url, headers=headers, params=params)
156
  response.raise_for_status() # Raise exception for non-200 status codes