Spaces:

ABAO77
/

AutoGenScript

Sleeping

ABAO77 committed on Jul 4, 2025

Commit

f45ead8

verified ·

1 Parent(s): cb82316

Update src/utils/helper.py

Files changed (1) hide show

src/utils/helper.py CHANGED Viewed

@@ -109,16 +109,26 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
     return messages
-def extract_video_id(video_link: str):
-    return video_link.replace("\u2069", "").split("v=")[1].strip()
 # def extract_transcript(video_link: str):
 #     ytt_api = YouTubeTranscriptApi()
 #     # extract video id from video link
-#     video_id = extract_video_id(video_link)
 #     logger.info(f"Video ID: {video_id}")
 #     transcript = ytt_api.fetch(video_id)
 #     transcript_str = ""
@@ -131,7 +141,7 @@ def extract_video_id(video_link: str):
 def extract_transcript(video_link: str):
     try:
         # extract video id from video link
-        video_id = extract_video_id(video_link)
         api_key = os.getenv("SUPADATA_API_KEY")
         if not api_key:
@@ -139,12 +149,8 @@ def extract_transcript(video_link: str):
         # Call Supadata API
         url = f"https://api.supadata.ai/v1/youtube/transcript"
-        headers = {
-            "x-api-key": api_key
-        }
-        params = {
-            "videoId": video_id
-        }
         response = requests.get(url, headers=headers, params=params)
         response.raise_for_status()  # Raise exception for non-200 status codes

     return messages
+import re
def extract_video_id_regex(url):
    """
    Extracts the YouTube video ID using a regular expression.

    The link is first cleaned of surrounding whitespace and of invisible
    formatting characters (zero-width and bidirectional control marks) that
    often come along when a URL is copied from a chat or a web page.
    Without this cleanup a trailing invisible character would sit between
    the ID and the end of the string and prevent the pattern from matching.

    Args:
        url: The video link, e.g. "https://www.youtube.com/watch?v=<id>" or
            "https://youtu.be/<id>".

    Returns:
        The video ID as a string if found, otherwise None (also when `url`
        is not a string).
    """
    if not isinstance(url, str):
        return None

    # Code points that render as nothing but break the end-of-ID anchor.
    invisible = dict.fromkeys(
        (
            0x200B, 0x200C, 0x200D, 0x200E, 0x200F,  # zero-width / LRM / RLM
            0x202A, 0x202B, 0x202C, 0x202D, 0x202E,  # bidi embeddings/overrides
            0x2066, 0x2067, 0x2068, 0x2069,          # bidi isolates
            0xFEFF,                                  # BOM / zero-width no-break
        )
    )
    cleaned = url.translate(invisible).strip()

    # An 11-character ID that follows "v=" or "/" and is terminated by the
    # start of the query string, another parameter, a fragment, or the end.
    pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11})(?:\?|&|#|$)"
    match = re.search(pattern, cleaned)
    return match.group(1) if match else None
 # def extract_transcript(video_link: str):
 #     ytt_api = YouTubeTranscriptApi()
 #     # extract video id from video link
+#     video_id = extract_video_id_regex(video_link)
 #     logger.info(f"Video ID: {video_id}")
 #     transcript = ytt_api.fetch(video_id)
 #     transcript_str = ""
 def extract_transcript(video_link: str):
     try:
         # extract video id from video link
+        video_id = extract_video_id_regex(video_link)
         api_key = os.getenv("SUPADATA_API_KEY")
         if not api_key:
         # Call Supadata API
         url = f"https://api.supadata.ai/v1/youtube/transcript"
+        headers = {"x-api-key": api_key}
+        params = {"videoId": video_id}
         response = requests.get(url, headers=headers, params=params)
         response.raise_for_status()  # Raise exception for non-200 status codes