Spaces:
Sleeping
Sleeping
Update src/utils/helper.py
Browse files- src/utils/helper.py +16 -10
src/utils/helper.py
CHANGED
|
@@ -109,16 +109,26 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
|
|
| 109 |
return messages
|
| 110 |
|
| 111 |
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
# def extract_transcript(video_link: str):
|
| 118 |
# ytt_api = YouTubeTranscriptApi()
|
| 119 |
# # extract video id from video link
|
| 120 |
|
| 121 |
-
# video_id =
|
| 122 |
# logger.info(f"Video ID: {video_id}")
|
| 123 |
# transcript = ytt_api.fetch(video_id)
|
| 124 |
# transcript_str = ""
|
|
@@ -131,7 +141,7 @@ def extract_video_id(video_link: str):
|
|
| 131 |
def extract_transcript(video_link: str):
|
| 132 |
try:
|
| 133 |
# extract video id from video link
|
| 134 |
-
video_id =
|
| 135 |
|
| 136 |
api_key = os.getenv("SUPADATA_API_KEY")
|
| 137 |
if not api_key:
|
|
@@ -139,12 +149,8 @@ def extract_transcript(video_link: str):
|
|
| 139 |
|
| 140 |
# Call Supadata API
|
| 141 |
url = f"https://api.supadata.ai/v1/youtube/transcript"
|
| 142 |
-
headers = {
|
| 143 |
-
|
| 144 |
-
}
|
| 145 |
-
params = {
|
| 146 |
-
"videoId": video_id
|
| 147 |
-
}
|
| 148 |
|
| 149 |
response = requests.get(url, headers=headers, params=params)
|
| 150 |
response.raise_for_status() # Raise exception for non-200 status codes
|
|
|
|
| 109 |
return messages
|
| 110 |
|
| 111 |
|
| 112 |
+
import re
|
| 113 |
|
| 114 |
+
|
| 115 |
+
def extract_video_id_regex(url):
    """
    Extract the 11-character YouTube video ID from a link.

    The ID is recognised either as the value of a ``v=`` query parameter or
    as a path segment (the character run directly after a ``/``).

    Args:
        url: The video link to inspect. Surrounding whitespace is ignored.

    Returns:
        The video ID as a string if found, otherwise None. None is also
        returned when ``url`` is not a string (for example ``None``).
    """
    if not isinstance(url, str):
        # A missing link is reported as "not found" instead of raising
        # TypeError from inside re.search.
        return None

    candidate = url.strip()

    # Strict form first: the ID must be followed by '?', '&' or the end of
    # the string. Trying it first keeps the result unchanged for every link
    # this pattern already recognises.
    strict_pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11})(?:\?|&|$)"
    match = re.search(strict_pattern, candidate)

    if match is None:
        # Fallback: additionally accept a fragment ('#') or a further path
        # separator ('/') right after the ID, e.g. "watch?v=<id>#t=30" or
        # "/embed/<id>/".
        relaxed_pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11})(?:[?&#\/]|$)"
        match = re.search(relaxed_pattern, candidate)

    return match.group(1) if match else None
|
| 125 |
|
| 126 |
|
| 127 |
# def extract_transcript(video_link: str):
|
| 128 |
# ytt_api = YouTubeTranscriptApi()
|
| 129 |
# # extract video id from video link
|
| 130 |
|
| 131 |
+
# video_id = extract_video_id_regex(video_link)
|
| 132 |
# logger.info(f"Video ID: {video_id}")
|
| 133 |
# transcript = ytt_api.fetch(video_id)
|
| 134 |
# transcript_str = ""
|
|
|
|
| 141 |
def extract_transcript(video_link: str):
|
| 142 |
try:
|
| 143 |
# extract video id from video link
|
| 144 |
+
video_id = extract_video_id_regex(video_link)
|
| 145 |
|
| 146 |
api_key = os.getenv("SUPADATA_API_KEY")
|
| 147 |
if not api_key:
|
|
|
|
| 149 |
|
| 150 |
# Call Supadata API
|
| 151 |
url = f"https://api.supadata.ai/v1/youtube/transcript"
|
| 152 |
+
headers = {"x-api-key": api_key}
|
| 153 |
+
params = {"videoId": video_id}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
response = requests.get(url, headers=headers, params=params)
|
| 156 |
response.raise_for_status() # Raise exception for non-200 status codes
|