youngtsai committed on
Commit
cf25313
1 Parent(s): fda35a7

extract_youtube_id

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -15,6 +15,9 @@ import os
15
  from google.oauth2 import service_account
16
  from googleapiclient.discovery import build
17
 
 
 
 
18
  # 假设您的环境变量或Secret的名称是GOOGLE_APPLICATION_CREDENTIALS_JSON
19
  credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
20
  credentials_dict = json.loads(credentials_json_string)
@@ -86,10 +89,24 @@ def format_seconds_to_time(seconds):
86
  seconds = int(seconds % 60)
87
  return f"{hours:02}:{minutes:02}:{seconds:02}"
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def process_youtube_link(link):
90
  # 使用 YouTube API 获取逐字稿
91
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
92
- video_id = link.split("=")[-1]
 
93
  # 先下載 video
94
  download_youtube_video(video_id, output_path=OUTPUT_PATH)
95
  # 再取得 transcript
 
15
  from google.oauth2 import service_account
16
  from googleapiclient.discovery import build
17
 
18
+ from urllib.parse import urlparse, parse_qs
19
+
20
+
21
  # 假设您的环境变量或Secret的名称是GOOGLE_APPLICATION_CREDENTIALS_JSON
22
  credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
23
  credentials_dict = json.loads(credentials_json_string)
 
89
  seconds = int(seconds % 60)
90
  return f"{hours:02}:{minutes:02}:{seconds:02}"
91
 
92
def extract_youtube_id(url):
    """Return the video ID contained in a YouTube URL, or None if there is none.

    Recognised forms:

    * ``https://www.youtube.com/watch?v=<id>`` (``youtube.com`` and any of
      its subdomains, e.g. ``m.`` or ``music.``)
    * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``,
      ``/live/<id>`` and ``/v/<id>``
    * ``https://youtu.be/<id>``

    Links written without a scheme (``youtube.com/watch?v=<id>``) are
    accepted as well.

    Args:
        url: The link to inspect.

    Returns:
        The video ID as a string, or None when ``url`` is empty, is not a
        string, cannot be parsed, or does not point at a YouTube video.
    """
    if not url or not isinstance(url, str):
        return None

    candidate = url.strip()
    try:
        parsed_url = urlparse(candidate)
        if not parsed_url.netloc:
            # Without a scheme urlparse() puts the host into the path, so
            # retry with an explicit scheme to get a usable netloc.
            parsed_url = urlparse("https://" + candidate.lstrip("/"))
    except ValueError:
        # urlparse() rejects malformed authorities (e.g. a broken IPv6 host).
        return None

    # Compare the exact host name; a substring test would also accept
    # look-alike domains such as "notyoutube.com".
    host = (parsed_url.hostname or "").lower()
    segments = [part for part in parsed_url.path.split("/") if part]

    if host in ("youtu.be", "www.youtu.be"):
        # Short links carry the video ID as the first path segment.
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # Standard links carry the video ID in the "v" query parameter.
        video_ids = parse_qs(parsed_url.query).get("v")
        if video_ids:
            return video_ids[0]
        # Embed, Shorts, live and legacy links carry it in the path.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
104
+
105
  def process_youtube_link(link):
106
  # 使用 YouTube API 获取逐字稿
107
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
108
+ video_id = extract_youtube_id(link)
109
+
110
  # 先下載 video
111
  download_youtube_video(video_id, output_path=OUTPUT_PATH)
112
  # 再取得 transcript