youngtsai committed on
Commit
d363c44
1 Parent(s): c7454f7

from youtube_transcript_api._errors import NoTranscriptFound

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -6,7 +6,10 @@ from docx import Document
6
  import os
7
  from openai import OpenAI
8
  import json
 
9
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
10
 
11
  from moviepy.editor import VideoFileClip
12
  from pytube import YouTube
@@ -218,6 +221,16 @@ def extract_youtube_id(url):
218
  else:
219
  return None
220
 
 
 
 
 
 
 
 
 
 
 
221
  def process_transcript_and_screenshots(video_id):
222
  print("====process_transcript_and_screenshots====")
223
  service = init_drive_service()
@@ -229,7 +242,11 @@ def process_transcript_and_screenshots(video_id):
229
  exists, file_id = check_file_exists(service, folder_id, file_name)
230
  if not exists:
231
  # 从YouTube获取逐字稿并上传
232
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
 
 
 
 
233
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
234
  file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
235
  print("逐字稿已上传到Google Drive")
 
6
  import os
7
  from openai import OpenAI
8
  import json
9
+
10
  from youtube_transcript_api import YouTubeTranscriptApi
11
+ from youtube_transcript_api._errors import NoTranscriptFound
12
+
13
 
14
  from moviepy.editor import VideoFileClip
15
  from pytube import YouTube
 
221
  else:
222
  return None
223
 
224
def get_transcript(video_id):
    """Fetch the transcript for a YouTube video in the preferred language.

    Languages are tried in priority order: Traditional Chinese ('zh-TW',
    then 'zh-Hant'), then English ('en').

    Args:
        video_id: YouTube video ID passed straight through to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns for the first
        available language, or ``None`` when no transcript exists in any of
        the preferred languages.

    Raises:
        Any exception from the transcript API other than ``NoTranscriptFound``
        is propagated unchanged.
    """
    # Priority-ordered list. The library walks it in order and returns the
    # first language that has a transcript, so one call replaces a
    # per-language retry loop and avoids repeated network requests.
    languages = ['zh-TW', 'zh-Hant', 'en']
    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except NoTranscriptFound:
        # None of the preferred languages is available for this video.
        return None
233
+
234
  def process_transcript_and_screenshots(video_id):
235
  print("====process_transcript_and_screenshots====")
236
  service = init_drive_service()
 
242
  exists, file_id = check_file_exists(service, folder_id, file_name)
243
  if not exists:
244
  # 从YouTube获取逐字稿并上传
245
+ transcript = get_transcript(video_id)
246
+ if transcript:
247
+ print("成功獲取字幕")
248
+ else:
249
+ print("沒有找到字幕")
250
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
251
  file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
252
  print("逐字稿已上传到Google Drive")