youngtsai committed on
Commit
a48473e
1 Parent(s): 474b2c8

print("逐字稿已存在于Google Drive中")

Browse files
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -108,6 +108,19 @@ def upload_content_directly(service, file_name, folder_id, content):
108
  # 执行上传
109
  service.files().create(body=file_metadata, media_body=media, fields='id').execute()
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  def process_file(file):
113
  # 读取文件
@@ -178,6 +191,7 @@ def process_youtube_link(link):
178
  file_name = f"{video_id}_transcript.txt"
179
 
180
  # 检查逐字稿是否存在
 
181
  exists, file_id = check_file_exists(service, folder_id, file_name)
182
  if not exists:
183
  # 获取逐字稿
@@ -188,9 +202,9 @@ def process_youtube_link(link):
188
  print("逐字稿已上传到Google Drive")
189
  else:
190
  print("逐字稿已存在于Google Drive中")
 
 
191
 
192
- # 再取得 transcript
193
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
194
  # 基于逐字稿生成其他所需的输出
195
  questions = generate_questions(transcript)
196
  df_summarise = generate_df_summarise(transcript)
 
108
  # 执行上传
109
  service.files().create(body=file_metadata, media_body=media, fields='id').execute()
110
 
111
def download_file_as_string(service, file_id):
    """Download a file from Google Drive and return its contents as a string.

    Args:
        service: Client object exposing ``files().get_media(fileId=...)``.
        file_id: Identifier of the file to fetch.

    Returns:
        The downloaded bytes decoded as UTF-8 text.
    """
    media_request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    chunk_downloader = MediaIoBaseDownload(buffer, media_request)

    # Keep pulling chunks until the downloader reports that it has finished.
    finished = False
    while finished is False:
        _progress, finished = chunk_downloader.next_chunk()

    # getvalue() yields the whole buffer regardless of the current position.
    return buffer.getvalue().decode('utf-8')
124
 
125
  def process_file(file):
126
  # 读取文件
 
191
  file_name = f"{video_id}_transcript.txt"
192
 
193
  # 检查逐字稿是否存在
194
+ transcript = None
195
  exists, file_id = check_file_exists(service, folder_id, file_name)
196
  if not exists:
197
  # 获取逐字稿
 
202
  print("逐字稿已上传到Google Drive")
203
  else:
204
  print("逐字稿已存在于Google Drive中")
205
+ transcript_text = download_file_as_string(service, file_id)
206
+ transcript = json.loads(transcript_text)
207
 
 
 
208
  # 基于逐字稿生成其他所需的输出
209
  questions = generate_questions(transcript)
210
  df_summarise = generate_df_summarise(transcript)