youngtsai committed on
Commit
3cc7368
1 Parent(s): 642f40d

print("逐字稿已上传到GCS")

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -423,17 +423,18 @@ def process_transcript_and_screenshots_on_gcs(video_id):
423
  gcs_client = init_gcs_client(GCS_KEY)
424
  bucket_name = 'video_ai_assistant'
425
  # 检查 folder 是否存在
426
- is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
427
- if not is_gcs_exists:
428
- gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
429
- print("GCS folder:{video_id} 已创建")
430
- else:
431
- print("GCS folder:{video_id} 已存在")
432
 
433
  # 逐字稿文件名
434
- file_name = f'{video_id}_transcript.json'
 
435
  # 检查逐字稿是否存在
436
- exists = gcs_check_file_exists(gcs_client, bucket_name, file_name)
437
  if not exists:
438
  # 从YouTube获取逐字稿并上传
439
  transcript = get_transcript(video_id)
@@ -442,22 +443,21 @@ def process_transcript_and_screenshots_on_gcs(video_id):
442
  else:
443
  print("沒有找到字幕")
444
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
445
- destination_blob_name = f"{video_id}/{file_name}"
446
- upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, transcript_text)
447
-
448
- raise gr.Error("test")
449
- # # 处理逐字稿中的每个条目,检查并上传截图 GCS,然後設定 GCS 權限
450
- # for entry in transcript:
451
- # if 'img_file_id' not in entry:
452
- # screenshot_path = screenshot_youtube_video(video_id, entry['start'])
453
- # img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
454
- # entry['img_file_id'] = img_file_id
455
- # print(f"截图已上传到GCS: {img_file_id}")
456
 
457
- # # 更新逐字稿文件
458
- # updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
459
- # upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
460
- # print("逐字稿已更新,包括截图链接")
461
 
462
  return transcript
463
 
 
423
  gcs_client = init_gcs_client(GCS_KEY)
424
  bucket_name = 'video_ai_assistant'
425
  # 检查 folder 是否存在
426
+ # is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
427
+ # if not is_gcs_exists:
428
+ # gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
429
+ # print("GCS folder:{video_id} 已创建")
430
+ # else:
431
+ # print("GCS folder:{video_id} 已存在")
432
 
433
  # 逐字稿文件名
434
+ transcript_file_name = f'{video_id}_transcript.json'
435
+ transcript_blob_name = f"{video_id}/{transcript_file_name}"
436
  # 检查逐字稿是否存在
437
+ exists = gcs_check_file_exists(gcs_client, bucket_name, transcript_blob_name)
438
  if not exists:
439
  # 从YouTube获取逐字稿并上传
440
  transcript = get_transcript(video_id)
 
443
  else:
444
  print("沒有找到字幕")
445
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
446
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
447
+ print("逐字稿已上传到GCS")
448
+
449
+ for entry in transcript:
450
+ if 'img_file_id' not in entry:
451
+ screenshot_path = screenshot_youtube_video(video_id, entry['start'])
452
+ transcript_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
453
+ img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, transcript_blob_name, screenshot_path)
454
+ entry['img_file_id'] = img_file_id
455
+ print(f"截图已上传到GCS: {img_file_id}")
 
456
 
457
+ # 更新逐字稿文件
458
+ updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
459
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
460
+ print("逐字稿已更新,包括截图链接")
461
 
462
  return transcript
463