youngtsai committed on
Commit
2e1a0a6
1 Parent(s): 345a1ff
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -442,19 +442,26 @@ def process_transcript_and_screenshots_on_gcs(video_id):
442
  else:
443
  print("沒有找到字幕")
444
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
 
445
  else:
446
  # 逐字稿已存在,下载逐字稿内容
447
  print("逐字稿已存在于GCS中")
448
  transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
449
  transcript = json.loads(transcript_text)
450
 
451
- for entry in transcript:
452
- if 'img_file_id' not in entry:
453
- screenshot_path = screenshot_youtube_video(video_id, entry['start'])
454
- screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
455
- img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
456
- entry['img_file_id'] = img_file_id
457
- print(f"截图已上传到GCS: {img_file_id}")
 
 
 
 
 
 
458
 
459
  # 更新逐字稿文件
460
  print("===更新逐字稿文件===")
@@ -491,7 +498,8 @@ def process_youtube_link(link):
491
  start_time = format_seconds_to_time(entry['start'])
492
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
493
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
494
- img_file_id = entry['img_file_id']
 
495
  # 先取消 Google Drive 的图片
496
  # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
497
  screenshot_path = img_file_id
 
442
  else:
443
  print("沒有找到字幕")
444
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
445
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
446
  else:
447
  # 逐字稿已存在,下载逐字稿内容
448
  print("逐字稿已存在于GCS中")
449
  transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
450
  transcript = json.loads(transcript_text)
451
 
452
+ source = "gcs"
453
+ get_questions(video_id, transcript_text, source)
454
+ get_video_id_summary(video_id, transcript_text, source)
455
+ get_mind_map(video_id, transcript_text, source)
456
+
457
+ # 處理截圖
458
+ # for entry in transcript:
459
+ # if 'img_file_id' not in entry:
460
+ # screenshot_path = screenshot_youtube_video(video_id, entry['start'])
461
+ # screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
462
+ # img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
463
+ # entry['img_file_id'] = img_file_id
464
+ # print(f"截图已上传到GCS: {img_file_id}")
465
 
466
  # 更新逐字稿文件
467
  print("===更新逐字稿文件===")
 
498
  start_time = format_seconds_to_time(entry['start'])
499
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
500
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
501
+ # img_file_id = entry['img_file_id']
502
+ img_file_id =""
503
  # 先取消 Google Drive 的图片
504
  # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
505
  screenshot_path = img_file_id