youngtsai committed on
Commit
2df824b
1 Parent(s): 1491bd4

upload_file_to_gcs_with_json_string

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -93,12 +93,20 @@ def gcs_check_file_exists(gcs_client, bucket_name, file_name):
93
  blob = bucket.blob(file_name)
94
  return blob.exists()
95
 
96
- def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, source_file_name):
97
  """上传文件到指定的 GCS 存储桶"""
98
  bucket = gcs_client.bucket(bucket_name)
99
  blob = bucket.blob(destination_blob_name)
100
- blob.upload_from_filename(source_file_name)
101
- print(f"File {source_file_name} uploaded to {destination_blob_name}.")
 
 
 
 
 
 
 
 
102
 
103
  def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
104
  """从 GCS 下载文件内容到字符串"""
@@ -433,20 +441,22 @@ def process_transcript_and_screenshots_on_gcs(video_id):
433
  print("成功獲取字幕")
434
  else:
435
  print("沒有找到字幕")
436
- transcript
 
 
437
 
438
- # 处理逐字稿中的每个条目,检查并上传截图 到 GCS,然後設定 GCS 權限
439
- for entry in transcript:
440
- if 'img_file_id' not in entry:
441
- screenshot_path = screenshot_youtube_video(video_id, entry['start'])
442
- img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
443
- entry['img_file_id'] = img_file_id
444
- print(f"截图已上传到GCS: {img_file_id}")
445
 
446
- # 更新逐字稿文件
447
- updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
448
- upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
449
- print("逐字稿已更新,包括截图链接")
450
 
451
  return transcript
452
 
 
93
  blob = bucket.blob(file_name)
94
  return blob.exists()
95
 
96
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
    """Upload a local file to the given GCS bucket.

    Args:
        gcs_client: Storage client object; only its ``bucket(name)`` method
            is used here (presumably a google-cloud-storage ``Client`` --
            confirm against the caller).
        bucket_name: Name of the bucket that receives the object.
        destination_blob_name: Object name to write inside the bucket.
        file_path: Path of the local file handed to ``upload_from_filename``.
    """
    # Resolve the destination object in one chained lookup, then upload.
    target_blob = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(file_path)
    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
102
+
103
def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
    """Upload an in-memory JSON string to the given GCS bucket.

    Unlike ``upload_file_to_gcs``, no local file is involved: the string
    itself becomes the object's content.

    Args:
        gcs_client: Storage client object; only its ``bucket(name)`` method
            is used here (presumably a google-cloud-storage ``Client`` --
            confirm against the caller).
        bucket_name: Name of the bucket that receives the object.
        destination_blob_name: Object name to write inside the bucket.
        json_string: Already-serialized JSON text to store.
    """
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    # Without an explicit content_type the library stores the object as
    # "text/plain"; label it as JSON so consumers see the correct media type.
    blob.upload_from_string(json_string, content_type="application/json")
    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
109
+
110
 
111
  def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
112
  """从 GCS 下载文件内容到字符串"""
 
441
  print("成功獲取字幕")
442
  else:
443
  print("沒有找到字幕")
444
+ transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
445
+ destination_blob_name = f"{video_id}/{file_name}"
446
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, transcript_text)
447
 
448
+ # # 处理逐字稿中的每个条目,检查并上传截图 到 GCS,然後設定 GCS 權限
449
+ # for entry in transcript:
450
+ # if 'img_file_id' not in entry:
451
+ # screenshot_path = screenshot_youtube_video(video_id, entry['start'])
452
+ # img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
453
+ # entry['img_file_id'] = img_file_id
454
+ # print(f"截图已上传到GCS: {img_file_id}")
455
 
456
+ # # 更新逐字稿文件
457
+ # updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
458
+ # upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
459
+ # print("逐字稿已更新,包括截图链接")
460
 
461
  return transcript
462