youngtsai committed on
Commit
c282048
1 Parent(s): 84e1cf2
Files changed (1) hide show
  1. app.py +61 -7
app.py CHANGED
@@ -67,20 +67,62 @@ def init_gcs_client(service_account_key_string):
67
  gcs_client = storage.Client(credentials=credentials, project=credentials_dict['project_id'])
68
  return gcs_client
69
 
70
def upload_file_to_gcs(gcs_client, bucket_name, source_file_name, destination_blob_name, service_account_key_file):
    """Upload a local file to GCS.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to write inside the bucket.
        service_account_key_file: Not used by this function; kept so the
            signature stays the same for existing callers.
    """
    target = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(source_file_name)
    print(f"{source_file_name} uploaded to {destination_blob_name}.")
76
 
77
def download_file_from_gcs(gcs_client, bucket_name, source_blob_name, destination_file_name, service_account_key_file):
    """Download an object from GCS to a local file.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket holding the object.
        source_blob_name: Name of the object to download.
        destination_file_name: Local path the content is written to.
        service_account_key_file: Not used by this function; kept so the
            signature stays the same for existing callers.
    """
    source = gcs_client.bucket(bucket_name).blob(source_blob_name)
    source.download_to_filename(destination_file_name)
    print(f"{source_blob_name} downloaded to {destination_file_name}.")
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  # # ====drive====初始化Google Drive服务
86
  def init_drive_service():
@@ -278,6 +320,8 @@ def get_transcript(video_id):
278
 
279
  def process_transcript_and_screenshots(video_id):
280
  print("====process_transcript_and_screenshots====")
 
 
281
  service = init_drive_service()
282
  parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
283
  folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
@@ -316,6 +360,16 @@ def process_transcript_and_screenshots(video_id):
316
  update_file_on_drive(service, file_id, updated_transcript_text)
317
  print("逐字稿已更新,包括截图链接")
318
 
 
 
 
 
 
 
 
 
 
 
319
  return transcript
320
 
321
  def process_youtube_link(link):
 
67
  gcs_client = storage.Client(credentials=credentials, project=credentials_dict['project_id'])
68
  return gcs_client
69
 
70
def gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, folder_name):
    """Ensure a folder placeholder object exists in the bucket.

    GCS has a flat namespace, so a "folder" is simulated by a zero-byte
    object. The placeholder is stored under a name ending in ``/``; without
    the trailing slash the object would be an ordinary empty file sitting
    next to the ``folder_name/...`` objects instead of representing them.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket to create the placeholder in.
        folder_name: Folder name, with or without a trailing ``/``.
    """
    # Normalise so both "abc" and "abc/" map to the same placeholder object.
    marker_name = folder_name if folder_name.endswith('/') else f"{folder_name}/"
    bucket = gcs_client.bucket(bucket_name)
    marker = bucket.blob(marker_name)
    if marker.exists():
        print(f"GCS Folder '{folder_name}' already exists.")
        return
    marker.upload_from_string('', content_type='application/x-www-form-urlencoded;charset=UTF-8')
    print(f"GCS Folder '{folder_name}' created.")
79
+
80
def gcs_check_folder_exists(gcs_client, bucket_name, folder_name):
    """Report whether any object in the bucket starts with ``folder_name``.

    Only one result is requested from the listing, because the answer
    depends solely on whether at least one matching object exists; this
    avoids fetching every object under a large prefix.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket to inspect.
        folder_name: Prefix to look for. It is passed to the listing as-is,
            so any object whose name begins with this text counts.

    Returns:
        True if at least one object matches the prefix, otherwise False.
    """
    bucket = gcs_client.bucket(bucket_name)
    matches = bucket.list_blobs(prefix=folder_name, max_results=1)
    return any(True for _ in matches)
85
+
86
def gcs_check_file_exists(gcs_client, bucket_name, file_name):
    """Report whether the object ``file_name`` exists in the bucket.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket to inspect.
        file_name: Full object path, in the form
            ``{folder_name}/{file_name}``.

    Returns:
        Whatever the blob's ``exists()`` call returns.
    """
    return gcs_client.bucket(bucket_name).blob(file_name).exists()
94
+
95
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, source_file_name):
    """Upload a local file to the given GCS bucket.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name to write inside the bucket.
        source_file_name: Path of the local file to upload.
    """
    target = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")
101
 
102
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
    """Fetch the content of a GCS object and return it as text.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket holding the object.
        source_blob_name: Name of the object to read.

    Returns:
        The result of the blob's ``download_as_text()`` call.
    """
    return gcs_client.bucket(bucket_name).blob(source_blob_name).download_as_text()
 
107
 
108
def make_blob_public(gcs_client, bucket_name, blob_name):
    """Make the given GCS object publicly readable and print its public URL.

    Args:
        gcs_client: Client object exposing ``bucket(name)``.
        bucket_name: Name of the bucket holding the object.
        blob_name: Name of the object to expose.
    """
    target = gcs_client.bucket(bucket_name).blob(blob_name)
    target.make_public()
    print(f"Blob {blob_name} is now publicly accessible at {target.public_url}")
114
+
115
def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
    """Copy every non-trashed file in a Drive folder into a GCS folder.

    The Drive listing is paginated, so the loop keeps requesting pages until
    the response no longer carries a ``nextPageToken``. Reading only the
    first response would silently drop files in larger folders.

    Args:
        drive_service: Drive API service object exposing ``files().list``.
        gcs_client: GCS client forwarded to the per-file copy helper.
        drive_folder_id: ID of the Drive folder whose children are copied.
        bucket_name: Name of the destination GCS bucket.
        gcs_folder_name: Destination prefix; each file is written to
            ``{gcs_folder_name}/{file name}``.
    """
    query = f"'{drive_folder_id}' in parents and trashed = false"
    page_token = None  # None requests the first page
    while True:
        response = drive_service.files().list(q=query, pageToken=page_token).execute()
        for entry in response.get('files', []):
            gcs_destination_path = f"{gcs_folder_name}/{entry['name']}"
            copy_file_from_drive_to_gcs(drive_service, gcs_client, entry['id'], bucket_name, gcs_destination_path)
        page_token = response.get('nextPageToken')
        if not page_token:
            break
126
 
127
  # # ====drive====初始化Google Drive服务
128
  def init_drive_service():
 
320
 
321
  def process_transcript_and_screenshots(video_id):
322
  print("====process_transcript_and_screenshots====")
323
+
324
+ # Drive
325
  service = init_drive_service()
326
  parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
327
  folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
 
360
  update_file_on_drive(service, file_id, updated_transcript_text)
361
  print("逐字稿已更新,包括截图链接")
362
 
363
+ # init gcs client
364
+ gcs_client = init_gcs_client(GCS_KEY)
365
+ bucket_name = 'video_ai_assistant'
366
+ # 检查 folder 是否存在
367
+ is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
368
+ if not is_gcs_exists:
369
+ gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
370
+ copy_all_files_from_drive_to_gcs(service, gcs_client, folder_id, bucket_name, video_id)
371
+ print("Drive file 已上传到GCS")
372
+
373
  return transcript
374
 
375
  def process_youtube_link(link):