youngtsai committed on
Commit
7ed5900
1 Parent(s): a931b41

folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)

Browse files
Files changed (1) hide show
  1. app.py +26 -3
app.py CHANGED
@@ -14,6 +14,8 @@ import os
14
 
15
  from google.oauth2 import service_account
16
  from googleapiclient.discovery import build
 
 
17
 
18
  from urllib.parse import urlparse, parse_qs
19
 
@@ -55,6 +57,24 @@ def init_drive_service():
55
  service = build('drive', 'v3', credentials=credentials)
56
  return service
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # 检查Google Drive上是否存在文件
59
  def check_file_exists(service, folder_name, file_name):
60
  query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
@@ -136,17 +156,20 @@ def process_youtube_link(link):
136
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
137
  video_id = extract_youtube_id(link)
138
  service = init_drive_service()
139
- folder_name = 'youtube逐字稿圖檔/{video_id}' # Google Drive上的文件夹ID
 
 
 
140
  file_name = f"{video_id}_transcript.txt"
141
 
142
  # 检查逐字稿是否存在
143
- exists, file_id = check_file_exists(service, folder_name, file_name)
144
  if not exists:
145
  # 获取逐字稿
146
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
147
  transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
148
  # 上传到Google Drive
149
- upload_to_drive(service, file_name, folder_name, transcript_text)
150
  print("逐字稿已上传到Google Drive")
151
  else:
152
  print("逐字稿已存在于Google Drive中")
 
14
 
15
  from google.oauth2 import service_account
16
  from googleapiclient.discovery import build
17
+ from googleapiclient.http import MediaFileUpload
18
+
19
 
20
  from urllib.parse import urlparse, parse_qs
21
 
 
57
  service = build('drive', 'v3', credentials=credentials)
58
  return service
59
 
60
def create_folder_if_not_exists(service, folder_name, parent_id):
    """Return the ID of the folder named ``folder_name`` under ``parent_id``, creating it if absent.

    Args:
        service: Object exposing ``files().list(...)`` and ``files().create(...)``,
            each returning a request with ``execute()`` (here, the Drive v3
            client built by ``build('drive', 'v3', ...)``).
        folder_name: Name of the folder to look up or create.
        parent_id: ID of the parent folder to search in and create under.

    Returns:
        The ``id`` of the first matching non-trashed folder, or the ``id``
        reported by the create call when no match was found.
    """
    folder_mime_type = 'application/vnd.google-apps.folder'

    def _quote(value):
        # Query string literals are single-quoted; an unescaped backslash or
        # single quote in the value would terminate the literal early and
        # produce a malformed (or unintended) query.
        return str(value).replace('\\', '\\\\').replace("'", "\\'")

    query = (
        f"mimeType='{folder_mime_type}' "
        f"and name='{_quote(folder_name)}' "
        f"and '{_quote(parent_id)}' in parents "
        "and trashed=false"
    )
    response = service.files().list(q=query, spaces='drive', fields="files(id, name)").execute()
    folders = response.get('files', [])
    if folders:
        # Folder already exists: reuse the first match.
        return folders[0]['id']

    # Folder does not exist: create it under the given parent.
    file_metadata = {
        'name': folder_name,
        'mimeType': folder_mime_type,
        'parents': [parent_id],
    }
    folder = service.files().create(body=file_metadata, fields='id').execute()
    return folder.get('id')
77
+
78
  # 检查Google Drive上是否存在文件
79
  def check_file_exists(service, folder_name, file_name):
80
  query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
 
156
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
157
  video_id = extract_youtube_id(link)
158
  service = init_drive_service()
159
+ parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL' # youtube逐字稿圖檔的ID
160
+
161
+ # 检查/创建视频ID命名的子文件夹
162
+ folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
163
  file_name = f"{video_id}_transcript.txt"
164
 
165
  # 检查逐字稿是否存在
166
+ exists, file_id = check_file_exists(service, folder_id, file_name)
167
  if not exists:
168
  # 获取逐字稿
169
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
170
  transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
171
  # 上传到Google Drive
172
+ upload_to_drive(service, file_name, folder_id, transcript_text)
173
  print("逐字稿已上传到Google Drive")
174
  else:
175
  print("逐字稿已存在于Google Drive中")