youngtsai committed on
Commit
d22eec5
1 Parent(s): f99c291

def process_transcript_and_screenshots(video_id):

Browse files
Files changed (1) hide show
  1. app.py +9 -28
app.py CHANGED
@@ -107,7 +107,8 @@ def upload_content_directly(service, file_name, folder_id, content):
107
  media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
108
 
109
  # 执行上传
110
- service.files().create(body=file_metadata, media_body=media, fields='id').execute()
 
111
 
112
  def download_file_as_string(service, file_id):
113
  """
@@ -237,12 +238,11 @@ def process_transcript_and_screenshots(video_id):
237
  # 处理逐字稿中的每个条目,检查并上传截图
238
  for entry in transcript:
239
  if 'img_src' not in entry:
240
- screenshot_path = screenshot_youtube_video(video_id, entry['start'])
241
  img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
242
  img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
243
  entry['img_src'] = img_src
244
- # 删除本地截图文件
245
- os.remove(screenshot_path)
246
 
247
  # 更新逐字稿文件
248
  updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
@@ -255,11 +255,11 @@ def process_youtube_link(link):
255
  # 使用 YouTube API 获取逐字稿
256
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
257
  video_id = extract_youtube_id(link)
 
258
  transcript = process_transcript_and_screenshots(video_id)
259
 
260
  formatted_transcript = []
261
  screenshot_paths = []
262
- download_youtube_video(video_id, output_path=OUTPUT_PATH)
263
  for entry in transcript:
264
  start_time = format_seconds_to_time(entry['start'])
265
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
@@ -321,29 +321,10 @@ def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
321
 
322
 
323
  def screenshot_youtube_video(youtube_id, snapshot_sec):
324
- service = init_drive_service()
325
- parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL' # 替换为实际的Google Drive文件夹ID
326
- folder_id = create_folder_if_not_exists(service, youtube_id, parent_folder_id)
327
- file_name = f'{youtube_id}_{snapshot_sec}.jpg'
328
-
329
- # 检查Google Drive是否已有截图
330
- exists, file_id = check_file_exists(service, folder_id, file_name)
331
- if not exists:
332
- # 如果没有找到截图,处理视频并上传
333
- video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
334
- with VideoFileClip(video_path) as video:
335
- screenshot_path = f'{OUTPUT_PATH}/{file_name}'
336
- video.save_frame(screenshot_path, snapshot_sec)
337
- # 上传并获取文件ID
338
- file_id = upload_img_directly(service, file_name, folder_id, screenshot_path)
339
- print(f"截图已上传到Google Drive, 文件ID: {file_id}")
340
- set_public_permission(service, file_id)
341
-
342
- # 上传后删除本地临时截图文件
343
- os.remove(screenshot_path)
344
-
345
- image_src = f"https://drive.google.com/uc?export=view&id={file_id}"
346
- return image_src
347
 
348
  def get_screenshot_from_video(video_link, start_time):
349
  # 实现从视频中提取帧的逻辑
 
107
  media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
108
 
109
  # 执行上传
110
+ file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
111
+ return file.get('id')
112
 
113
  def download_file_as_string(service, file_id):
114
  """
 
238
  # 处理逐字稿中的每个条目,检查并上传截图
239
  for entry in transcript:
240
  if 'img_src' not in entry:
241
+ screenshot_youtube_video(video_id, entry['start'])
242
  img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
243
  img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
244
  entry['img_src'] = img_src
245
+ print(f"截图已上传到Google Drive: {img_src}")
 
246
 
247
  # 更新逐字稿文件
248
  updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
 
255
  # 使用 YouTube API 获取逐字稿
256
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
257
  video_id = extract_youtube_id(link)
258
+ download_youtube_video(video_id, output_path=OUTPUT_PATH)
259
  transcript = process_transcript_and_screenshots(video_id)
260
 
261
  formatted_transcript = []
262
  screenshot_paths = []
 
263
  for entry in transcript:
264
  start_time = format_seconds_to_time(entry['start'])
265
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
 
321
 
322
 
323
  def screenshot_youtube_video(youtube_id, snapshot_sec):
324
+ video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
325
+ with VideoFileClip(video_path) as video:
326
+ screenshot_path = f'{OUTPUT_PATH}/{file_name}'
327
+ video.save_frame(screenshot_path, snapshot_sec)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
 
329
  def get_screenshot_from_video(video_link, start_time):
330
  # 实现从视频中提取帧的逻辑