video_bot_999

Sleeping

App Files Community

youngtsai committed on Feb 4

Commit

d22eec5

•

1 Parent(s): f99c291

def process_transcript_and_screenshots(video_id):

Browse files

Files changed (1) hide show

app.py +9 -28

app.py CHANGED Viewed

@@ -107,7 +107,8 @@ def upload_content_directly(service, file_name, folder_id, content):
     media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
     # 执行上传
-    service.files().create(body=file_metadata, media_body=media, fields='id').execute()
 def download_file_as_string(service, file_id):
     """
@@ -237,12 +238,11 @@ def process_transcript_and_screenshots(video_id):
     # 处理逐字稿中的每个条目，检查并上传截图
     for entry in transcript:
         if 'img_src' not in entry:
-            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
             img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
             img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
             entry['img_src'] = img_src
-            # 删除本地截图文件
-            os.remove(screenshot_path)
     # 更新逐字稿文件
     updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
@@ -255,11 +255,11 @@ def process_youtube_link(link):
     # 使用 YouTube API 获取逐字稿
     # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
     video_id = extract_youtube_id(link)
     transcript = process_transcript_and_screenshots(video_id)
     formatted_transcript = []
     screenshot_paths = []
-    download_youtube_video(video_id, output_path=OUTPUT_PATH)
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
@@ -321,29 +321,10 @@ def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
 def screenshot_youtube_video(youtube_id, snapshot_sec):
-    service = init_drive_service()
-    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'  # 替换为实际的Google Drive文件夹ID
-    folder_id = create_folder_if_not_exists(service, youtube_id, parent_folder_id)
-    file_name = f'{youtube_id}_{snapshot_sec}.jpg'
-    # 检查Google Drive是否已有截图
-    exists, file_id = check_file_exists(service, folder_id, file_name)
-    if not exists:
-        # 如果没有找到截图，处理视频并上传
-        video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
-        with VideoFileClip(video_path) as video:
-            screenshot_path = f'{OUTPUT_PATH}/{file_name}'
-            video.save_frame(screenshot_path, snapshot_sec)
-        # 上传并获取文件ID
-        file_id = upload_img_directly(service, file_name, folder_id, screenshot_path)
-        print(f"截图已上传到Google Drive, 文件ID: {file_id}")
-        set_public_permission(service, file_id)
-        # 上传后删除本地临时截图文件
-        os.remove(screenshot_path)
-    image_src = f"https://drive.google.com/uc?export=view&id={file_id}"
-    return image_src
 def get_screenshot_from_video(video_link, start_time):
     # 实现从视频中提取帧的逻辑

     media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
     # 执行上传
+    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
+    return file.get('id')
 def download_file_as_string(service, file_id):
     """
     # 处理逐字稿中的每个条目，检查并上传截图
     for entry in transcript:
         if 'img_src' not in entry:
+            screenshot_youtube_video(video_id, entry['start'])
             img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
             img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
             entry['img_src'] = img_src
+            print(f"截图已上传到Google Drive: {img_src}")
     # 更新逐字稿文件
     updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
     # 使用 YouTube API 获取逐字稿
     # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
     video_id = extract_youtube_id(link)
+    download_youtube_video(video_id, output_path=OUTPUT_PATH)
     transcript = process_transcript_and_screenshots(video_id)
     formatted_transcript = []
     screenshot_paths = []
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
 def screenshot_youtube_video(youtube_id, snapshot_sec):
     """Save one frame of a locally stored video as a JPEG file.

     Opens ``{OUTPUT_PATH}/{youtube_id}.mp4`` and writes the frame at
     ``snapshot_sec`` to ``{OUTPUT_PATH}/{youtube_id}_{snapshot_sec}.jpg``.
     The video file is expected to exist already; this function does not
     download it.

     Args:
         youtube_id: Video ID, used to build both the input and output paths.
         snapshot_sec: Time position handed to ``save_frame``.

     Returns:
         The path of the screenshot file that was written.
     """
     # file_name was referenced here without ever being defined, which raised
     # NameError on the first call. Use the same "<id>_<sec>.jpg" pattern
     # that callers use when uploading the screenshot.
     file_name = f'{youtube_id}_{snapshot_sec}.jpg'
     video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
     screenshot_path = f'{OUTPUT_PATH}/{file_name}'
     with VideoFileClip(video_path) as video:
         video.save_frame(screenshot_path, snapshot_sec)
     # Return the path so the caller can upload (and later remove) the file;
     # callers that ignore the return value are unaffected.
     return screenshot_path
 def get_screenshot_from_video(video_link, start_time):
     # 实现从视频中提取帧的逻辑