Spaces:
Sleeping
Sleeping
def process_transcript_and_screenshots(video_id):
Browse files
app.py
CHANGED
@@ -107,7 +107,8 @@ def upload_content_directly(service, file_name, folder_id, content):
|
|
107 |
media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
|
108 |
|
109 |
# 执行上传
|
110 |
-
service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
|
|
111 |
|
112 |
def download_file_as_string(service, file_id):
|
113 |
"""
|
@@ -237,12 +238,11 @@ def process_transcript_and_screenshots(video_id):
|
|
237 |
# 处理逐字稿中的每个条目,检查并上传截图
|
238 |
for entry in transcript:
|
239 |
if 'img_src' not in entry:
|
240 |
-
|
241 |
img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
|
242 |
img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
|
243 |
entry['img_src'] = img_src
|
244 |
-
|
245 |
-
os.remove(screenshot_path)
|
246 |
|
247 |
# 更新逐字稿文件
|
248 |
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
@@ -255,11 +255,11 @@ def process_youtube_link(link):
|
|
255 |
# 使用 YouTube API 获取逐字稿
|
256 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
257 |
video_id = extract_youtube_id(link)
|
|
|
258 |
transcript = process_transcript_and_screenshots(video_id)
|
259 |
|
260 |
formatted_transcript = []
|
261 |
screenshot_paths = []
|
262 |
-
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
263 |
for entry in transcript:
|
264 |
start_time = format_seconds_to_time(entry['start'])
|
265 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
@@ -321,29 +321,10 @@ def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
|
|
321 |
|
322 |
|
323 |
def screenshot_youtube_video(youtube_id, snapshot_sec):
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
# 检查Google Drive是否已有截图
|
330 |
-
exists, file_id = check_file_exists(service, folder_id, file_name)
|
331 |
-
if not exists:
|
332 |
-
# 如果没有找到截图,处理视频并上传
|
333 |
-
video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
|
334 |
-
with VideoFileClip(video_path) as video:
|
335 |
-
screenshot_path = f'{OUTPUT_PATH}/{file_name}'
|
336 |
-
video.save_frame(screenshot_path, snapshot_sec)
|
337 |
-
# 上传并获取文件ID
|
338 |
-
file_id = upload_img_directly(service, file_name, folder_id, screenshot_path)
|
339 |
-
print(f"截图已上传到Google Drive, 文件ID: {file_id}")
|
340 |
-
set_public_permission(service, file_id)
|
341 |
-
|
342 |
-
# 上传后删除本地临时截图文件
|
343 |
-
os.remove(screenshot_path)
|
344 |
-
|
345 |
-
image_src = f"https://drive.google.com/uc?export=view&id={file_id}"
|
346 |
-
return image_src
|
347 |
|
348 |
def get_screenshot_from_video(video_link, start_time):
|
349 |
# 实现从视频中提取帧的逻辑
|
|
|
107 |
media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
|
108 |
|
109 |
# 执行上传
|
110 |
+
file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
111 |
+
return file.get('id')
|
112 |
|
113 |
def download_file_as_string(service, file_id):
|
114 |
"""
|
|
|
238 |
# 处理逐字稿中的每个条目,检查并上传截图
|
239 |
for entry in transcript:
|
240 |
if 'img_src' not in entry:
|
241 |
+
screenshot_youtube_video(video_id, entry['start'])
|
242 |
img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
|
243 |
img_src = f"https://drive.google.com/uc?export=view&id={img_file_id}"
|
244 |
entry['img_src'] = img_src
|
245 |
+
print(f"截图已上传到Google Drive: {img_src}")
|
|
|
246 |
|
247 |
# 更新逐字稿文件
|
248 |
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
|
|
255 |
# 使用 YouTube API 获取逐字稿
|
256 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
257 |
video_id = extract_youtube_id(link)
|
258 |
+
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
259 |
transcript = process_transcript_and_screenshots(video_id)
|
260 |
|
261 |
formatted_transcript = []
|
262 |
screenshot_paths = []
|
|
|
263 |
for entry in transcript:
|
264 |
start_time = format_seconds_to_time(entry['start'])
|
265 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
|
|
321 |
|
322 |
|
323 |
def screenshot_youtube_video(youtube_id, snapshot_sec):
|
324 |
+
video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
|
325 |
+
with VideoFileClip(video_path) as video:
|
326 |
+
screenshot_path = f'{OUTPUT_PATH}/{file_name}'
|
327 |
+
video.save_frame(screenshot_path, snapshot_sec)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
|
329 |
def get_screenshot_from_video(video_link, start_time):
|
330 |
# 实现从视频中提取帧的逻辑
|