youngtsai committed on
Commit
af9f8f3
1 Parent(s): c997d4e
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -256,8 +256,9 @@ def process_transcript_and_screenshots(video_id):
256
  service = init_drive_service()
257
  parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
258
  folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
 
 
259
  file_name = f'{video_id}_transcript.json'
260
-
261
  # 检查逐字稿是否存在
262
  exists, file_id = check_file_exists(service, folder_id, file_name)
263
  if not exists:
@@ -298,8 +299,6 @@ def process_youtube_link(link):
298
  video_id = extract_youtube_id(link)
299
  global VIDEO_ID
300
  VIDEO_ID = video_id
301
-
302
-
303
  download_youtube_video(video_id, output_path=OUTPUT_PATH)
304
 
305
  try:
@@ -345,6 +344,7 @@ def process_youtube_link(link):
345
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
346
  summary = summary_json["summary"]
347
  html_content = format_transcript_to_html(formatted_transcript)
 
348
  first_image = formatted_transcript[0]['screenshot_path']
349
  first_text = formatted_transcript[0]['text']
350
  mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
@@ -360,6 +360,7 @@ def process_youtube_link(link):
360
  mind_map, \
361
  mind_map_html, \
362
  html_content, \
 
363
  first_image, \
364
  first_text,
365
 
@@ -372,7 +373,12 @@ def format_transcript_to_html(formatted_transcript):
372
  html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
373
  return html_content
374
 
375
-
 
 
 
 
 
376
 
377
  def get_embedded_youtube_link(video_id, start_time):
378
  int_start_time = int(start_time)
@@ -863,6 +869,8 @@ with gr.Blocks() as demo:
863
  next_button = gr.Button("Next")
864
  prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
865
  next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
 
 
866
  with gr.Tab("本文"):
867
  df_string_output = gr.Textbox(lines=40, label="Data Text")
868
  with gr.Tab("重點"):
@@ -907,7 +915,8 @@ with gr.Blocks() as demo:
907
  df_summarise,
908
  mind_map,
909
  mind_map_html,
910
- transcript_html,
 
911
  slide_image,
912
  slide_text
913
  ]
@@ -925,6 +934,7 @@ with gr.Blocks() as demo:
925
  mind_map,
926
  mind_map_html,
927
  transcript_html,
 
928
  slide_image,
929
  slide_text
930
  ]
 
256
  service = init_drive_service()
257
  parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
258
  folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
259
+
260
+ # 逐字稿文件名
261
  file_name = f'{video_id}_transcript.json'
 
262
  # 检查逐字稿是否存在
263
  exists, file_id = check_file_exists(service, folder_id, file_name)
264
  if not exists:
 
299
  video_id = extract_youtube_id(link)
300
  global VIDEO_ID
301
  VIDEO_ID = video_id
 
 
302
  download_youtube_video(video_id, output_path=OUTPUT_PATH)
303
 
304
  try:
 
344
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
345
  summary = summary_json["summary"]
346
  html_content = format_transcript_to_html(formatted_transcript)
347
+ simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
348
  first_image = formatted_transcript[0]['screenshot_path']
349
  first_text = formatted_transcript[0]['text']
350
  mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
 
360
  mind_map, \
361
  mind_map_html, \
362
  html_content, \
363
+ simple_html_content, \
364
  first_image, \
365
  first_text,
366
 
 
373
  html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
374
  return html_content
375
 
376
def format_simple_transcript_to_html(formatted_transcript):
    """Render transcript entries as a single HTML string.

    Each entry contributes an ``<h3>`` heading of the form
    ``start_time - end_time`` followed by a ``<p>`` holding the entry text.

    Args:
        formatted_transcript: Iterable of mappings that provide the
            ``start_time``, ``end_time`` and ``text`` keys.

    Returns:
        The concatenated HTML for all entries, or an empty string when
        there are no entries.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level imports.
    import html

    def _safe(value):
        # The values land in element content, so only &, < and > need
        # escaping; quotes are left untouched to keep the output readable.
        return html.escape(str(value), quote=False)

    fragments = []
    for entry in formatted_transcript:
        fragments.append(
            f"<h3>{_safe(entry['start_time'])} - {_safe(entry['end_time'])}</h3>"
        )
        fragments.append(f"<p>{_safe(entry['text'])}</p>")
    return "".join(fragments)
382
 
383
  def get_embedded_youtube_link(video_id, start_time):
384
  int_start_time = int(start_time)
 
869
  next_button = gr.Button("Next")
870
  prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
871
  next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
872
+ with gr.Tab("逐字稿"):
873
+ simple_html_content = gr.HTML(label="Simple Transcript", lines=40)
874
  with gr.Tab("本文"):
875
  df_string_output = gr.Textbox(lines=40, label="Data Text")
876
  with gr.Tab("重點"):
 
915
  df_summarise,
916
  mind_map,
917
  mind_map_html,
918
+ transcript_html,
919
+ simple_html_content,
920
  slide_image,
921
  slide_text
922
  ]
 
934
  mind_map,
935
  mind_map_html,
936
  transcript_html,
937
+ simple_html_content,
938
  slide_image,
939
  slide_text
940
  ]