youngtsai committed on
Commit
a41dfa9
1 Parent(s): a11ae70

transcript_html

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -69,20 +69,35 @@ def process_youtube_link(link):
69
  for entry in transcript:
70
  start_time = format_seconds_to_time(entry['start'])
71
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
 
72
  line = {
73
  "start_time": start_time,
74
  "end_time": end_time,
75
  "text": entry['text'],
76
- "screenshot": get_screenshot_from_video(link, entry['start'])
77
  }
78
  formatted_transcript.append(line)
79
 
 
 
80
  # 确保返回与 UI 组件预期匹配的输出
81
  return questions[0] if len(questions) > 0 else "", \
82
  questions[1] if len(questions) > 1 else "", \
83
  questions[2] if len(questions) > 2 else "", \
84
  df_summarise, \
85
- formatted_transcript
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  def get_screenshot_from_video(video_link, start_time):
88
  # 实现从视频中提取帧的逻辑
@@ -209,6 +224,8 @@ with gr.Blocks() as demo:
209
  send_button = gr.Button("Send")
210
 
211
  with gr.Column():
 
 
212
  with gr.Tab("資料本文"):
213
  df_string_output = gr.Textbox()
214
  with gr.Tab("資料摘要"):
@@ -236,7 +253,7 @@ with gr.Blocks() as demo:
236
  file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
237
 
238
  # 当输入 YouTube 链接时触发
239
- youtube_link.change(process_youtube_link, inputs=youtube_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
240
 
241
  # 当输入网页链接时触发
242
  web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
 
69
  for entry in transcript:
70
  start_time = format_seconds_to_time(entry['start'])
71
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
72
+ embed_url = get_embedded_youtube_link(video_id, entry['start'])
73
  line = {
74
  "start_time": start_time,
75
  "end_time": end_time,
76
  "text": entry['text'],
77
+ "embed_url": embed_url
78
  }
79
  formatted_transcript.append(line)
80
 
81
+ html_content = format_transcript_to_html(formatted_transcript)
82
+
83
  # 确保返回与 UI 组件预期匹配的输出
84
  return questions[0] if len(questions) > 0 else "", \
85
  questions[1] if len(questions) > 1 else "", \
86
  questions[2] if len(questions) > 2 else "", \
87
  df_summarise, \
88
+ html_content
89
+
90
def format_transcript_to_html(formatted_transcript):
    """Render transcript entries as an HTML fragment with one player per entry.

    Args:
        formatted_transcript: Iterable of dicts, each providing the keys
            ``start_time``, ``end_time``, ``text`` and ``embed_url``.

    Returns:
        A single HTML string: for every entry a ``<h3>`` time-range heading,
        a ``<p>`` with the caption text, and an ``<iframe>`` pointing at the
        entry's embed URL. Returns ``""`` when there are no entries.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not visible from here.
    from html import escape

    parts = []
    for entry in formatted_transcript:
        # Caption text comes from an external transcript, so escape every
        # interpolated value; otherwise "<", "&" or quotes in a caption would
        # break (or inject) markup in the rendered page.
        start_time = escape(str(entry['start_time']))
        end_time = escape(str(entry['end_time']))
        text = escape(str(entry['text']))
        # quote=True (the default) also escapes "'" so the value cannot
        # terminate the single-quoted src attribute.
        embed_url = escape(str(entry['embed_url']))

        parts.append(f"<h3>{start_time} - {end_time}</h3>")
        parts.append(f"<p>{text}</p>")
        parts.append(
            f"<iframe width='560' height='315' src='{embed_url}' "
            "frameborder='0' allowfullscreen></iframe><br><br>"
        )
    # Join once instead of growing a string with += on every iteration.
    return "".join(parts)
97
+
98
def get_embedded_youtube_link(video_id, start_time):
    """Build the YouTube embed URL for a video starting at a given offset.

    Args:
        video_id: The YouTube video identifier placed in the URL path.
        start_time: Offset in seconds from the beginning of the video. May be
            an int, a float, or a numeric string; fractional seconds are
            truncated.

    Returns:
        The embed URL with ``start`` and ``autoplay=1`` query parameters.
    """
    # The embedded player's ``start`` parameter expects whole seconds, while
    # transcript offsets are typically fractional (e.g. 12.34), so truncate.
    start_seconds = int(float(start_time))
    return f"https://www.youtube.com/embed/{video_id}?start={start_seconds}&autoplay=1"
101
 
102
  def get_screenshot_from_video(video_link, start_time):
103
  # 实现从视频中提取帧的逻辑
 
224
  send_button = gr.Button("Send")
225
 
226
  with gr.Column():
227
+ with gr.Tab("YouTube Transcript and Video"):
228
+ transcript_html = gr.HTML(label="YouTube Transcript and Video")
229
  with gr.Tab("資料本文"):
230
  df_string_output = gr.Textbox()
231
  with gr.Tab("資料摘要"):
 
253
  file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
254
 
255
  # 当输入 YouTube 链接时触发
256
+ youtube_link.change(process_youtube_link, inputs=youtube_link, outputs=[btn_1, btn_2, btn_3, df_summarise, transcript_html])
257
 
258
  # 当输入网页链接时触发
259
  web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])