youngtsai committed on
Commit
ef2b19a
1 Parent(s): 1b3ea4e

formatted_simple_transcript.append(simple_line)

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -282,6 +282,7 @@ def process_youtube_link(link):
282
  transcript = process_transcript_and_screenshots(video_id)
283
 
284
  formatted_transcript = []
 
285
  screenshot_paths = []
286
  for entry in transcript:
287
  start_time = format_seconds_to_time(entry['start'])
@@ -297,6 +298,13 @@ def process_youtube_link(link):
297
  "screenshot_path": screenshot_path
298
  }
299
  formatted_transcript.append(line)
 
 
 
 
 
 
 
300
  screenshot_paths.append(screenshot_path)
301
 
302
  html_content = format_transcript_to_html(formatted_transcript)
@@ -305,10 +313,9 @@ def process_youtube_link(link):
305
  print("=====html_content=====")
306
 
307
  # 基于逐字稿生成其他所需的输出
308
- questions = generate_questions(formatted_transcript)
309
- # 将 DataFrame 转换为纯文本,並分行
310
  df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
311
- df_summarise = generate_df_summarise(formatted_transcript)
312
 
313
  global TRANSCRIPTS
314
  TRANSCRIPTS = formatted_transcript
 
282
  transcript = process_transcript_and_screenshots(video_id)
283
 
284
  formatted_transcript = []
285
+ formatted_simple_transcript =[]
286
  screenshot_paths = []
287
  for entry in transcript:
288
  start_time = format_seconds_to_time(entry['start'])
 
298
  "screenshot_path": screenshot_path
299
  }
300
  formatted_transcript.append(line)
301
+ # formatted_simple_transcript 只要 start_time, end_time, text
302
+ simple_line = {
303
+ "start_time": start_time,
304
+ "end_time": end_time,
305
+ "text": entry['text']
306
+ }
307
+ formatted_simple_transcript.append(simple_line)
308
  screenshot_paths.append(screenshot_path)
309
 
310
  html_content = format_transcript_to_html(formatted_transcript)
 
313
  print("=====html_content=====")
314
 
315
  # 基于逐字稿生成其他所需的输出
316
+ questions = generate_questions(formatted_simple_transcript)
 
317
  df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
318
+ df_summarise = generate_df_summarise(formatted_simple_transcript)
319
 
320
  global TRANSCRIPTS
321
  TRANSCRIPTS = formatted_transcript