youngtsai committed on
Commit
a11ae70
1 Parent(s): ae369e8

def get_screenshot_from_video(video_link, start_time):

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -50,10 +50,11 @@ def docx_to_text(file):
50
  return "\n".join([para.text for para in doc.paragraphs])
51
 
52
  def format_seconds_to_time(seconds):
53
- """将秒数格式化为 分:秒 的形式"""
54
- minutes = int(seconds // 60)
 
55
  seconds = int(seconds % 60)
56
- return f"{minutes:02}:{seconds:02}"
57
 
58
  def process_youtube_link(link):
59
  # 使用 YouTube API 获取逐字稿
@@ -64,12 +65,17 @@ def process_youtube_link(link):
64
  questions = generate_questions(transcript)
65
  df_summarise = generate_df_summarise(transcript)
66
 
67
- formatted_transcript = ""
68
  for entry in transcript:
69
  start_time = format_seconds_to_time(entry['start'])
70
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
71
- line = f"---- 時間: {start_time} ~ {end_time} ----\n{entry['text']}\n\n"
72
- formatted_transcript += line
 
 
 
 
 
73
 
74
  # 确保返回与 UI 组件预期匹配的输出
75
  return questions[0] if len(questions) > 0 else "", \
@@ -78,6 +84,14 @@ def process_youtube_link(link):
78
  df_summarise, \
79
  formatted_transcript
80
 
 
 
 
 
 
 
 
 
81
  def process_web_link(link):
82
  # 抓取和解析网页内容
83
  response = requests.get(link)
 
50
  return "\n".join([para.text for para in doc.paragraphs])
51
 
52
  def format_seconds_to_time(seconds):
53
+ """将秒数格式化为 时:分:秒 的形式"""
54
+ hours = int(seconds // 3600)
55
+ minutes = int((seconds % 3600) // 60)
56
  seconds = int(seconds % 60)
57
+ return f"{hours:02}:{minutes:02}:{seconds:02}"
58
 
59
  def process_youtube_link(link):
60
  # 使用 YouTube API 获取逐字稿
 
65
  questions = generate_questions(transcript)
66
  df_summarise = generate_df_summarise(transcript)
67
 
68
+ formatted_transcript = []
69
  for entry in transcript:
70
  start_time = format_seconds_to_time(entry['start'])
71
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
72
+ line = {
73
+ "start_time": start_time,
74
+ "end_time": end_time,
75
+ "text": entry['text'],
76
+ "screenshot": get_screenshot_from_video(link, entry['start'])
77
+ }
78
+ formatted_transcript.append(line)
79
 
80
  # 确保返回与 UI 组件预期匹配的输出
81
  return questions[0] if len(questions) > 0 else "", \
 
84
  df_summarise, \
85
  formatted_transcript
86
 
87
+ def get_screenshot_from_video(video_link, start_time):
88
+ # 实现从视频中提取帧的逻辑
89
+ # 由于这需要服务器端处理,你可能需要一种方法来下载视频,
90
+ # 并使用 ffmpeg 或类似工具提取特定时间点的帧
91
+ # 这里只是一个示意性的函数实现
92
+ screenshot_url = f"[逻辑以提取视频 {video_link} 在 {start_time} 秒时的截图]"
93
+ return screenshot_url
94
+
95
  def process_web_link(link):
96
  # 抓取和解析网页内容
97
  response = requests.get(link)