youngtsai committed on
Commit
3c4e755
1 Parent(s): a6ad75a

html_content += f"<p>{entry['time_sec']} </p> <br><br>"

Browse files
Files changed (2) hide show
  1. app.py +58 -2
  2. requirements.txt +3 -1
app.py CHANGED
@@ -8,6 +8,12 @@ from openai import OpenAI
8
  import json
9
  from youtube_transcript_api import YouTubeTranscriptApi
10
 
 
 
 
 
 
 
11
 
12
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
13
  client = OpenAI(api_key=OPEN_AI_KEY)
@@ -74,7 +80,8 @@ def process_youtube_link(link):
74
  "start_time": start_time,
75
  "end_time": end_time,
76
  "text": entry['text'],
77
- "embed_url": embed_url
 
78
  }
79
  formatted_transcript.append(line)
80
 
@@ -95,13 +102,62 @@ def format_transcript_to_html(formatted_transcript):
95
  for entry in formatted_transcript:
96
  html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
97
  html_content += f"<p>{entry['text']}</p>"
98
- html_content += f"<iframe width='560' height='315' src='{entry['embed_url']}' frameborder='0' allowfullscreen></iframe><br><br>"
99
  return html_content
100
 
101
  def get_embedded_youtube_link(video_id, start_time):
102
  embed_url = f"https://www.youtube.com/embed/{video_id}?start={start_time}&autoplay=1"
103
  return embed_url
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def get_screenshot_from_video(video_link, start_time):
106
  # 实现从视频中提取帧的逻辑
107
  # 由于这需要服务器端处理,你可能需要一种方法来下载视频,
 
8
  import json
9
  from youtube_transcript_api import YouTubeTranscriptApi
10
 
11
+ from moviepy.editor import VideoFileClip
12
+ from pytube import YouTube
13
+ import os
14
+
15
+ OUTPUT_PATH = 'videos'
16
+
17
 
18
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
19
  client = OpenAI(api_key=OPEN_AI_KEY)
 
80
  "start_time": start_time,
81
  "end_time": end_time,
82
  "text": entry['text'],
83
+ "embed_url": embed_url,
84
+ "time_sec": entry['start']
85
  }
86
  formatted_transcript.append(line)
87
 
 
102
  for entry in formatted_transcript:
103
  html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
104
  html_content += f"<p>{entry['text']}</p>"
105
+ html_content += f"<p>{entry['time_sec']} </p> <br><br>"
106
  return html_content
107
 
108
  def get_embedded_youtube_link(video_id, start_time):
109
  embed_url = f"https://www.youtube.com/embed/{video_id}?start={start_time}&autoplay=1"
110
  return embed_url
111
 
112
def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
    """Download a YouTube video as ``<output_path>/<youtube_id>.mp4``.

    Picks the highest-resolution progressive MP4 stream reported by pytube.

    Args:
        youtube_id: YouTube video ID; also used as the output file stem.
        output_path: Directory to save into; created if it does not exist.

    Returns:
        The path of the downloaded file, as returned by pytube's
        ``Stream.download``.

    Raises:
        RuntimeError: If the video exposes no progressive MP4 stream.
    """
    # Construct the full YouTube URL
    youtube_url = f'https://www.youtube.com/watch?v={youtube_id}'

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_path, exist_ok=True)

    # Download the video
    yt = YouTube(youtube_url)
    video_stream = (
        yt.streams
        .filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on the download call below.
    if video_stream is None:
        raise RuntimeError(
            f"No progressive mp4 stream available for video: {youtube_id}"
        )

    saved_path = video_stream.download(
        output_path=output_path, filename=youtube_id + ".mp4"
    )

    print(f"Video downloaded successfully: {output_path}/{youtube_id}.mp4")
    return saved_path
126
+
127
+
128
def screenshot_youtube_video(youtube_id, snapshot_sec):
    """Save one frame of an already-downloaded video as a JPEG.

    The video is read from ``{OUTPUT_PATH}/{youtube_id}.mp4``; this function
    does not download it.

    Args:
        youtube_id: Video ID used to build the input and output file names.
        snapshot_sec: Frame time handed to ``save_frame`` (seconds, going by
            the parameter name); also embedded in the output file name.

    Returns:
        The screenshot path,
        ``{OUTPUT_PATH}/screenshots/{youtube_id}_{snapshot_sec}.jpg``.
    """
    # NOTE: downloading YouTube videos directly inside Hugging Face Spaces may
    # not be feasible; this part may need adjusting, e.g. by letting users
    # upload the video instead.

    # The video is assumed to already be in place.
    video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
    screenshot_path = f'{OUTPUT_PATH}/screenshots/{youtube_id}_{snapshot_sec}.jpg'

    # Make sure the target directory exists so a standalone call does not
    # fail when the frame is written.
    os.makedirs(os.path.dirname(screenshot_path), exist_ok=True)

    # Load the video and take a screenshot
    with VideoFileClip(video_path) as video:
        video.save_frame(screenshot_path, snapshot_sec)

    return screenshot_path
142
+
143
def process_video(youtube_id, max_seconds=10):
    """Download a video and capture one screenshot per whole second.

    Frames are taken at 1, 2, ... up to (but not including) the smaller of
    the video's whole-second duration and ``max_seconds``.

    Args:
        youtube_id: YouTube video ID to download and sample.
        max_seconds: Upper bound on the sampled range. Defaults to 10, the
            value previously hard-coded as a placeholder duration. Pass
            ``None`` to sample the whole video.

    Returns:
        List of screenshot paths returned by ``screenshot_youtube_video``,
        in chronological order.
    """
    download_youtube_video(youtube_id)
    video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'

    # Open the clip only long enough to read its length, then release it;
    # the screenshot helper opens the file itself for every frame.
    with VideoFileClip(video_path) as video:
        duration = int(video.duration)

    # Creating this per-video directory also creates the parent
    # "screenshots" directory that the screenshot helper writes into.
    output_path = f'{OUTPUT_PATH}/screenshots/{youtube_id}'
    os.makedirs(output_path, exist_ok=True)

    # Cap the sampled range, but never beyond the real duration so short
    # videos are not asked for frames past their end.
    if max_seconds is not None:
        duration = min(duration, max_seconds)

    return [
        screenshot_youtube_video(youtube_id, second)
        for second in range(1, duration)
    ]
160
+
161
  def get_screenshot_from_video(video_link, start_time):
162
  # 实现从视频中提取帧的逻辑
163
  # 由于这需要服务器端处理,你可能需要一种方法来下载视频,
requirements.txt CHANGED
@@ -4,4 +4,6 @@ openai>=1.0.0
4
  requests
5
  beautifulsoup4
6
  python-docx
7
- youtube-transcript-api
 
 
 
4
  requests
5
  beautifulsoup4
6
  python-docx
7
+ youtube-transcript-api
8
+ moviepy
9
+ pytube