sakasegawa committed on
Commit
dcb12bb
1 Parent(s): 49b7ecb
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +91 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Gpt 4o Video Summarizer Demo
3
  emoji: 💻
4
  colorFrom: gray
5
  colorTo: green
 
1
  ---
2
+ title: GPT-4o Video Summarizer Demo
3
  emoji: 💻
4
  colorFrom: gray
5
  colorTo: green
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from moviepy.editor import VideoFileClip
3
+ import cv2
4
+ import base64
5
+ from openai import OpenAI
6
+ import os
7
+
8
+ # ref: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
9
def process_video(video_path, seconds_per_frame=2):
    """Sample JPEG frames from a video and extract its audio track as MP3.

    Args:
        video_path: Path to the input video file.
        seconds_per_frame: Interval, in seconds of video time, between two
            sampled frames.

    Returns:
        A tuple ``(base64_frames, audio_path)`` where ``base64_frames`` is a
        list of base64-encoded JPEG images (as ``str``) and ``audio_path`` is
        the path of the MP3 file written next to the input video (same base
        name, ``.mp3`` extension).

    Raises:
        ValueError: If the video has no audio track to extract.
    """
    base64_frames = []
    base_video_path, _ = os.path.splitext(video_path)

    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Always advance by at least one frame: if fps * seconds_per_frame
        # truncates to 0 the loop would otherwise re-read frame 0 forever.
        frames_to_skip = max(1, int(fps * seconds_per_frame))

        for curr_frame in range(0, total_frames - 1, frames_to_skip):
            # Seek directly to the frame to sample instead of decoding
            # every frame in between.
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            encoded, buffer = cv2.imencode(".jpg", frame)
            if not encoded:
                # Skip frames that could not be JPEG-encoded rather than
                # sending an empty/invalid image downstream.
                continue
            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if reading or encoding raised.
        video.release()

    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError(
                f"No audio track found in {video_path!r}; cannot extract audio."
            )
        # A low bitrate keeps the file small for the transcription upload.
        clip.audio.write_audiofile(audio_path, bitrate="32k")
    finally:
        # Closing the clip also closes its audio reader.
        clip.close()

    return base64_frames, audio_path
35
+
36
+
37
def summarize_video(api_key, file_path):
    """Summarize a video with GPT-4o from sampled frames and a transcript.

    Args:
        api_key: OpenAI API key used to construct the client.
        file_path: Path to the video file; forwarded to ``process_video``.

    Returns:
        The text content of the first completion choice. The system prompt
        asks the model for a Markdown-formatted summary.
    """
    client = OpenAI(api_key=api_key)

    # Extract frames and audio: one frame every 0.5 seconds of video.
    base64_frames, audio_path = process_video(file_path, seconds_per_frame=0.5)

    # Transcribe the audio with Whisper. The context manager makes sure the
    # file handle is closed once the upload has finished (or failed).
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1", file=audio_file
        )

    # One low-detail image part per sampled frame, sent as a base64 data URL.
    frame_parts = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{frame}",
                "detail": "low",
            },
        }
        for frame in base64_frames
    ]

    # Generate the summary with GPT-4o.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """あなたは優秀な要約者です。提供された動画とその書き起こしの要約をMarkdown形式で作成してください""",
            },
            {
                "role": "user",
                "content": [
                    # Every content part is an explicit typed object.
                    {
                        "type": "text",
                        "text": "これらは動画から取得されたフレームです",
                    },
                    *frame_parts,
                    {
                        "type": "text",
                        "text": f"動画の書き起こしはこちらです: {transcription.text}",
                    },
                ],
            },
        ],
        # temperature=0 keeps the summary as deterministic as the API allows.
        temperature=0,
    )

    return response.choices[0].message.content
81
+
82
# Gradio UI: takes an OpenAI API key and an uploaded video file, and renders
# the summary returned by summarize_video as Markdown.
demo = gr.Interface(
    fn=summarize_video,
    inputs=[
        # Mask the API key so the secret is not displayed in clear text.
        gr.Textbox(label="OpenAI API Key", type="password"),
        gr.File(label="Upload Video (mp4)"),
    ],
    outputs="markdown",
    title="Video Summarizer",
    description=(
        "OpenAIのAPIキーを入力し動画をアップロードすると要約が生成されます。"
        "API使用料にご注意ください。"
        "詳細: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o"
    ),
)

# Only start the web server when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ moviepy
3
+ opencv-python
4
+ gradio