Spaces:

sakasegawa
/

gpt-4o-video-summarizer-demo

Running

App Files Files Community

sakasegawa committed on May 13

Commit

dcb12bb

•

1 Parent(s): 49b7ecb

init

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +91 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Gpt 4o Video Summarizer Demo
 emoji: 💻
 colorFrom: gray
 colorTo: green

 ---
+title: GPT-4o Video Summarizer Demo
 emoji: 💻
 colorFrom: gray
 colorTo: green

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import gradio as gr
+from moviepy.editor import VideoFileClip
+import cv2
+import base64
+from openai import OpenAI
+import os
+# ref: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
+def process_video(video_path, seconds_per_frame=2):
+    base64Frames = []
+    base_video_path, _ = os.path.splitext(video_path)
+    video = cv2.VideoCapture(video_path)
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = video.get(cv2.CAP_PROP_FPS)
+    frames_to_skip = int(fps * seconds_per_frame)
+    curr_frame = 0
+    while curr_frame < total_frames - 1:
+        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+        success, frame = video.read()
+        if not success:
+            break
+        _, buffer = cv2.imencode(".jpg", frame)
+        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+        curr_frame += frames_to_skip
+    video.release()
+    audio_path = f"{base_video_path}.mp3"
+    clip = VideoFileClip(video_path)
+    clip.audio.write_audiofile(audio_path, bitrate="32k")
+    clip.audio.close()
+    clip.close()
+    return base64Frames, audio_path
+def summarize_video(api_key, file_path):
+    client = OpenAI(api_key=api_key)
+    # フレームと音声を抽出 (秒間0.5フレーム)
+    base64Frames, audio_path = process_video(file_path, seconds_per_frame=0.5)
+    # Whisperで音声を文字起こし
+    transcription = client.audio.transcriptions.create(
+        model="whisper-1", file=open(audio_path, "rb")
+    )
+    # GPT-4oで要約生成
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "system",
+                "content": """あなたは優秀な要約者です。提供された動画とその書き起こしの要約をMarkdown形式で作成してください""",
+            },
+            {
+                "role": "user",
+                "content": [
+                    "これらは動画から取得されたフレームです",
+                    *map(
+                        lambda x: {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpg;base64,{x}",
+                                "detail": "low",
+                            },
+                        },
+                        base64Frames,
+                    ),
+                    {
+                        "type": "text",
+                        "text": f"動画の書き起こしはこちらです: {transcription.text}",
+                    },
+                ],
+            },
+        ],
+        temperature=0,
+    )
+    return response.choices[0].message.content
+demo = gr.Interface(
+    fn=summarize_video,
+    inputs=[gr.Textbox(label="OpenAI API Key"), gr.File(label="Upload Video (mp4)")],
+    outputs="markdown",
+    title="Video Summarizer",
+    description="動画をアップロードしOpenAIのAPIキーを入力し動画をアップロードすると要約が生成されます。API使用料にご注意ください。詳細: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o",
+)
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+openai
+moviepy
+opencv-python
+gradio