awacke1 commited on
Commit
d907b5f
β€’
1 Parent(s): d7cecbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -14
app.py CHANGED
@@ -67,21 +67,60 @@ def process_audio(audio_input):
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def process_video(video_input):
71
- if video_input:
72
- base64Frames, audio_path = process_video_frames(video_input)
73
- transcription = openai.Audio.transcriptions.create(
74
- model="whisper-1",
75
- file=open(audio_path, "rb"),
76
- )
77
- frames_text = " ".join([f"[image: data:image/jpg;base64,{frame}]" for frame in base64Frames])
78
- response = openai.Completion.create(
79
- model=MODEL,
80
- prompt=f"You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown. These are the frames from the video. {frames_text} The audio transcription is: {transcription['text']}",
81
- max_tokens=500,
82
- temperature=0.5,
83
- )
84
- st.markdown(response.choices[0].text.strip())
 
 
85
 
86
  def process_video_frames(video_path, seconds_per_frame=2):
87
  base64Frames = []
 
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
70
def process_video(video_path, seconds_per_frame=2):
    """Extract sampled frames (base64 JPEGs) and the audio track from a video.

    NOTE(review): a second ``def process_video(video_input)`` later in this
    file shadows this helper at import time, and this helper duplicates
    ``process_video_frames`` — consider deleting one of them. The public name
    and signature are left unchanged here.

    Args:
        video_path: Path to the input video file.
        seconds_per_frame: Sampling interval in seconds between extracted
            frames (default 2).

    Returns:
        tuple: ``(base64Frames, audio_path)`` — a list of base64-encoded JPEG
        frames and the path of the extracted MP3 audio file.
    """
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)

    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Guard against a 0 step (fps * seconds_per_frame < 1 or missing FPS
        # metadata) — the original `int(fps * seconds_per_frame)` could yield 0
        # and loop forever on the same frame.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0

        # Walk the video, decoding one frame every `frames_to_skip` frames.
        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        # Always release the capture handle, even if decoding raised.
        video.release()

    # Extract the audio track next to the source file as a low-bitrate MP3.
    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    try:
        # Videos without an audio track have clip.audio == None; the original
        # crashed with AttributeError on them.
        if clip.audio is not None:
            clip.audio.write_audiofile(audio_path, bitrate="32k")
            clip.audio.close()
    finally:
        clip.close()

    print(f"Extracted {len(base64Frames)} frames")
    print(f"Extracted audio to {audio_path}")
    return base64Frames, audio_path
101
+
102
# NOTE(review): removed leftover OpenAI-cookbook code that ran at import time:
#     base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1)
# `VIDEO_PATH` is never defined in this file, so that line raised NameError
# the moment the app started. Frame extraction is invoked on demand from
# process_video(video_input) below instead.
104
+
105
+
106
+ ## Generate a summary with visual and audio
107
def process_video(video_input):
    """Summarize an uploaded video (sampled frames + audio transcript).

    Samples one frame per second, transcribes the audio with whisper-1, asks
    the chat model for a Markdown summary, and renders it via ``st.markdown``.

    Args:
        video_input: Path of the uploaded video; falsy values are ignored.
    """
    if not video_input:
        return
    # Bug fix: the previous body called process_video(video_input, ...) —
    # i.e. itself — causing infinite recursion. The intended helper is
    # process_video_frames.
    base64Frames, audio_path = process_video_frames(video_input, seconds_per_frame=1)
    # Bug fix: `transcription` was referenced below but never created; restore
    # the whisper-1 transcription step that the removed version performed.
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
            {"role": "user", "content": [
                "These are the frames from the video.",
                *map(lambda x: {"type": "image_url",
                                "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
                {"type": "text", "text": f"The audio transcription is: {transcription.text}"}
            ],
            }
        ],
        temperature=0,
    )
    st.markdown(response.choices[0].message.content)
124
 
125
  def process_video_frames(video_path, seconds_per_frame=2):
126
  base64Frames = []