awacke1 committed on
Commit
e4a7d86
1 Parent(s): 93853eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -2
app.py CHANGED
@@ -67,6 +67,22 @@ def process_audio(audio_input):
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def save_video(video_file):
72
  # Save the uploaded video file
@@ -114,7 +130,10 @@ def process_audio_and_video(video_input):
114
 
115
  # Process the saved video
116
  base64Frames, audio_path = process_video(video_path, seconds_per_frame=1)
117
-
 
 
 
118
  # Generate a summary with visual and audio
119
  response = client.chat.completions.create(
120
  model=MODEL,
@@ -124,7 +143,7 @@ def process_audio_and_video(video_input):
124
  "These are the frames from the video.",
125
  *map(lambda x: {"type": "image_url",
126
  "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
127
- {"type": "text", "text": f"The audio transcription is: {transcription.text}"}
128
  ]},
129
  ],
130
  temperature=0,
 
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
70
def process_audio_for_video(video_input):
    """Transcribe a video's audio track with Whisper and summarize it.

    Sends the uploaded video file to the OpenAI transcription endpoint,
    asks the chat model for a Markdown summary of the transcript, renders
    that summary in the Streamlit app, and returns it so the caller can
    reuse it in the combined visual+audio summary prompt.

    Parameters:
        video_input: file-like object accepted by the OpenAI
            ``audio.transcriptions.create`` API (the uploaded video).

    Returns:
        str | None: the Markdown summary text, or ``None`` when no
        input was provided (the guard below is skipped).
    """
    # BUG FIX: the original tested `audio_input`, which is not defined in
    # this scope (it is a parameter of the separate `process_audio`
    # function), so any truthy call raised NameError. Guard on the actual
    # parameter instead.
    if video_input:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=video_input,
        )
        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription.text}"}],}
            ],
            temperature=0,
        )
        st.markdown(response.choices[0].message.content)
        return response.choices[0].message.content
86
 
87
  def save_video(video_file):
88
  # Save the uploaded video file
 
130
 
131
  # Process the saved video
132
  base64Frames, audio_path = process_video(video_path, seconds_per_frame=1)
133
+
134
+ # Get the transcript for the video model call
135
+ transcript = process_audio_for_video(video_input)
136
+
137
  # Generate a summary with visual and audio
138
  response = client.chat.completions.create(
139
  model=MODEL,
 
143
  "These are the frames from the video.",
144
  *map(lambda x: {"type": "image_url",
145
  "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
146
+ {"type": "text", "text": f"The audio transcription is: {transcript}"}
147
  ]},
148
  ],
149
  temperature=0,