fffiloni committed
Commit
ad6ae4a
1 Parent(s): 39ff4a4

Update app.py

Files changed (1)
  1. app.py +23 -12
app.py CHANGED
@@ -5,7 +5,7 @@ client = Client("https://vikhyatk-moondream1.hf.space/")
 import cv2
 from moviepy.editor import *
 
-# 1. extract and store 1 image every 5 images from video input
+# 1. extract and store 1 image every 24 frames from video input
 # 2. extract audio
 # 3. for each image from extracted_images, get caption from caption model and concatenate into list
 # 4. for audio, ask audio questioning model to describe sound/scene
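
The frame-sampling function referenced by step 1 is not shown in this diff. A minimal sketch of the idea, assuming OpenCV (already imported as cv2 in app.py) and a hypothetical helper name extract_frames:

import cv2

def extract_frames(video_path, every_n=24):
    # Hypothetical helper (not part of this commit): save one image
    # every `every_n` frames and return the list of saved file paths.
    cap = cv2.VideoCapture(video_path)
    saved_paths = []
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_index % every_n == 0:
            out_path = f"frame_{frame_index}.jpg"
            cv2.imwrite(out_path, frame)
            saved_paths.append(out_path)
        frame_index += 1
    cap.release()
    return saved_paths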
@@ -114,10 +114,14 @@ def process_image(image_in):
 '''
 
 def extract_audio(video_path):
-    video_clip = VideoFileClip(video_path)
-    audio_clip = video_clip.audio
-    audio_clip.write_audiofile("output_audio.mp3")
-    return "output_audio.mp3"
+    # Check if the video has audio
+    if video_clip.audio is not None:
+        audio_clip = video_clip.audio
+        audio_clip.write_audiofile("output_audio.mp3")
+        return "output_audio.mp3"
+    else:
+        print("The video does not have any audio.")
+        return None
 
 def get_salmonn(audio_in):
     salmonn_prompt = "Please describe the audio"
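
As committed, this hunk removes the line that created video_clip, so the new branch references video_clip without ever building it from video_path. A minimal corrected sketch of the function, keeping the same output_audio.mp3 path and the None return for silent clips, and closing the clip when done:

from moviepy.editor import VideoFileClip

def extract_audio(video_path):
    video_clip = VideoFileClip(video_path)
    # Check if the video has an audio track before exporting it
    if video_clip.audio is not None:
        video_clip.audio.write_audiofile("output_audio.mp3")
        video_clip.close()
        return "output_audio.mp3"
    print("The video does not have any audio.")
    video_clip.close()
    return None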
@@ -167,10 +171,14 @@ def infer(video_in):
 
     # Extract audio from video
     extracted_audio = extract_audio(video_in)
-    print(extracted_audio)
 
-    # Get description of audio content
-    audio_content_described = get_salmonn(extracted_audio)
+    if extracted_audio is not None:
+        print(extracted_audio)
+
+        # Get description of audio content
+        audio_content_described = get_salmonn(extracted_audio)
+    else:
+        audio_content_described = "Video has no sound."
 
     # Assemble captions
     formatted_captions = f"""
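
get_salmonn is only partially visible in this diff; it presumably follows the same gradio_client pattern as the moondream Client created at the top of app.py. A generic sketch of that pattern, where the Space URL, argument order, and api_name are placeholders rather than values taken from this file:

from gradio_client import Client

def get_salmonn_sketch(audio_in):
    salmonn_prompt = "Please describe the audio"
    # Placeholder Space URL and api_name; the real endpoint used by
    # app.py is defined outside the lines shown in this diff.
    salmonn_client = Client("https://example-salmonn.hf.space/")
    result = salmonn_client.predict(audio_in, salmonn_prompt, api_name="/predict")
    return result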
@@ -199,10 +207,13 @@ with gr.Blocks(css=css) as demo :
     gr.HTML("""
     <h2 style="text-align: center;">Soft video understanding</h2>
     """)
-    video_in = gr.Video(label="Video input")
-    video_cut = gr.Video(label="Video cut")
-    submit_btn = gr.Button("Submit")
-    video_description = gr.Textbox(label="Video description", elem_id="video-text")
+    with gr.Row():
+        video_in = gr.Video(label="Video input")
+        with gr.Column():
+            video_cut = gr.Video(label="Video cut to 10 seconds")
+            submit_btn = gr.Button("Submit")
+            video_description = gr.Textbox(label="Video description", elem_id="video-text")
+
     video_in.upload(
         fn = trim_video,
         inputs = [video_in],
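
A self-contained sketch of the new layout, using stub callbacks in place of app.py's trim_video and infer (their real bodies are not shown here). The video_in.upload wiring mirrors the lines above; the submit_btn.click wiring is an assumption based on the component names:

import gradio as gr

def trim_video_stub(video_path):
    # Stand-in for app.py's trim_video: passes the file through unchanged.
    return video_path

def infer_stub(video_path):
    # Stand-in for the full captioning pipeline.
    return f"Description for {video_path}"

with gr.Blocks() as demo:
    gr.HTML('<h2 style="text-align: center;">Soft video understanding</h2>')
    with gr.Row():
        video_in = gr.Video(label="Video input")
        with gr.Column():
            video_cut = gr.Video(label="Video cut to 10 seconds")
            submit_btn = gr.Button("Submit")
            video_description = gr.Textbox(label="Video description", elem_id="video-text")

    video_in.upload(fn=trim_video_stub, inputs=[video_in], outputs=[video_cut])
    submit_btn.click(fn=infer_stub, inputs=[video_cut], outputs=[video_description])

demo.launch()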
 