Show ffmpeg command in case of error

#6
by Aivo - opened
Files changed (2) hide show
  1. app.py +42 -86
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,24 +12,13 @@ import tempfile
12
  import shlex
13
  import shutil
14
 
15
- # Supported models configuration
16
- MODELS = {
17
- "deepseek-ai/DeepSeek-V3": {
18
- "base_url": "https://api.deepseek.com/v1",
19
- "env_key": "DEEPSEEK_API_KEY",
20
- },
21
- }
22
-
23
- # Initialize client with first available model
24
- client = OpenAI(
25
- base_url=next(iter(MODELS.values()))["base_url"],
26
- api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
27
- )
28
 
29
  allowed_medias = [
30
  ".png",
31
  ".jpg",
32
- ".webp",
33
  ".jpeg",
34
  ".tiff",
35
  ".bmp",
@@ -95,7 +84,7 @@ def get_files_infos(files):
95
  return results
96
 
97
 
98
- def get_completion(prompt, files_info, top_p, temperature, model_choice):
99
  # Create table header
100
  files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
101
  files_info_string += "|------|------|------------|-----------|--------|\n"
@@ -128,7 +117,6 @@ You are given:
128
  Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
129
 
130
  Key requirements:
131
- - First, think step-by-step about what the user is asking for and reformulate it into a clear technical specification
132
  - Use the absolute minimum number of ffmpeg options needed
133
  - Avoid complex filter chains or filter_complex if possible
134
  - Prefer simple concatenation, scaling, and basic filters
@@ -144,19 +132,15 @@ Remember: Simpler is better. Only use advanced ffmpeg features if absolutely nec
144
  },
145
  {
146
  "role": "user",
147
- "content": f"""Always output the media as video/mp4 and output file with "output.mp4".
148
- The current assets and objective follow.
149
 
150
  AVAILABLE ASSETS LIST:
151
 
152
  {files_info_string}
153
 
154
  OBJECTIVE: {prompt} and output at "output.mp4"
155
-
156
- First, think step-by-step about what I'm asking for and reformulate it into a clear technical specification.
157
- Then provide the FFMPEG command that will accomplish this task.
158
-
159
- YOUR RESPONSE:
160
  """,
161
  },
162
  ]
@@ -168,16 +152,8 @@ YOUR RESPONSE:
168
  print(msg["content"])
169
  print("=====================\n")
170
 
171
- if model_choice not in MODELS:
172
- raise ValueError(f"Model {model_choice} is not supported")
173
-
174
- model_config = MODELS[model_choice]
175
- client.base_url = model_config["base_url"]
176
- client.api_key = os.environ[model_config["env_key"]]
177
- model = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice
178
-
179
  completion = client.chat.completions.create(
180
- model=model,
181
  messages=messages,
182
  temperature=temperature,
183
  top_p=top_p,
@@ -189,23 +165,13 @@ YOUR RESPONSE:
189
  # Find content between ```sh or ```bash and the next ```
190
  import re
191
 
192
- command_match = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
193
- if command_match:
194
- command = command_match.group(1).strip()
195
- else:
196
- # Try to find a line that starts with ffmpeg
197
- ffmpeg_lines = [line.strip() for line in content.split('\n') if line.strip().startswith('ffmpeg')]
198
- if ffmpeg_lines:
199
- command = ffmpeg_lines[0]
200
- else:
201
- command = content.replace("\n", "")
202
- else:
203
- # Try to find a line that starts with ffmpeg
204
- ffmpeg_lines = [line.strip() for line in content.split('\n') if line.strip().startswith('ffmpeg')]
205
- if ffmpeg_lines:
206
- command = ffmpeg_lines[0]
207
  else:
208
  command = content.replace("\n", "")
 
 
209
 
210
  # remove output.mp4 with the actual output file path
211
  command = command.replace("output.mp4", "")
@@ -215,13 +181,7 @@ YOUR RESPONSE:
215
  raise Exception("API Error")
216
 
217
 
218
- def update(
219
- files,
220
- prompt,
221
- top_p=1,
222
- temperature=1,
223
- model_choice="deepseek-ai/DeepSeek-V3",
224
- ):
225
  if prompt == "":
226
  raise gr.Error("Please enter a prompt.")
227
 
@@ -233,16 +193,14 @@ def update(
233
  raise gr.Error(
234
  "Please make sure all videos are less than 2 minute long."
235
  )
236
- if file_info["size"] > 100000000:
237
- raise gr.Error("Please make sure all files are less than 100MB in size.")
238
 
239
  attempts = 0
240
  while attempts < 2:
241
  print("ATTEMPT", attempts)
242
  try:
243
- command_string = get_completion(
244
- prompt, files_info, top_p, temperature, model_choice
245
- )
246
  print(
247
  f"""///PROMTP {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
248
  )
@@ -281,9 +239,7 @@ def update(
281
  f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
282
  )
283
  subprocess.run(final_command, cwd=temp_dir)
284
- # Extract just the command for display
285
- command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
286
- generated_command = f"### Generated Command\n```bash\n{command_for_display}\n```"
287
  return output_file_path, gr.update(value=generated_command)
288
  except Exception as e:
289
  attempts += 1
@@ -296,7 +252,7 @@ with gr.Blocks() as demo:
296
  gr.Markdown(
297
  """
298
  # 🏞 AI Video Composer
299
- Compose new videos from your assets using natural language. Add video, image and audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) or [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base) generate a new video for you (using FFMPEG).
300
  """,
301
  elem_id="header",
302
  )
@@ -308,16 +264,11 @@ with gr.Blocks() as demo:
308
  file_types=allowed_medias,
309
  )
310
  user_prompt = gr.Textbox(
311
- placeholder="eg: Remove the 3 first seconds of the video",
312
  label="Instructions",
313
  )
314
  btn = gr.Button("Run")
315
  with gr.Accordion("Parameters", open=False):
316
- model_choice = gr.Radio(
317
- choices=list(MODELS.keys()),
318
- value=list(MODELS.keys())[0],
319
- label="Model",
320
- )
321
  top_p = gr.Slider(
322
  minimum=-0,
323
  maximum=1.0,
@@ -342,7 +293,7 @@ with gr.Blocks() as demo:
342
 
343
  btn.click(
344
  fn=update,
345
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
346
  outputs=[generated_video, generated_command],
347
  )
348
  with gr.Row():
@@ -350,35 +301,40 @@ with gr.Blocks() as demo:
350
  examples=[
351
  [
352
  ["./examples/ai_talk.wav", "./examples/bg-image.png"],
353
- "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
354
- 0.7,
355
- 0.1,
356
- list(MODELS.keys())[0],
357
- ],
358
- [
359
- ["./examples/ai_talk.wav", "./examples/bg-image.png"],
360
- "Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
361
  0.7,
362
  0.1,
363
- list(MODELS.keys())[0],
364
  ],
365
  [
366
  [
 
367
  "./examples/cat1.jpeg",
368
  "./examples/cat2.jpeg",
369
  "./examples/cat3.jpeg",
370
  "./examples/cat4.jpeg",
371
  "./examples/cat5.jpeg",
372
  "./examples/cat6.jpeg",
 
373
  "./examples/heat-wave.mp3",
374
  ],
375
- "Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
376
- 0.7,
377
- 0.1,
378
- list(MODELS.keys())[0],
379
- ],
380
- ],
381
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
 
 
 
 
 
 
 
 
 
 
 
382
  outputs=[generated_video, generated_command],
383
  fn=update,
384
  run_on_click=True,
 
12
  import shlex
13
  import shutil
14
 
15
+ HF_API_KEY = os.environ["HF_TOKEN"]
16
+
17
+ client = OpenAI(base_url="https://api-inference.huggingface.co/v1/", api_key=HF_API_KEY)
 
 
 
 
 
 
 
 
 
 
18
 
19
  allowed_medias = [
20
  ".png",
21
  ".jpg",
 
22
  ".jpeg",
23
  ".tiff",
24
  ".bmp",
 
84
  return results
85
 
86
 
87
+ def get_completion(prompt, files_info, top_p, temperature):
88
  # Create table header
89
  files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
90
  files_info_string += "|------|------|------------|-----------|--------|\n"
 
117
  Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
118
 
119
  Key requirements:
 
120
  - Use the absolute minimum number of ffmpeg options needed
121
  - Avoid complex filter chains or filter_complex if possible
122
  - Prefer simple concatenation, scaling, and basic filters
 
132
  },
133
  {
134
  "role": "user",
135
+ "content": f"""Always output the media as video/mp4 and output file with "output.mp4". Provide only the shell command without any explanations.
136
+ The current assets and objective follow. Reply with the FFMPEG command:
137
 
138
  AVAILABLE ASSETS LIST:
139
 
140
  {files_info_string}
141
 
142
  OBJECTIVE: {prompt} and output at "output.mp4"
143
+ YOUR FFMPEG COMMAND:
 
 
 
 
144
  """,
145
  },
146
  ]
 
152
  print(msg["content"])
153
  print("=====================\n")
154
 
 
 
 
 
 
 
 
 
155
  completion = client.chat.completions.create(
156
+ model="Qwen/Qwen2.5-Coder-32B-Instruct",
157
  messages=messages,
158
  temperature=temperature,
159
  top_p=top_p,
 
165
  # Find content between ```sh or ```bash and the next ```
166
  import re
167
 
168
+ command = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
169
+ if command:
170
+ command = command.group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
171
  else:
172
  command = content.replace("\n", "")
173
+ else:
174
+ command = content.replace("\n", "")
175
 
176
  # remove output.mp4 with the actual output file path
177
  command = command.replace("output.mp4", "")
 
181
  raise Exception("API Error")
182
 
183
 
184
+ def update(files, prompt, top_p=1, temperature=1):
 
 
 
 
 
 
185
  if prompt == "":
186
  raise gr.Error("Please enter a prompt.")
187
 
 
193
  raise gr.Error(
194
  "Please make sure all videos are less than 2 minute long."
195
  )
196
+ if file_info["size"] > 10000000:
197
+ raise gr.Error("Please make sure all files are less than 10MB in size.")
198
 
199
  attempts = 0
200
  while attempts < 2:
201
  print("ATTEMPT", attempts)
202
  try:
203
+ command_string = get_completion(prompt, files_info, top_p, temperature)
 
 
204
  print(
205
  f"""///PROMTP {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
206
  )
 
239
  f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
240
  )
241
  subprocess.run(final_command, cwd=temp_dir)
242
+ generated_command = f"### Generated Command\n```bash\nffmpeg {' '.join(args[1:])} -y output.mp4\n```"
 
 
243
  return output_file_path, gr.update(value=generated_command)
244
  except Exception as e:
245
  attempts += 1
 
252
  gr.Markdown(
253
  """
254
  # 🏞 AI Video Composer
255
+ Compose new videos from your assets using natural language. Add video, image and audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) generate a new video for you (using FFMPEG).
256
  """,
257
  elem_id="header",
258
  )
 
264
  file_types=allowed_medias,
265
  )
266
  user_prompt = gr.Textbox(
267
+ placeholder="I want to convert to a gif under 15mb",
268
  label="Instructions",
269
  )
270
  btn = gr.Button("Run")
271
  with gr.Accordion("Parameters", open=False):
 
 
 
 
 
272
  top_p = gr.Slider(
273
  minimum=-0,
274
  maximum=1.0,
 
293
 
294
  btn.click(
295
  fn=update,
296
+ inputs=[user_files, user_prompt, top_p, temperature],
297
  outputs=[generated_video, generated_command],
298
  )
299
  with gr.Row():
 
301
  examples=[
302
  [
303
  ["./examples/ai_talk.wav", "./examples/bg-image.png"],
304
+ "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
 
 
 
 
 
 
 
305
  0.7,
306
  0.1,
 
307
  ],
308
  [
309
  [
310
+ "./examples/cat8.jpeg",
311
  "./examples/cat1.jpeg",
312
  "./examples/cat2.jpeg",
313
  "./examples/cat3.jpeg",
314
  "./examples/cat4.jpeg",
315
  "./examples/cat5.jpeg",
316
  "./examples/cat6.jpeg",
317
+ "./examples/cat7.jpeg",
318
  "./examples/heat-wave.mp3",
319
  ],
320
+ "Generate an MP4 slideshow where each photo appears for 2 seconds, using the provided audio as soundtrack.",
321
+ 0.7,
322
+ 0.1,
323
+ ],
324
+ [
325
+ ["./examples/waterfall-overlay.png", "./examples/waterfall.mp4"],
326
+ "Add the overlay to the video.",
327
+ 0.7,
328
+ 0.1,
329
+ ],
330
+ [
331
+ ["./examples/example.mp4"],
332
+ "Make this video 10 times faster",
333
+ 0.7,
334
+ 0.1,
335
+ ],
336
+ ],
337
+ inputs=[user_files, user_prompt, top_p, temperature],
338
  outputs=[generated_video, generated_command],
339
  fn=update,
340
  run_on_click=True,
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  openai>=1.55.0
2
- gradio==5.30.0
3
  moviepy==1
 
1
  openai>=1.55.0
2
+ gradio==5.6.0
3
  moviepy==1