Update app.py
Browse files
app.py
CHANGED
@@ -296,7 +296,10 @@ def infer(video_in, trim_value, prompt, background_prompt):
|
|
296 |
print("video is shorter than the cut value")
|
297 |
n_frame = len(frames_list)
|
298 |
|
299 |
-
|
|
|
|
|
|
|
300 |
print("set stop frames to: " + str(n_frame))
|
301 |
|
302 |
for i in frames_list[0:int(n_frame)]:
|
@@ -305,20 +308,28 @@ def infer(video_in, trim_value, prompt, background_prompt):
|
|
305 |
# Convert the image to a NumPy array
|
306 |
image_array = np.array(to_numpy_i)
|
307 |
|
308 |
-
|
309 |
-
|
310 |
-
|
|
|
|
|
311 |
|
312 |
|
313 |
-
# exporting the
|
314 |
-
|
315 |
-
|
|
|
|
|
|
|
|
|
316 |
print("frame " + i + "/" + str(n_frame) + ": done;")
|
317 |
|
318 |
-
|
|
|
|
|
319 |
print("finished !")
|
320 |
|
321 |
-
return
|
322 |
|
323 |
if __name__ == "__main__":
|
324 |
parser = argparse.ArgumentParser("MAM demo", add_help=True)
|
@@ -337,24 +348,17 @@ if __name__ == "__main__":
|
|
337 |
with block:
|
338 |
gr.Markdown(
|
339 |
"""
|
340 |
-
# Matting Anything Demo
|
341 |
-
Welcome to the Matting Anything demo and upload your
|
|
|
342 |
## Usage
|
343 |
-
You may check the <a href='https://www.youtube.com/watch?v=XY2Q0HATGOk'>video</a> to see how to play with the demo, or check the details below.
|
344 |
<details>
|
345 |
-
You may upload
|
346 |
-
|
347 |
-
**scribble_point**: Click an point on the target instance.
|
348 |
-
|
349 |
-
**scribble_box**: Click on two points, the top-left point and the bottom-right point to represent a bounding box of the target instance.
|
350 |
-
|
351 |
**text**: Send text prompt to identify the target instance in the `Text prompt` box.
|
352 |
|
353 |
-
We also support
|
354 |
-
|
355 |
-
**real_world_sample**: Randomly select a real-world image from `assets/backgrounds` for composition.
|
356 |
-
|
357 |
**generated_by_text**: Send background text prompt to create a background image with stable diffusion model in the `Background prompt` box.
|
|
|
358 |
</details>
|
359 |
""")
|
360 |
|
@@ -364,7 +368,7 @@ if __name__ == "__main__":
|
|
364 |
trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
|
365 |
#task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
|
366 |
#task_type = "text"
|
367 |
-
text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
|
368 |
#background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
|
369 |
background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
|
370 |
|
@@ -390,11 +394,13 @@ if __name__ == "__main__":
|
|
390 |
#gallery = gr.Gallery(
|
391 |
# label="Generated images", show_label=True, elem_id="gallery"
|
392 |
#).style(preview=True, grid=3, object_fit="scale-down")
|
393 |
-
|
|
|
|
|
394 |
|
395 |
run_button.click(fn=infer, inputs=[
|
396 |
-
video_in, trim_in, text_prompt, background_prompt], outputs=
|
397 |
|
398 |
-
block.launch(debug=args.debug, share=args.share, show_error=True)
|
399 |
#block.queue(concurrency_count=100)
|
400 |
#block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)
|
|
|
296 |
print("video is shorter than the cut value")
|
297 |
n_frame = len(frames_list)
|
298 |
|
299 |
+
with_bg_result_frames = []
|
300 |
+
with_green_result_frames = []
|
301 |
+
with_matte_result_frames = []
|
302 |
+
|
303 |
print("set stop frames to: " + str(n_frame))
|
304 |
|
305 |
for i in frames_list[0:int(n_frame)]:
|
|
|
308 |
# Convert the image to a NumPy array
|
309 |
image_array = np.array(to_numpy_i)
|
310 |
|
311 |
+
results = run_grounded_sam(image_array, prompt, "text", background_prompt)
|
312 |
+
|
313 |
+
bg_img = Image.fromarray(results[0])
|
314 |
+
green_img = Image.fromarray(results[1])
|
315 |
+
matte_img = Image.fromarray(results[2])
|
316 |
|
317 |
|
318 |
+
# exporting the images
|
319 |
+
bg_img.save(f"bg_result_img-{i}.jpg")
|
320 |
+
with_bg_result_frames.append(f"bg_result_img-{i}.jpg")
|
321 |
+
green_img.save(f"green_result_img-{i}.jpg")
|
322 |
+
with_green_result_frames.append(f"green_result_img-{i}.jpg")
|
323 |
+
matte_img.save(f"matte_result_img-{i}.jpg")
|
324 |
+
with_matte_result_frames.append(f"matte_result_img-{i}.jpg")
|
325 |
print("frame " + i + "/" + str(n_frame) + ": done;")
|
326 |
|
327 |
+
vid_bg = create_video(with_bg_result_frames, fps)
|
328 |
+
vid_green = create_video(with_green_result_frames, fps)
|
329 |
+
vid_matte = create_video(with_matte_result_frames, fps)
|
330 |
print("finished !")
|
331 |
|
332 |
+
return vid_bg, vid_green, vid_matte
|
333 |
|
334 |
if __name__ == "__main__":
|
335 |
parser = argparse.ArgumentParser("MAM demo", add_help=True)
|
|
|
348 |
with block:
|
349 |
gr.Markdown(
|
350 |
"""
|
351 |
+
# Matting Anything in Video Demo
|
352 |
+
Welcome to the Matting Anything in Video demo and upload your video to get started <br/>
|
353 |
+
You may open usage details below to understand how to use this demo.
|
354 |
## Usage
|
|
|
355 |
<details>
|
356 |
+
You may upload a video to start, for the moment we only support 1 prompt type to get the alpha matte of the target:
|
|
|
|
|
|
|
|
|
|
|
357 |
**text**: Send text prompt to identify the target instance in the `Text prompt` box.
|
358 |
|
359 |
+
We also only support 1 background type to support image composition with the alpha matte output:
|
|
|
|
|
|
|
360 |
**generated_by_text**: Send background text prompt to create a background image with stable diffusion model in the `Background prompt` box.
|
361 |
+
|
362 |
</details>
|
363 |
""")
|
364 |
|
|
|
368 |
trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
|
369 |
#task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
|
370 |
#task_type = "text"
|
371 |
+
text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle", info="Describe the subject visible in your video that you want to matte")
|
372 |
#background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
|
373 |
background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
|
374 |
|
|
|
394 |
#gallery = gr.Gallery(
|
395 |
# label="Generated images", show_label=True, elem_id="gallery"
|
396 |
#).style(preview=True, grid=3, object_fit="scale-down")
|
397 |
+
vid_bg_out = gr.Video(label="Video with background")
|
398 |
+
vid_green_out = gr.Video(label="Video green screen")
|
399 |
+
vid_matte_out = gr.Video(label="Video matte")
|
400 |
|
401 |
run_button.click(fn=infer, inputs=[
|
402 |
+
video_in, trim_in, text_prompt, background_prompt], outputs=[vid_bg_out, vid_green_out, vid_matte_out])
|
403 |
|
404 |
+
block.queue(max_size=12).launch(debug=args.debug, share=args.share, show_error=True)
|
405 |
#block.queue(concurrency_count=100)
|
406 |
#block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)
|