envs

- app.py: +153 -146
- pipelines/pipeline_imagecoductor.py: +1 -8

app.py
CHANGED
@@ -295,7 +295,7 @@ class ImageConductor:
         if isinstance(tracking_points, list):
             input_all_points = tracking_points
         else:
-            input_all_points = tracking_points
+            input_all_points = tracking_points


         resized_all_points = [tuple([tuple([float(e1[0]*self.width/original_width), float(e1[1]*self.height/original_height)]) for e1 in e]) for e in input_all_points]

@@ -304,6 +304,10 @@
         id = base.split('_')[-1]


+        # with open(f'{output_dir}/points-{id}.json', 'w') as f:
+        #     json.dump(input_all_points, f)
+
+
         visualized_drag, _ = visualize_drag(first_frame_path, resized_all_points, drag_mode, self.width, self.height, self.model_length)

         ## image condition

@@ -377,16 +381,18 @@ class ImageConductor:
         vis_video = (rearrange(sample[0], 'c t h w -> t h w c') * 255.).clip(0, 255)
         torchvision.io.write_video(outputs_path, vis_video, fps=8, video_codec='h264', options={'crf': '10'})

-        return visualized_drag, outputs_path
+        return {output_image: visualized_drag, output_video: outputs_path}


 def reset_states(first_frame_path, tracking_points):
     first_frame_path = gr.State()
     tracking_points = gr.State([])
-    return None, first_frame_path, tracking_points
+    return {input_image:None, first_frame_path_var: first_frame_path, tracking_points_var: tracking_points}


-def preprocess_image(image):
+def preprocess_image(image, tracking_points):
+    if len(tracking_points) != 0:
+        tracking_points = gr.State([])
     image_pil = image2pil(image.name)
     raw_w, raw_h = image_pil.size
     resize_ratio = max(384/raw_w, 256/raw_h)

@@ -395,7 +401,7 @@ def preprocess_image(image):
     id = str(uuid.uuid4())[:4]
     first_frame_path = os.path.join(output_dir, f"first_frame_{id}.jpg")
     image_pil.save(first_frame_path, quality=95)
-    return first_frame_path, first_frame_path,
+    return {input_image: first_frame_path, first_frame_path_var: first_frame_path, tracking_points_var: tracking_points}


 def add_tracking_points(tracking_points, first_frame_path, drag_mode, evt: gr.SelectData): # SelectData is a subclass of EventData

@@ -405,13 +411,13 @@ def add_tracking_points(tracking_points, first_frame_path, drag_mode, evt: gr.SelectData):
         color = (0, 0, 255, 255)

     print(f"You selected {evt.value} at {evt.index} from {evt.target}")
-    tracking_points
-    print(tracking_points
+    tracking_points[-1].append(evt.index)
+    print(tracking_points)

     transparent_background = Image.open(first_frame_path).convert('RGBA')
     w, h = transparent_background.size
     transparent_layer = np.zeros((h, w, 4))
-    for track in tracking_points
+    for track in tracking_points:
         if len(track) > 1:
             for i in range(len(track)-1):
                 start_point = track[i]

@@ -428,13 +434,13 @@ def add_tracking_points(tracking_points, first_frame_path, drag_mode, evt: gr.SelectData):

     transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
     trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
-    return tracking_points, trajectory_map
+    return {tracking_points_var: tracking_points, input_image: trajectory_map}


 def add_drag(tracking_points):
-    tracking_points.
-    print(tracking_points
-    return tracking_points
+    tracking_points.append([])
+    print(tracking_points)
+    return {tracking_points_var: tracking_points}


 def delete_last_drag(tracking_points, first_frame_path, drag_mode):

@@ -442,11 +448,11 @@ def delete_last_drag(tracking_points, first_frame_path, drag_mode):
         color = (255, 0, 0, 255)
     elif drag_mode=='camera':
         color = (0, 0, 255, 255)
-    tracking_points.
+    tracking_points.pop()
     transparent_background = Image.open(first_frame_path).convert('RGBA')
     w, h = transparent_background.size
     transparent_layer = np.zeros((h, w, 4))
-    for track in tracking_points
+    for track in tracking_points:
         if len(track) > 1:
             for i in range(len(track)-1):
                 start_point = track[i]

@@ -463,7 +469,7 @@ def delete_last_drag(tracking_points, first_frame_path, drag_mode):

     transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
     trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
-    return tracking_points, trajectory_map
+    return {tracking_points_var: tracking_points, input_image: trajectory_map}


 def delete_last_step(tracking_points, first_frame_path, drag_mode):

@@ -471,11 +477,11 @@ def delete_last_step(tracking_points, first_frame_path, drag_mode):
         color = (255, 0, 0, 255)
     elif drag_mode=='camera':
         color = (0, 0, 255, 255)
-    tracking_points
+    tracking_points[-1].pop()
     transparent_background = Image.open(first_frame_path).convert('RGBA')
     w, h = transparent_background.size
     transparent_layer = np.zeros((h, w, 4))
-    for track in tracking_points
+    for track in tracking_points:
         if len(track) > 1:
             for i in range(len(track)-1):
                 start_point = track[i]
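For readers unfamiliar with the pattern these handlers rely on: when an Image component's select event fires, Gradio hands the callback a gr.SelectData object whose .index field holds the clicked pixel coordinates, and the app redraws the trajectory by compositing an RGBA overlay onto the first frame. A minimal, self-contained sketch of the same idea (component and function names here are illustrative, not taken from app.py):

import gradio as gr
import numpy as np
from PIL import Image, ImageDraw

def add_point(points, frame, evt: gr.SelectData):
    # evt.index is the (x, y) pixel position that was clicked on the image
    points.append(tuple(evt.index))

    # Draw the accumulated trajectory on a transparent layer, then composite it
    background = Image.fromarray(frame).convert("RGBA")
    overlay = Image.new("RGBA", background.size, (0, 0, 0, 0))
    if len(points) > 1:
        ImageDraw.Draw(overlay).line(points, fill=(255, 0, 0, 255), width=3)
    trajectory = Image.alpha_composite(background, overlay)
    return points, np.array(trajectory)

with gr.Blocks() as demo:
    points_state = gr.State([])   # a plain Python list kept between events
    image = gr.Image(label="Click to add trajectory points")
    image.select(add_point, [points_state, image], [points_state, image])

demo.launch()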
@@ -492,147 +498,148 @@ def delete_last_step(tracking_points, first_frame_path, drag_mode):

     transparent_layer = Image.fromarray(transparent_layer.astype(np.uint8))
     trajectory_map = Image.alpha_composite(transparent_background, transparent_layer)
-    return tracking_points, trajectory_map
-with
-with gr.
-gr.
-with gr.
-gr.
-                                    width=384,
-                                    model_length=16
-                                    )
-first_frame_path = gr.State()
-tracking_points = gr.State([])
-with gr.Row():
-    with gr.Column(scale=1):
-        image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
-        add_drag_button = gr.Button(value="Add Drag")
-        reset_button = gr.Button(value="Reset")
-        delete_last_drag_button = gr.Button(value="Delete last drag")
-        delete_last_step_button = gr.Button(value="Delete last step")
-    with gr.Column(scale=7):
-        with gr.Row():
-            with gr.Column(scale=6):
-                input_image = gr.Image(label="Input Image",
-                                       interactive=True,
-                                       height=265,
-                                       width=384,)
-            with gr.Column(scale=6):
-                output_image = gr.Image(label="Motion Path",
-                                        interactive=False,
-                                        width=384,
-                with gr.Group():
-                    seed = gr.Textbox(
-                        label="Seed: ", value=561793204,
-                    )
-                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
-        return input_image, prompt, drag_mode, seed, personalized, examples_type
-    example = gr.Examples(
-        label="Input Example",
-        examples=image_examples,
-        inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-        outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
-        fn=process_example,
-        run_on_click=True,
-        examples_per_page=10,
-        cache_examples=False,
-    )
-with gr.Row():
-    gr.Markdown(citation)
+    return {tracking_points_var: tracking_points, input_image: trajectory_map}
+
+
+if __name__=="__main__":
+    block = gr.Blocks(
+            theme=gr.themes.Soft(
+                radius_size=gr.themes.sizes.radius_none,
+                text_size=gr.themes.sizes.text_md
+            )
+        ).queue()
+    with block as demo:
+        with gr.Row():
+            with gr.Column():
+                gr.HTML(head)
+
+                gr.Markdown(descriptions)
+
+        with gr.Accordion(label="🛠️ Instructions:", open=True, elem_id="accordion"):
+            with gr.Row(equal_height=True):
+                gr.Markdown(instructions)
+
+
+        # device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+        device = torch.device("cuda")
+        unet_path = 'models/unet.ckpt'
+        image_controlnet_path = 'models/image_controlnet.ckpt'
+        flow_controlnet_path = 'models/flow_controlnet.ckpt'
+        ImageConductor_net = ImageConductor(device=device,
+                                            unet_path=unet_path,
+                                            image_controlnet_path=image_controlnet_path,
+                                            flow_controlnet_path=flow_controlnet_path,
                                             height=256,
+                                            width=384,
+                                            model_length=16
+                                            )
+        first_frame_path_var = gr.State(value=None)
+        tracking_points_var = gr.State([])
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                image_upload_button = gr.UploadButton(label="Upload Image",file_types=["image"])
+                add_drag_button = gr.Button(value="Add Drag")
+                reset_button = gr.Button(value="Reset")
+                delete_last_drag_button = gr.Button(value="Delete last drag")
+                delete_last_step_button = gr.Button(value="Delete last step")
+

+
+            with gr.Column(scale=7):
+                with gr.Row():
+                    with gr.Column(scale=6):
+                        input_image = gr.Image(label="Input Image",
+                                               interactive=True,
+                                               height=300,
+                                               width=384,)
+                    with gr.Column(scale=6):
+                        output_image = gr.Image(label="Motion Path",
+                                                interactive=False,
+                                                height=256,
+                                                width=384,)
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        prompt = gr.Textbox(value="a wonderful elf.", label="Prompt (highly-recommended)", interactive=True, visible=True)
+                        negative_prompt = gr.Text(
+                            label="Negative Prompt",
+                            max_lines=5,
+                            placeholder="Please input your negative prompt",
+                            value='worst quality, low quality, letterboxed',lines=1
                         )
+                        drag_mode = gr.Radio(['camera', 'object'], label='Drag mode: ', value='object', scale=2)
+                        run_button = gr.Button(value="Run")
+
+                        with gr.Accordion("More input params", open=False, elem_id="accordion1"):
+                            with gr.Group():
+                                seed = gr.Textbox(
+                                    label="Seed: ", value=561793204,
                                 )
+                                randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                            with gr.Group():
+                                with gr.Row():
+                                    guidance_scale = gr.Slider(
+                                        label="Guidance scale",
+                                        minimum=1,
+                                        maximum=12,
+                                        step=0.1,
+                                        value=8.5,
+                                    )
+                                    num_inference_steps = gr.Slider(
+                                        label="Number of inference steps",
+                                        minimum=1,
+                                        maximum=50,
+                                        step=1,
+                                        value=25,
+                                    )
+
+                            with gr.Group():
+                                personalized = gr.Dropdown(label="Personalized", choices=['HelloObject', 'TUSUN', ""], value="")
+                                examples_type = gr.Textbox(label="Examples Type (Ignore) ", value="", visible=False)
+
+                    with gr.Column(scale=7):
+                        output_video = gr.Video(
+                            label="Output Video",
+                            width=384,
+                            height=256)
+
+
+        with gr.Row():
+            def process_example(input_image, prompt, drag_mode, seed, personalized, examples_type):
+
+                return input_image, prompt, drag_mode, seed, personalized, examples_type
+
+            example = gr.Examples(
+                label="Input Example",
+                examples=image_examples,
+                inputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+                outputs=[input_image, prompt, drag_mode, seed, personalized, examples_type],
+                fn=process_example,
+                run_on_click=True,
+                examples_per_page=10,
+                cache_examples=False,
+            )


+        with gr.Row():
+            gr.Markdown(citation)

+
+        image_upload_button.upload(preprocess_image, [image_upload_button, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])

+        add_drag_button.click(add_drag, [tracking_points_var], tracking_points_var)

+        delete_last_drag_button.click(delete_last_drag, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])

+        delete_last_step_button.click(delete_last_step, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])

+        reset_button.click(reset_states, [first_frame_path_var, tracking_points_var], [input_image, first_frame_path_var, tracking_points_var])

+        input_image.select(add_tracking_points, [tracking_points_var, first_frame_path_var, drag_mode], [tracking_points_var, input_image])

+        run_button.click(ImageConductor_net.run, [first_frame_path_var, tracking_points_var, prompt, drag_mode,
+                                                  negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, personalized, examples_type],
+                         [output_image, output_video])

 demo.launch()
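Two Gradio features carry most of the rewiring above: gr.State values are passed into callbacks as plain Python objects (so the handlers can append to and pop from the track lists directly), and a callback may return a dict keyed by output components instead of a positional tuple, provided every key appears in the listener's outputs list. A small sketch of that pattern, with made-up component names:

import gradio as gr

def add_track(tracks):
    tracks.append([])                      # start a new, empty track
    return {tracks_state: tracks}          # update only the state output

def reset(tracks):
    # every key below must appear in the listener's `outputs` list
    return {tracks_state: [], preview: None}

with gr.Blocks() as demo:
    tracks_state = gr.State([])            # per-session Python value
    preview = gr.Image(label="Preview")
    add_button = gr.Button("Add Drag")
    reset_button = gr.Button("Reset")

    add_button.click(add_track, [tracks_state], [tracks_state])
    reset_button.click(reset, [tracks_state], [tracks_state, preview])

demo.launch()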
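The gr.Examples block uses run_on_click=True with a pass-through function, so clicking an example copies its values straight into the listed components without cached outputs. Roughly, with invented example data:

import gradio as gr

def process_example(image, prompt):
    # forward the example values into the UI components unchanged
    return image, prompt

example_rows = [
    ["assets/example_1.jpg", "a corgi running on the beach"],   # hypothetical paths and prompts
    ["assets/example_2.jpg", "a red car driving through rain"],
]

with gr.Blocks() as demo:
    image = gr.Image(label="Input Image")
    prompt = gr.Textbox(label="Prompt")
    gr.Examples(
        label="Input Example",
        examples=example_rows,
        inputs=[image, prompt],
        outputs=[image, prompt],
        fn=process_example,
        run_on_click=True,
        cache_examples=False,
    )

demo.launch()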
pipelines/pipeline_imagecoductor.py
CHANGED

@@ -404,7 +404,6 @@ class ImageConductorPipeline(DiffusionPipeline):
         obj_latents = copy.deepcopy(latents)
         cam_latents = copy.deepcopy(latents)

-        print("device", device)
         # Prepare extra step kwargs.
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)


@@ -463,13 +462,7 @@ class ImageConductorPipeline(DiffusionPipeline):
             controlnet_images_mask = controlnet_images_mask.half()
             controlnet_flows = controlnet_flows.half()
             text_embeddings = text_embeddings.half()
-
-            print("controlnet_prompt_embeds device", controlnet_prompt_embeds.device)
-            print("controlnet_images device", controlnet_images.device)
-            print("t", t.device)
-
-
-            print("self.image_controlnet", self.image_controlnet.controlnet_mid_block.weight.device)
+

         img_down_block_additional_residuals, img_mid_block_additional_residuals = self.image_controlnet(
             controlnet_noisy_latents, t,