Use LCM for inpainting instead of an inpainting model with LCM LoRA, DeepCache, etc.; parameterize the number of inference steps and the random seed
Browse files
- app.py +20 -25
- requirements.txt +0 -1
app.py
CHANGED
@@ -14,6 +14,9 @@ from datetime import datetime
|
|
14 |
# but segformer does not work on mps lolololol
|
15 |
preferred_device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
preferred_dtype = torch.float16 if preferred_device == 'cuda' else torch.float32
|
|
|
|
|
|
|
17 |
|
18 |
seg_model_img_size = 768
|
19 |
seg_model_size = 0
|
@@ -21,30 +24,20 @@ seg_model_size = 0
|
|
21 |
seg_feature_extractor = SegformerFeatureExtractor.from_pretrained(f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}")
|
22 |
seg_model = SegformerForSemanticSegmentation.from_pretrained(
|
23 |
f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}"
|
24 |
-
).to(preferred_device)
|
25 |
|
26 |
inpainting_pipeline = StableDiffusionInpaintPipeline.from_pretrained(
|
27 |
-
"
|
28 |
-
variant="fp16",
|
29 |
torch_dtype=preferred_dtype,
|
30 |
safety_checker=None,
|
31 |
-
).to(
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
helper.enable()
|
37 |
-
|
38 |
-
# if preferred_device == "cuda":
|
39 |
-
# inpainting_pipeline.unet = torch.compile(inpainting_pipeline.unet)
|
40 |
-
# inpainting_pipeline.vae = torch.compile(inpainting_pipeline.vae)
|
41 |
-
|
42 |
-
# inpainting_pipeline.scheduler = LCMScheduler.from_config(inpainting_pipeline.scheduler.config)
|
43 |
-
# inpainting_pipeline.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", torch_dtype=preferred_dtype)
|
44 |
-
# inpainting_pipeline.fuse_lora()
|
45 |
|
46 |
seg_working_size = (seg_model_img_size, seg_model_img_size)
|
47 |
-
repaint_working_size = (
|
48 |
|
49 |
default_inpainting_prompt = "award-winning photo of a leafy pedestrian mall full of people, with multiracial genderqueer joggers and bicyclists and wheelchair users talking and laughing"
|
50 |
|
@@ -63,11 +56,11 @@ def get_seg_mask(img):
|
|
63 |
outputs = seg_model(**inputs)
|
64 |
logits = outputs.logits[0]
|
65 |
mask = Image.fromarray((ban_cars_mask[ torch.argmax(logits, dim=0).cpu().numpy() ]) * 255)
|
66 |
-
blurred_widened_mask = ImageEnhance.Contrast(mask.filter(ImageFilter.GaussianBlur(
|
67 |
return blurred_widened_mask
|
68 |
|
69 |
|
70 |
-
def app(img, prompt):
|
71 |
start_time = datetime.now().timestamp()
|
72 |
old_size = Image.fromarray(img).size
|
73 |
img = np.array(Image.fromarray(img).resize(seg_working_size))
|
@@ -79,10 +72,11 @@ def app(img, prompt):
|
|
79 |
prompt=prompt,
|
80 |
image=Image.fromarray(img).resize(repaint_working_size),
|
81 |
mask_image=(mask).resize(repaint_working_size),
|
82 |
-
strength=
|
83 |
-
num_inference_steps=
|
84 |
height=repaint_working_size[0],
|
85 |
width=repaint_working_size[1],
|
|
|
86 |
).images[0]
|
87 |
#overlay_img.save("overlay_raw.jpg")
|
88 |
end_time = datetime.now().timestamp()
|
@@ -94,12 +88,13 @@ def app(img, prompt):
|
|
94 |
#overlay_img.save("overlay_with_text.jpg")
|
95 |
return overlay_img
|
96 |
|
97 |
-
|
98 |
|
99 |
-
for i in
|
100 |
-
|
|
|
101 |
|
102 |
#ideally:
|
103 |
#iface = gr.Interface(app, gr.Image(sources=["webcam"], streaming=True), "image", live=True)
|
104 |
-
iface = gr.Interface(app, [gr.Image(), gr.Textbox(value=default_inpainting_prompt)], "image")
|
105 |
iface.launch()
|
|
|
14 |
# but segformer does not work on mps lolololol
|
15 |
preferred_device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
preferred_dtype = torch.float16 if preferred_device == 'cuda' else torch.float32
|
17 |
+
inpaint_preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
|
18 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
19 |
+
preferred_backend = "aot_eager" if inpaint_preferred_device == "mps" else ("tensorrt" if inpaint_preferred_device == "cuda" else "inductor")
|
20 |
|
21 |
seg_model_img_size = 768
|
22 |
seg_model_size = 0
|
|
|
24 |
seg_feature_extractor = SegformerFeatureExtractor.from_pretrained(f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}")
|
25 |
seg_model = SegformerForSemanticSegmentation.from_pretrained(
|
26 |
f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}"
|
27 |
+
).to(preferred_device).to(preferred_dtype)
|
28 |
|
29 |
inpainting_pipeline = StableDiffusionInpaintPipeline.from_pretrained(
|
30 |
+
"SimianLuo/LCM_Dreamshaper_v7",
|
|
|
31 |
torch_dtype=preferred_dtype,
|
32 |
safety_checker=None,
|
33 |
+
).to(inpaint_preferred_device)
|
34 |
|
35 |
+
inpainting_pipeline.unet = torch.compile(inpainting_pipeline.unet, backend=preferred_backend)
|
36 |
+
inpainting_pipeline.vae = torch.compile(inpainting_pipeline.vae, backend=preferred_backend)
|
37 |
+
seg_model = torch.compile(seg_model, backend=preferred_backend)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
seg_working_size = (seg_model_img_size, seg_model_img_size)
|
40 |
+
repaint_working_size = (768, 768)
|
41 |
|
42 |
default_inpainting_prompt = "award-winning photo of a leafy pedestrian mall full of people, with multiracial genderqueer joggers and bicyclists and wheelchair users talking and laughing"
|
43 |
|
|
|
56 |
outputs = seg_model(**inputs)
|
57 |
logits = outputs.logits[0]
|
58 |
mask = Image.fromarray((ban_cars_mask[ torch.argmax(logits, dim=0).cpu().numpy() ]) * 255)
|
59 |
+
blurred_widened_mask = ImageEnhance.Contrast(mask.filter(ImageFilter.GaussianBlur(2))).enhance(9000)
|
60 |
return blurred_widened_mask
|
61 |
|
62 |
|
63 |
+
def app(img, prompt, num_inference_steps, seed):
|
64 |
start_time = datetime.now().timestamp()
|
65 |
old_size = Image.fromarray(img).size
|
66 |
img = np.array(Image.fromarray(img).resize(seg_working_size))
|
|
|
72 |
prompt=prompt,
|
73 |
image=Image.fromarray(img).resize(repaint_working_size),
|
74 |
mask_image=(mask).resize(repaint_working_size),
|
75 |
+
strength=1,
|
76 |
+
num_inference_steps=num_inference_steps,
|
77 |
height=repaint_working_size[0],
|
78 |
width=repaint_working_size[1],
|
79 |
+
generator=torch.manual_seed(int(seed)),
|
80 |
).images[0]
|
81 |
#overlay_img.save("overlay_raw.jpg")
|
82 |
end_time = datetime.now().timestamp()
|
|
|
88 |
#overlay_img.save("overlay_with_text.jpg")
|
89 |
return overlay_img
|
90 |
|
91 |
+
# warmup, for compiling and then for timing
|
92 |
|
93 |
+
for i in range(2):
|
94 |
+
for j in tqdm(range(3 ** i)):
|
95 |
+
app(np.array(Image.fromarray(np.zeros((1024,1024,3), dtype=np.uint8))), default_inpainting_prompt, 4, 42).save("zeros_inpainting_oneshot.jpg")
|
96 |
|
97 |
#ideally:
|
98 |
#iface = gr.Interface(app, gr.Image(sources=["webcam"], streaming=True), "image", live=True)
|
99 |
+
iface = gr.Interface(app, [gr.Image(), gr.Textbox(value=default_inpainting_prompt), gr.Number(minimum=1, maximum=8, value=4), gr.Number(value=42)], "image")
|
100 |
iface.launch()
|
requirements.txt
CHANGED
@@ -4,4 +4,3 @@ torch==2.2.1
|
|
4 |
accelerate
|
5 |
peft
|
6 |
optimum
|
7 |
-
DeepCache
|
|
|
4 |
accelerate
|
5 |
peft
|
6 |
optimum
|
|