lsb committed on
Commit
c8e54f6
1 Parent(s): 568e892

Use LCM for inpainting instead of the inpainting model with LCM LoRA, DeepCache, etc.; parameterize the number of inference steps and the random seed

Browse files
Files changed (2) hide show
  1. app.py +20 -25
  2. requirements.txt +0 -1
app.py CHANGED
@@ -14,6 +14,9 @@ from datetime import datetime
14
  # but segformer does not work on mps lolololol
15
  preferred_device = "cuda" if torch.cuda.is_available() else "cpu"
16
  preferred_dtype = torch.float16 if preferred_device == 'cuda' else torch.float32
 
 
 
17
 
18
  seg_model_img_size = 768
19
  seg_model_size = 0
@@ -21,30 +24,20 @@ seg_model_size = 0
21
  seg_feature_extractor = SegformerFeatureExtractor.from_pretrained(f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}")
22
  seg_model = SegformerForSemanticSegmentation.from_pretrained(
23
  f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}"
24
- ).to(preferred_device) #.to(preferred_dtype)
25
 
26
  inpainting_pipeline = StableDiffusionInpaintPipeline.from_pretrained(
27
- "runwayml/stable-diffusion-inpainting",
28
- variant="fp16",
29
  torch_dtype=preferred_dtype,
30
  safety_checker=None,
31
- ).to(preferred_device)
32
 
33
- from DeepCache import DeepCacheSDHelper
34
- helper = DeepCacheSDHelper(pipe=inpainting_pipeline)
35
- helper.set_params(cache_interval=3, cache_branch_id=0)
36
- helper.enable()
37
-
38
- # if preferred_device == "cuda":
39
- # inpainting_pipeline.unet = torch.compile(inpainting_pipeline.unet)
40
- # inpainting_pipeline.vae = torch.compile(inpainting_pipeline.vae)
41
-
42
- # inpainting_pipeline.scheduler = LCMScheduler.from_config(inpainting_pipeline.scheduler.config)
43
- # inpainting_pipeline.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", torch_dtype=preferred_dtype)
44
- # inpainting_pipeline.fuse_lora()
45
 
46
  seg_working_size = (seg_model_img_size, seg_model_img_size)
47
- repaint_working_size = (512, 512)
48
 
49
  default_inpainting_prompt = "award-winning photo of a leafy pedestrian mall full of people, with multiracial genderqueer joggers and bicyclists and wheelchair users talking and laughing"
50
 
@@ -63,11 +56,11 @@ def get_seg_mask(img):
63
  outputs = seg_model(**inputs)
64
  logits = outputs.logits[0]
65
  mask = Image.fromarray((ban_cars_mask[ torch.argmax(logits, dim=0).cpu().numpy() ]) * 255)
66
- blurred_widened_mask = ImageEnhance.Contrast(mask.filter(ImageFilter.GaussianBlur(5))).enhance(9000)
67
  return blurred_widened_mask
68
 
69
 
70
- def app(img, prompt):
71
  start_time = datetime.now().timestamp()
72
  old_size = Image.fromarray(img).size
73
  img = np.array(Image.fromarray(img).resize(seg_working_size))
@@ -79,10 +72,11 @@ def app(img, prompt):
79
  prompt=prompt,
80
  image=Image.fromarray(img).resize(repaint_working_size),
81
  mask_image=(mask).resize(repaint_working_size),
82
- strength=0.95,
83
- num_inference_steps=16,
84
  height=repaint_working_size[0],
85
  width=repaint_working_size[1],
 
86
  ).images[0]
87
  #overlay_img.save("overlay_raw.jpg")
88
  end_time = datetime.now().timestamp()
@@ -94,12 +88,13 @@ def app(img, prompt):
94
  #overlay_img.save("overlay_with_text.jpg")
95
  return overlay_img
96
 
97
- ### kick the tires before we start
98
 
99
- for i in tqdm(range(2)):
100
- app(np.array(Image.fromarray(np.zeros((1024,1024,3), dtype=np.uint8))), default_inpainting_prompt).save("zeros_inpainting_oneshot.jpg")
 
101
 
102
  #ideally:
103
  #iface = gr.Interface(app, gr.Image(sources=["webcam"], streaming=True), "image", live=True)
104
- iface = gr.Interface(app, [gr.Image(), gr.Textbox(value=default_inpainting_prompt)], "image")
105
  iface.launch()
 
14
  # but segformer does not work on mps lolololol
15
  preferred_device = "cuda" if torch.cuda.is_available() else "cpu"
16
  preferred_dtype = torch.float16 if preferred_device == 'cuda' else torch.float32
17
+ inpaint_preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
18
+ torch.backends.cuda.matmul.allow_tf32 = True
19
+ preferred_backend = "aot_eager" if inpaint_preferred_device == "mps" else ("tensorrt" if inpaint_preferred_device == "cuda" else "inductor")
20
 
21
  seg_model_img_size = 768
22
  seg_model_size = 0
 
24
  seg_feature_extractor = SegformerFeatureExtractor.from_pretrained(f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}")
25
  seg_model = SegformerForSemanticSegmentation.from_pretrained(
26
  f"nvidia/segformer-b{seg_model_size}-finetuned-cityscapes-{seg_model_img_size}-{seg_model_img_size}"
27
+ ).to(preferred_device).to(preferred_dtype)
28
 
29
  inpainting_pipeline = StableDiffusionInpaintPipeline.from_pretrained(
30
+ "SimianLuo/LCM_Dreamshaper_v7",
 
31
  torch_dtype=preferred_dtype,
32
  safety_checker=None,
33
+ ).to(inpaint_preferred_device)
34
 
35
+ inpainting_pipeline.unet = torch.compile(inpainting_pipeline.unet, backend=preferred_backend)
36
+ inpainting_pipeline.vae = torch.compile(inpainting_pipeline.vae, backend=preferred_backend)
37
+ seg_model = torch.compile(seg_model, backend=preferred_backend)
 
 
 
 
 
 
 
 
 
38
 
39
  seg_working_size = (seg_model_img_size, seg_model_img_size)
40
+ repaint_working_size = (768, 768)
41
 
42
  default_inpainting_prompt = "award-winning photo of a leafy pedestrian mall full of people, with multiracial genderqueer joggers and bicyclists and wheelchair users talking and laughing"
43
 
 
56
  outputs = seg_model(**inputs)
57
  logits = outputs.logits[0]
58
  mask = Image.fromarray((ban_cars_mask[ torch.argmax(logits, dim=0).cpu().numpy() ]) * 255)
59
+ blurred_widened_mask = ImageEnhance.Contrast(mask.filter(ImageFilter.GaussianBlur(2))).enhance(9000)
60
  return blurred_widened_mask
61
 
62
 
63
+ def app(img, prompt, num_inference_steps, seed):
64
  start_time = datetime.now().timestamp()
65
  old_size = Image.fromarray(img).size
66
  img = np.array(Image.fromarray(img).resize(seg_working_size))
 
72
  prompt=prompt,
73
  image=Image.fromarray(img).resize(repaint_working_size),
74
  mask_image=(mask).resize(repaint_working_size),
75
+ strength=1,
76
+ num_inference_steps=num_inference_steps,
77
  height=repaint_working_size[0],
78
  width=repaint_working_size[1],
79
+ generator=torch.manual_seed(int(seed)),
80
  ).images[0]
81
  #overlay_img.save("overlay_raw.jpg")
82
  end_time = datetime.now().timestamp()
 
88
  #overlay_img.save("overlay_with_text.jpg")
89
  return overlay_img
90
 
91
+ # warmup, for compiling and then for timing
92
 
93
+ for i in range(2):
94
+ for j in tqdm(range(3 ** i)):
95
+ app(np.array(Image.fromarray(np.zeros((1024,1024,3), dtype=np.uint8))), default_inpainting_prompt, 4, 42).save("zeros_inpainting_oneshot.jpg")
96
 
97
  #ideally:
98
  #iface = gr.Interface(app, gr.Image(sources=["webcam"], streaming=True), "image", live=True)
99
+ iface = gr.Interface(app, [gr.Image(), gr.Textbox(value=default_inpainting_prompt), gr.Number(minimum=1, maximum=8, value=4), gr.Number(value=42)], "image")
100
  iface.launch()
requirements.txt CHANGED
@@ -4,4 +4,3 @@ torch==2.2.1
4
  accelerate
5
  peft
6
  optimum
7
- DeepCache
 
4
  accelerate
5
  peft
6
  optimum