Commit 14da090
Parent(s): 0beb34a
Update handler.py

handler.py CHANGED (+28 -34)
@@ -15,10 +15,10 @@ if device.type != 'cuda':
 class EndpointHandler():
     def __init__(self, path=""):
 
-        self.
+        self.fast_pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16").to("cuda")
         self.generator = torch.Generator(device="cuda").manual_seed(0)
 
-
+
         # load StableDiffusionInpaintPipeline pipeline
         self.pipe = AutoPipelineForInpainting.from_pretrained(
             "runwayml/stable-diffusion-inpainting",
@@ -34,7 +34,6 @@ class EndpointHandler():
         self.pipe2.to("cuda")
 
         self.pipe3 = AutoPipelineForImage2Image.from_pipe(self.pipe2)
-        """
 
 
 
@@ -49,6 +48,11 @@ class EndpointHandler():
         prompt = data.pop("prompt", "")
 
         negative_prompt = data.pop("negative_prompt", "")
+
+        method = data.pop("method", "slow")
+        strength = data.pop("strength", 0.2)
+        guidance_scale = data.pop("guidance_scale", 8.0)
+        num_inference_steps = data.pop("num_inference_steps", 20)
 
         # process image
         if encoded_image is not None and encoded_mask_image is not None:
@@ -58,21 +62,21 @@ class EndpointHandler():
            image = None
            mask_image = None
 
+        if method == "fast":
+            image = self.fast_pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                image=image,
+                mask_image=mask_image,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps, # steps between 15 and 30 work well for us
+                strength=strength, # make sure to use `strength` below 1.0
+                generator=self.generator,
+            ).images[0]
+
+            return image
 
-
-            prompt=prompt,
-            image=image,
-            mask_image=mask_image,
-            guidance_scale=8.0,
-            num_inference_steps=20, # steps between 15 and 30 work well for us
-            strength=0.99, # make sure to use `strength` below 1.0
-            generator=self.generator,
-        ).images[0]
-
-        return image
-
-        """
-        pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16").to("cuda")
+        # pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16").to("cuda")
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
@@ -88,9 +92,9 @@ class EndpointHandler():
             negative_prompt=negative_prompt,
             image=image,
             mask_image=mask_image,
-            guidance_scale=8.0
-            num_inference_steps=100
-            strength=0.2
+            guidance_scale=guidance_scale, # 8.0
+            num_inference_steps=num_inference_steps, # 100
+            strength=strength, # 0.2
             output_type="latent", # let's keep in latent to save some VRAM
         ).images[0]
 
@@ -99,24 +103,14 @@ class EndpointHandler():
         image2 = self.pipe3(
             prompt=prompt,
             image=image,
-            guidance_scale=8.0
-            num_inference_steps=100
-            strength=0.2
+            guidance_scale=guidance_scale, # 8.0
+            num_inference_steps=num_inference_steps, # 100
+            strength=strength, # 0.2
         ).images[0]
-
-        print(type(image2))
-        print(type(out.images[0]))
-        print(type(out.images[0].resize((1024, 1024))))
-        print(type(image))
 
-        result = {
-            "final_image": image2,
-            "pipe1_img": out.images[0],
-        }
 
         # return first generated PIL image
-        return
-        """
+        return image2
 
    # helper to decode input image
    def decode_base64_image(self, image_string):
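For context, the request keys this commit starts reading (method, strength, guidance_scale, num_inference_steps) can be exercised like this. A minimal client-side sketch, not part of the commit: it assumes the handler follows the usual custom-handler convention of a __call__(self, data) taking a plain dict, and it assumes "image" and "mask_image" are the payload keys behind encoded_image/encoded_mask_image, which are decoded outside the changed hunks; file paths and prompts are illustrative only.

# Local smoke test for the updated handler (a sketch, not the repo's test suite).
# "method", "strength", "guidance_scale" and "num_inference_steps" are the keys
# this commit pops from the payload; the base64 image keys and the __call__
# convention are assumptions based on standard custom inference handlers.
import base64

from handler import EndpointHandler

def encode_b64(path):
    # The handler expects base64-encoded image strings it decodes itself.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

handler = EndpointHandler(path="")

image = handler({
    "prompt": "a red brick fireplace in a cozy living room",
    "negative_prompt": "blurry, low quality, distorted",
    "image": encode_b64("room.png"),        # assumed key for encoded_image
    "mask_image": encode_b64("mask.png"),   # assumed key for encoded_mask_image
    "method": "fast",                       # new: routes to self.fast_pipe (SDXL inpainting)
    "strength": 0.99,                       # new: the fast path wants strength below 1.0
    "guidance_scale": 8.0,                  # new: matches the default
    "num_inference_steps": 20,              # new: 15-30 per the inline comment
})
image.save("inpainted.png")  # both code paths return a PIL image

Omitting "method" (or passing anything other than "fast") falls through to the existing two-stage pipe2/pipe3 path, which after this commit also honors the strength, guidance_scale, and num_inference_steps overrides (defaulting to 0.2, 8.0, and 20) instead of the previously hard-coded 0.2/8.0/100 values.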