LanHarmony committed
Commit ceebd56
1 Parent(s): f97efd7

support GroundingDINO and segment-anything

Files changed (2):
  1. app.py +0 -1
  2. visual_foundation_models.py +13 -31
app.py CHANGED
@@ -210,7 +210,6 @@ bot = ConversationBot({'Text2Box': 'cuda:0',
                        'Inpainting': 'cuda:0',
                        'Text2Image': 'cuda:0',
                        'ImageCaptioning': 'cuda:0',
-                       'ImageEditing': 'cuda:0',
                        'VisualQuestionAnswering': 'cuda:0',
                        'Image2Canny': 'cpu',
                        'CannyText2Image': 'cuda:0',
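
With the 'ImageEditing' entry removed, ImageEditing no longer gets a device of its own in the load dict; as the visual_foundation_models.py diff below shows, its edits now route through the shared Inpainting wrapper, and classes flagged with template_model = True are composed from already-loaded models rather than instantiated on a device. A hedged sketch of that composition convention (the load_models helper and its signature are hypothetical, not code from app.py):

# Hypothetical sketch of the template-model loading convention this commit
# relies on -- NOT the actual app.py code. Classes with template_model = True
# (e.g. InfinityOutPainting below) receive other model instances via __init__
# instead of owning a CUDA device themselves.
import inspect

def load_models(load_dict, model_classes):
    models = {}
    # 1. Instantiate device-owning models, e.g. 'Inpainting': 'cuda:0'.
    for name, device in load_dict.items():
        models[name] = model_classes[name](device=device)
    # 2. Compose template models by matching their __init__ parameter names
    #    against the models loaded in step 1.
    for name, cls in model_classes.items():
        if name not in models and getattr(cls, 'template_model', False):
            params = inspect.signature(cls.__init__).parameters
            deps = {p: models[p] for p in params if p != 'self'}
            models[name] = cls(**deps)
    return models
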
visual_foundation_models.py CHANGED
@@ -935,19 +935,18 @@ class Inpainting:
         self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
             "runwayml/stable-diffusion-inpainting", revision=self.revision, torch_dtype=self.torch_dtype).to(device)
 
-    def __call__(self, prompt, original_image, mask_image):
-        update_image = self.inpaint(prompt=prompt, image=original_image.resize((512, 512)),
-                                    mask_image=mask_image.resize((512, 512))).images[0]
+    def __call__(self, prompt, image, mask_image, height=512, width=512, num_inference_steps=50):
+        update_image = self.inpaint(prompt=prompt, image=image.resize((width, height)),
+                                    mask_image=mask_image.resize((width, height)), height=height, width=width,
+                                    num_inference_steps=num_inference_steps).images[0]
         return update_image
 
 
 class InfinityOutPainting:
-    template_model = True # Add this line to show this is a template model.
-
-    def __init__(self, ImageCaptioning, ImageEditing, VisualQuestionAnswering):
-        self.llm = OpenAI(temperature=0)
+    template_model = True # Add this line to show this is a template model.
+    def __init__(self, ImageCaptioning, Inpainting, VisualQuestionAnswering):
         self.ImageCaption = ImageCaptioning
-        self.ImageEditing = ImageEditing
+        self.inpaint = Inpainting
         self.ImageVQA = VisualQuestionAnswering
         self.a_prompt = 'best quality, extremely detailed'
         self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, ' \
@@ -963,32 +962,15 @@ class InfinityOutPainting:
 
     def get_BLIP_caption(self, image):
         inputs = self.ImageCaption.processor(image, return_tensors="pt").to(self.ImageCaption.device,
-                                             self.ImageCaption.torch_dtype)
+                                             self.ImageCaption.torch_dtype)
         out = self.ImageCaption.model.generate(**inputs)
         BLIP_caption = self.ImageCaption.processor.decode(out[0], skip_special_tokens=True)
         return BLIP_caption
 
-    def check_prompt(self, prompt):
-        check = f"Here is a paragraph with adjectives. " \
-                f"{prompt} " \
-                f"Please change all plural forms in the adjectives to singular forms. "
-        return self.llm(check)
-
     def get_imagine_caption(self, image, imagine):
         BLIP_caption = self.get_BLIP_caption(image)
-        background_color = self.get_BLIP_vqa(image, 'what is the background color of this image')
-        style = self.get_BLIP_vqa(image, 'what is the style of this image')
-        imagine_prompt = f"let's pretend you are an excellent painter and now " \
-                         f"there is an incomplete painting with {BLIP_caption} in the center, " \
-                         f"please imagine the complete painting and describe it" \
-                         f"you should consider the background color is {background_color}, the style is {style}" \
-                         f"You should make the painting as vivid and realistic as possible" \
-                         f"You can not use words like painting or picture" \
-                         f"and you should use no more than 50 words to describe it"
-        caption = self.llm(imagine_prompt) if imagine else BLIP_caption
-        caption = self.check_prompt(caption)
-        print(f'BLIP observation: {BLIP_caption}, ChatGPT imagine to {caption}') if imagine else print(
-            f'Prompt: {caption}')
+        caption = BLIP_caption
+        print(f'Prompt: {caption}')
         return caption
 
     def resize_image(self, image, max_size=1000000, multiple=8):
@@ -1014,9 +996,9 @@ class InfinityOutPainting:
             temp_canvas.paste(old_img, (x, y))
             temp_mask.paste(0, (x, y, x + old_img.width, y + old_img.height))
             resized_temp_canvas, resized_temp_mask = self.resize_image(temp_canvas), self.resize_image(temp_mask)
-            image = self.ImageEditing.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
+            image = self.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
                                  height=resized_temp_canvas.height, width=resized_temp_canvas.width,
-                                 num_inference_steps=50).images[0].resize(
+                                 num_inference_steps=50).resize(
                                  (temp_canvas.width, temp_canvas.height), Image.ANTIALIAS)
             image = blend_gt2pt(old_img, image)
             old_img = image
@@ -1119,7 +1101,7 @@ class ImageEditing:
         mask = self.pad_edge(mask, padding=20) # numpy
         mask_image = Image.fromarray(mask)
 
-        updated_image = self.inpaint(prompt=replace_with_txt, original_image=image_pil,
+        updated_image = self.inpaint(prompt=replace_with_txt, image=image_pil,
                                      mask_image=mask_image)
         updated_image_path = get_new_image_name(image_path, func_name="replace-something")
         updated_image = updated_image.resize(image_pil.size)
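
Taken together, the Inpainting wrapper now owns the resize target, output size, and step count its callers previously hard-coded, and __call__ returns the PIL image directly (it applies .images[0] itself), which is why InfinityOutPainting drops its own .images[0]. A minimal usage sketch, assuming Inpainting.__init__ takes a device string (as the .to(device) call suggests) and with placeholder input files:

# Minimal usage sketch of the refactored wrapper -- the constructor argument
# and file names here are assumptions, not code from this commit.
from PIL import Image

inpaint = Inpainting(device='cuda:0')
canvas = Image.open('canvas.png')   # placeholder: image with empty border
mask = Image.open('mask.png')       # placeholder: white = region to repaint

# height/width should be multiples of 8 for Stable Diffusion; the
# resize_image(..., multiple=8) helper in InfinityOutPainting enforces that.
result = inpaint(prompt='best quality, extremely detailed',
                 image=canvas, mask_image=mask,
                 height=canvas.height, width=canvas.width,
                 num_inference_steps=50)

# __call__ already returns a PIL image, so callers chain .resize() on it
# directly, mirroring the InfinityOutPainting hunk above.
result = result.resize(canvas.size, Image.ANTIALIAS)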