LanHarmony committed
Commit ceebd56
1 Parent(s): f97efd7

support GroundingDINO and segment-anything

Files changed (2):
  1. app.py +0 -1
  2. visual_foundation_models.py +13 -31
app.py CHANGED
@@ -210,7 +210,6 @@ bot = ConversationBot({'Text2Box': 'cuda:0',
                        'Inpainting': 'cuda:0',
                        'Text2Image': 'cuda:0',
                        'ImageCaptioning': 'cuda:0',
-                       'ImageEditing': 'cuda:0',
                        'VisualQuestionAnswering': 'cuda:0',
                        'Image2Canny': 'cpu',
                        'CannyText2Image': 'cuda:0',
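
With the 'ImageEditing' entry removed, ImageEditing no longer gets a device of its own in the load dict; as the visual_foundation_models.py diff below shows, its edits now route through the shared Inpainting wrapper, and classes flagged with template_model = True are composed from already-loaded models rather than instantiated on a device. A hedged sketch of that composition convention (the load_models helper and its signature are hypothetical, not code from app.py):

# Hypothetical sketch of the template-model loading convention this commit
# relies on -- NOT the actual app.py code. Classes with template_model = True
# (e.g. InfinityOutPainting below) receive other model instances via __init__
# instead of owning a CUDA device themselves.
import inspect

def load_models(load_dict, model_classes):
    models = {}
    # 1. Instantiate device-owning models, e.g. 'Inpainting': 'cuda:0'.
    for name, device in load_dict.items():
        models[name] = model_classes[name](device=device)
    # 2. Compose template models by matching their __init__ parameter names
    #    against the models loaded in step 1.
    for name, cls in model_classes.items():
        if name not in models and getattr(cls, 'template_model', False):
            params = inspect.signature(cls.__init__).parameters
            deps = {p: models[p] for p in params if p != 'self'}
            models[name] = cls(**deps)
    return models
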
visual_foundation_models.py CHANGED
@@ -935,19 +935,18 @@ class Inpainting:
         self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
             "runwayml/stable-diffusion-inpainting", revision=self.revision, torch_dtype=self.torch_dtype).to(device)
 
-    def __call__(self, prompt, original_image, mask_image):
-        update_image = self.inpaint(prompt=prompt, image=original_image.resize((512, 512)),
-                                    mask_image=mask_image.resize((512, 512))).images[0]
+    def __call__(self, prompt, image, mask_image, height=512, width=512, num_inference_steps=50):
+        update_image = self.inpaint(prompt=prompt, image=image.resize((width, height)),
+                                    mask_image=mask_image.resize((width, height)), height=height, width=width,
+                                    num_inference_steps=num_inference_steps).images[0]
         return update_image
 
 
 class InfinityOutPainting:
-    template_model = True # Add this line to show this is a template model.
-
-    def __init__(self, ImageCaptioning, ImageEditing, VisualQuestionAnswering):
-        self.llm = OpenAI(temperature=0)
+    template_model = True # Add this line to show this is a template model.
+    def __init__(self, ImageCaptioning, Inpainting, VisualQuestionAnswering):
         self.ImageCaption = ImageCaptioning
-        self.ImageEditing = ImageEditing
+        self.inpaint = Inpainting
         self.ImageVQA = VisualQuestionAnswering
         self.a_prompt = 'best quality, extremely detailed'
         self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, ' \
@@ -963,32 +962,15 @@ class InfinityOutPainting:
 
     def get_BLIP_caption(self, image):
         inputs = self.ImageCaption.processor(image, return_tensors="pt").to(self.ImageCaption.device,
-                                             self.ImageCaption.torch_dtype)
+                                             self.ImageCaption.torch_dtype)
         out = self.ImageCaption.model.generate(**inputs)
         BLIP_caption = self.ImageCaption.processor.decode(out[0], skip_special_tokens=True)
         return BLIP_caption
 
-    def check_prompt(self, prompt):
-        check = f"Here is a paragraph with adjectives. " \
-                f"{prompt} " \
-                f"Please change all plural forms in the adjectives to singular forms. "
-        return self.llm(check)
-
     def get_imagine_caption(self, image, imagine):
         BLIP_caption = self.get_BLIP_caption(image)
-        background_color = self.get_BLIP_vqa(image, 'what is the background color of this image')
-        style = self.get_BLIP_vqa(image, 'what is the style of this image')
-        imagine_prompt = f"let's pretend you are an excellent painter and now " \
-                         f"there is an incomplete painting with {BLIP_caption} in the center, " \
-                         f"please imagine the complete painting and describe it" \
-                         f"you should consider the background color is {background_color}, the style is {style}" \
-                         f"You should make the painting as vivid and realistic as possible" \
-                         f"You can not use words like painting or picture" \
-                         f"and you should use no more than 50 words to describe it"
-        caption = self.llm(imagine_prompt) if imagine else BLIP_caption
-        caption = self.check_prompt(caption)
-        print(f'BLIP observation: {BLIP_caption}, ChatGPT imagine to {caption}') if imagine else print(
-            f'Prompt: {caption}')
+        caption = BLIP_caption
+        print(f'Prompt: {caption}')
         return caption
 
     def resize_image(self, image, max_size=1000000, multiple=8):
@@ -1014,9 +996,9 @@ class InfinityOutPainting:
             temp_canvas.paste(old_img, (x, y))
             temp_mask.paste(0, (x, y, x + old_img.width, y + old_img.height))
             resized_temp_canvas, resized_temp_mask = self.resize_image(temp_canvas), self.resize_image(temp_mask)
-            image = self.ImageEditing.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
+            image = self.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
                                  height=resized_temp_canvas.height, width=resized_temp_canvas.width,
-                                 num_inference_steps=50).images[0].resize(
+                                 num_inference_steps=50).resize(
                                  (temp_canvas.width, temp_canvas.height), Image.ANTIALIAS)
             image = blend_gt2pt(old_img, image)
             old_img = image
@@ -1119,7 +1101,7 @@ class ImageEditing:
         mask = self.pad_edge(mask, padding=20) # numpy
         mask_image = Image.fromarray(mask)
 
-        updated_image = self.inpaint(prompt=replace_with_txt, original_image=image_pil,
+        updated_image = self.inpaint(prompt=replace_with_txt, image=image_pil,
                                      mask_image=mask_image)
         updated_image_path = get_new_image_name(image_path, func_name="replace-something")
         updated_image = updated_image.resize(image_pil.size)
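
Taken together, the Inpainting wrapper now owns the resize target, output size, and step count its callers previously hard-coded, and __call__ returns the PIL image directly (it applies .images[0] itself), which is why InfinityOutPainting drops its own .images[0]. A minimal usage sketch, assuming Inpainting.__init__ takes a device string (as the .to(device) call suggests) and with placeholder input files:

# Minimal usage sketch of the refactored wrapper -- the constructor argument
# and file names here are assumptions, not code from this commit.
from PIL import Image

inpaint = Inpainting(device='cuda:0')
canvas = Image.open('canvas.png')   # placeholder: image with empty border
mask = Image.open('mask.png')       # placeholder: white = region to repaint

# height/width should be multiples of 8 for Stable Diffusion; the
# resize_image(..., multiple=8) helper in InfinityOutPainting enforces that.
result = inpaint(prompt='best quality, extremely detailed',
                 image=canvas, mask_image=mask,
                 height=canvas.height, width=canvas.width,
                 num_inference_steps=50)

# __call__ already returns a PIL image, so callers chain .resize() on it
# directly, mirroring the InfinityOutPainting hunk above.
result = result.resize(canvas.size, Image.ANTIALIAS)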