LanHarmony committed
Commit ee54eed
1 Parent(s): bc147cf
Files changed (1)
  1. visual_foundation_models.py +9 -9
visual_foundation_models.py CHANGED
@@ -77,7 +77,7 @@ class ImageEditing:
         print("Initializing ImageEditing to %s" % device)
         self.device = device
         self.mask_former = MaskFormer(device=self.device)
-        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting").to(device)
+        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", revision="fp16", torch_dtype=torch.float16).to(device)
 
     @prompts(name="Remove Something From The Photo",
              description="useful when you want to remove and object or something from the photo "
@@ -113,7 +113,7 @@ class InstructPix2Pix:
     def __init__(self, device):
         print("Initializing InstructPix2Pix to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix",
+        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix", torch_dtype=torch.float16,
                                                                            safety_checker=None).to(device)
         self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipe.scheduler.config)
 
@@ -139,7 +139,7 @@ class Text2Image:
     def __init__(self, device):
         print("Initializing Text2Image to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
         self.text_refine_tokenizer = AutoTokenizer.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_model = AutoModelForCausalLM.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_gpt2_pipe = pipeline("text-generation", model=self.text_refine_model,
@@ -166,13 +166,13 @@ class ImageCaptioning:
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
         self.model = BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-base").to(self.device)
+            "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to(self.device)
 
     @prompts(name="Get Photo Description",
              description="useful when you want to know what is inside the photo. receives image_path as input. "
                          "The input to this tool should be a string, representing the image_path. ")
     def inference(self, image_path):
-        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device)
+        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         captions = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}")
@@ -206,9 +206,9 @@ class Image2Canny:
 class CannyText2Image:
     def __init__(self, device):
         print("Initializing CannyText2Image to %s" % device)
-        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny")
+        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny", torch_dtype=torch.float16)
         self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None)
+            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None, torch_dtype=torch.float16)
         self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
         self.pipe.to(device)
         self.seed = -1
@@ -685,7 +685,7 @@ class VisualQuestionAnswering:
         print("Initializing VisualQuestionAnswering to %s" % device)
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(self.device)
+        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base", torch_dtype=torch.float16).to(self.device)
 
     @prompts(name="Answer Question About The Image",
              description="useful when you need an answer for a question based on an image. "
@@ -694,7 +694,7 @@ class VisualQuestionAnswering:
     def inference(self, inputs):
         image_path, question = inputs.split(",")
         raw_image = Image.open(image_path).convert('RGB')
-        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device)
+        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         answer = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "