# Using a reference image to add an object to the generated image
import torch
from diffusers import StableDiffusionGLIGENTextImagePipeline
from diffusers.utils import load_image
def normalize_bbox(bboxes, img_width, img_height):
    """Convert pixel-coordinate boxes to the [0, 1] range expected by GLIGEN."""
    normalized_bboxes = []
    for box in bboxes:
        x_min, y_min, x_max, y_max = box
        x_min = x_min / img_width
        y_min = y_min / img_height
        x_max = x_max / img_width
        y_max = y_max / img_height
        normalized_bboxes.append([x_min, y_min, x_max, y_max])
    return normalized_bboxes
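
# Quick sanity check on a 600x600 canvas (illustrative values):
#   normalize_bbox([[0, 54, 318, 456]], 600, 600) -> [[0.0, 0.09, 0.53, 0.76]]
# which matches the hard-coded `boxes` used in the __main__ block below.
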
def inference_image(pipe, prompt, grounding_instruction, state):
    """Generate an image grounded by the boxes stored in `state` and the
    semicolon-separated phrases in `grounding_instruction`."""
    print(prompt)
    print(grounding_instruction)
    # Boxes come from `state` in pixel coordinates; a 600x600 canvas is assumed.
    bbox = state['boxes']
    print(bbox)
    bbox = normalize_bbox(bbox, 600, 600)
    print(bbox)
    # Split "obj1;obj2;..." into one phrase per grounding box.
    objects = [obj.strip() for obj in grounding_instruction.split(';') if obj.strip()]
    print(objects)
    image = pipe(
        prompt=prompt,
        gligen_phrases=objects,  # pass the parsed phrase list, not the raw string
        gligen_images=[],
        gligen_boxes=bbox,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images[0]
    return image
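
# Usage sketch (hypothetical values; assumes `state` is a dict whose "boxes"
# entry holds pixel-space boxes drawn on a 600x600 canvas):
#
#   state = {"boxes": [[0, 54, 318, 456]]}
#   result = inference_image(
#       pipe,
#       prompt="a flower sitting on the beach",
#       grounding_instruction="flower",
#       state=state,
#   )
#   result.save("./gligen-inference-image.jpg")
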
if __name__ == "__main__":
    pipe = StableDiffusionGLIGENTextImagePipeline.from_pretrained(
        "anhnct/Gligen_Text_Image", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")

    prompt = "a flower sitting on the beach"
    boxes = [[0.0, 0.09, 0.53, 0.76]]
    phrases = ["flower"]
    # gligen_image = load_image(
    #     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/gligen/pexels-pixabay-60597.jpg"
    # )

    # gligen_images is left empty here, so only the text phrase and box are used
    # for grounding; see the note at the end of the file for the reference-image variant.
    images = pipe(
        prompt=prompt,
        gligen_phrases=phrases,
        gligen_images=[],
        gligen_boxes=boxes,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images
    images[0].save("./gligen-generation-text-image-box.jpg")
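
# To also ground on a reference image (as the title comment suggests), uncomment
# the load_image(...) call above and pass the loaded image through gligen_images.
# A minimal sketch, following the diffusers GLIGEN text+image example:
#
#   gligen_image = load_image(
#       "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/gligen/pexels-pixabay-60597.jpg"
#   )
#   images = pipe(
#       prompt=prompt,
#       gligen_phrases=phrases,
#       gligen_images=[gligen_image],
#       gligen_boxes=boxes,
#       gligen_scheduled_sampling_beta=1,
#       output_type="pil",
#       num_inference_steps=50,
#   ).images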