Spaces:
Running
Running
# Use a reference image to add an object to the generated image.
import torch | |
from diffusers import StableDiffusionGLIGENTextImagePipeline | |
from diffusers.utils import load_image | |
def normalize_bbox(bboxes, img_width, img_height):
    """Return the boxes with each coordinate divided by the image size.

    Args:
        bboxes: Iterable of 4-item boxes ``(x_min, y_min, x_max, y_max)``.
        img_width: Divisor applied to both x coordinates of every box.
        img_height: Divisor applied to both y coordinates of every box.

    Returns:
        A new list containing one ``[x_min, y_min, x_max, y_max]`` list per
        input box; the input boxes are not modified.

    Raises:
        ValueError: If a box does not unpack into exactly four values.
        ZeroDivisionError: If a dimension is zero and ``bboxes`` is non-empty.
    """
    return [
        [
            x_min / img_width,
            y_min / img_height,
            x_max / img_width,
            y_max / img_height,
        ]
        for x_min, y_min, x_max, y_max in bboxes
    ]
def inference_image(pipe, prompt, grounding_instruction, state,
                    img_width=600, img_height=600):
    """Generate one image from a prompt plus grounded phrases and boxes.

    Args:
        pipe: Callable pipeline invoked with the GLIGEN keyword arguments
            below; its result must expose an ``images`` sequence.
        prompt: Text prompt forwarded to ``pipe``.
        grounding_instruction: ``;``-separated phrases. Empty or
            whitespace-only segments are dropped and the remaining phrases
            are stripped of surrounding whitespace.
        state: Mapping whose ``'boxes'`` entry holds the boxes as
            ``[x_min, y_min, x_max, y_max]`` in pixel coordinates.
        img_width: Width the x coordinates are divided by. Defaults to 600,
            the value that was previously hard-coded here.
        img_height: Height the y coordinates are divided by. Defaults to
            600, the value that was previously hard-coded here.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    print(prompt)
    print(grounding_instruction)

    pixel_boxes = state['boxes']
    print(pixel_boxes)
    boxes = normalize_bbox(pixel_boxes, img_width, img_height)
    print(boxes)

    # The pipeline expects a list of phrases (one per box), not the raw
    # ';'-joined string, so pass the parsed list rather than the input text.
    phrases = [
        part.strip()
        for part in grounding_instruction.split(';')
        if part.strip()
    ]
    print(phrases)

    result = pipe(
        prompt=prompt,
        gligen_phrases=phrases,
        gligen_images=[],
        gligen_boxes=boxes,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    )
    return result.images[0]
if __name__ == "__main__":
    # Demo run: load the checkpoint in half precision, move it to the GPU,
    # generate one grounded image and write it to disk.
    pipe = StableDiffusionGLIGENTextImagePipeline.from_pretrained(
        "anhnct/Gligen_Text_Image",
        torch_dtype=torch.float16,
    ).to("cuda")

    # Reference-image variant kept from the original script (disabled):
    # gligen_image = load_image(
    #     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/gligen/pexels-pixabay-60597.jpg"
    # )

    generation_kwargs = {
        "prompt": "a flower sitting on the beach",
        "gligen_phrases": ["flower"],
        "gligen_images": [],
        # Box is already expressed as fractions of the image size.
        "gligen_boxes": [[0.0, 0.09, 0.53, 0.76]],
        "gligen_scheduled_sampling_beta": 1,
        "output_type": "pil",
        "num_inference_steps": 50,
    }
    generated = pipe(**generation_kwargs).images

    first_image = generated[0]
    first_image.save("./gligen-generation-text-image-box.jpg")