nikhilg committed on
Commit
13309b8
1 Parent(s): 0c6ed4f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -19
README.md CHANGED
@@ -68,32 +68,30 @@ Running the pipeline with the default PNDM scheduler:
68
 
69
  ```python
70
  import torch
71
- import torchvision
72
  from diffusers import StableDiffusionGLIGENPipeline
 
73
 
74
- model_id = "masterful/gligen-1-4-generation-text-box"
75
- device = "cuda"
76
-
77
- pipe = StableDiffusionGLIGENPipeline.from_pretrained(model_id, variant="fp16", torch_dtype=torch.float16)
78
- pipe = pipe.to(device)
 
79
 
80
  prompt = "a waterfall and a modern high speed train running through the tunnel in a beautiful forest with fall foliage"
 
 
81
 
82
  images = pipe(
83
- prompt,
84
- num_images_per_prompt=1,
85
- gligen_phrases = ['a waterfall', 'a modern high speed train running through the tunnel'],
86
- gligen_boxes = [
87
- [0.1387, 0.2051, 0.4277, 0.7090],
88
- [0.4980, 0.4355, 0.8516, 0.7266],
89
- ],
90
- gligen_scheduled_sampling_beta=0.3,
91
- output_type="np",
92
- num_inference_steps=50
93
  ).images
94
 
95
- images = torch.stack([torch.from_numpy(image) for image in images]).permute(0, 3, 1, 2)
96
- torchvision.utils.save_image(images, "./gligen-1-4-generation-text-box.jpg", nrow=1, normalize=False)
97
  ```
98
 
99
 
@@ -181,4 +179,4 @@ Refer [`GLIGEN`](https://github.com/gligen/GLIGEN) for more details.
181
  }
182
  ```
183
 
184
- *This model card was written by: Robin Rombach and Patrick Esser and is based on the [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).*
 
68
 
69
  ```python
70
  import torch
 
71
  from diffusers import StableDiffusionGLIGENPipeline
72
+ from diffusers.utils import load_image
73
 
74
+ # Generate an image described by the prompt and
75
+ # insert objects described by text at the region defined by bounding boxes
76
+ pipe = StableDiffusionGLIGENPipeline.from_pretrained(
77
+ "masterful/gligen-1-4-generation-text-box", variant="fp16", torch_dtype=torch.float16
78
+ )
79
+ pipe = pipe.to("cuda")
80
 
81
  prompt = "a waterfall and a modern high speed train running through the tunnel in a beautiful forest with fall foliage"
82
+ boxes = [[0.1387, 0.2051, 0.4277, 0.7090], [0.4980, 0.4355, 0.8516, 0.7266]]
83
+ phrases = ["a waterfall", "a modern high speed train running through the tunnel"]
84
 
85
  images = pipe(
86
+ prompt=prompt,
87
+ gligen_phrases=phrases,
88
+ gligen_boxes=boxes,
89
+ gligen_scheduled_sampling_beta=1,
90
+ output_type="pil",
91
+ num_inference_steps=50,
 
 
 
 
92
  ).images
93
 
94
+ images[0].save("./gligen-1-4-generation-text-box.jpg")
 
95
  ```
96
 
97
 
 
179
  }
180
  ```
181
 
182
+ *This model card was written by [Nikhil Gajendrakumar](https://github.com/nikhil-masterful) and is based on the [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).*