furkan gözükara ev pc committed on
Commit
a2f9064
1 Parent(s): b40e904
Files changed (2)
  1. .gitignore +5 -0
  2. app.py +164 -170
.gitignore ADDED
@@ -0,0 +1,5 @@
+ venv
+ .git
+ .vs
+ outputs
+ previewer
app.py CHANGED
@@ -8,29 +8,42 @@ from typing import List
  from diffusers.utils import numpy_to_pil
  from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline
  from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS
- import spaces
  from previewer.modules import Previewer
- import user_history

- os.environ['TOKENIZERS_PARALLELISM'] = 'false'

- DESCRIPTION = "# Stable Cascade"
- DESCRIPTION += "\n<p style=\"text-align: center\">Unofficial demo for <a href='https://huggingface.co/stabilityai/stable-cascade' target='_blank'>Stable Cascade</a>, a new high resolution text-to-image model by Stability AI, built on the Würstchen architecture - <a href='https://huggingface.co/stabilityai/stable-cascade/blob/main/LICENSE' target='_blank'>non-commercial research license</a></p>"
  if not torch.cuda.is_available():
- DESCRIPTION += "\n<p>Running on CPU 🥶</p>"

  MAX_SEED = np.iinfo(np.int32).max
- CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") != "0"
- MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1536"))
- USE_TORCH_COMPILE = False
- ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD") == "1"
  PREVIEW_IMAGES = True

  dtype = torch.bfloat16
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  if torch.cuda.is_available():
- prior_pipeline = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", torch_dtype=dtype).to(device)
- decoder_pipeline = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", torch_dtype=dtype).to(device)

  if ENABLE_CPU_OFFLOAD:
  prior_pipeline.enable_model_cpu_offload()
@@ -66,7 +79,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
  seed = random.randint(0, MAX_SEED)
  return seed

- @spaces.GPU
  def generate(
  prompt: str,
  negative_prompt: str = "",
@@ -74,172 +86,159 @@ def generate(
  width: int = 1024,
  height: int = 1024,
  prior_num_inference_steps: int = 30,
- # prior_timesteps: List[float] = None,
  prior_guidance_scale: float = 4.0,
  decoder_num_inference_steps: int = 12,
- # decoder_timesteps: List[float] = None,
  decoder_guidance_scale: float = 0.0,
- num_images_per_prompt: int = 2,
- profile: gr.OAuthProfile | None = None,
- ) -> PIL.Image.Image:
- #prior_pipeline.to(device)
- #decoder_pipeline.to(device)
- #previewer.eval().requires_grad_(False).to(device).to(dtype)
- generator = torch.Generator().manual_seed(seed)
- prior_output = prior_pipeline(
- prompt=prompt,
- height=height,
- width=width,
- num_inference_steps=prior_num_inference_steps,
- timesteps=DEFAULT_STAGE_C_TIMESTEPS,
- negative_prompt=negative_prompt,
- guidance_scale=prior_guidance_scale,
- num_images_per_prompt=num_images_per_prompt,
- generator=generator,
- callback=callback_prior,
- callback_steps=callback_steps
- )
-
- if PREVIEW_IMAGES:
- for _ in range(len(DEFAULT_STAGE_C_TIMESTEPS)):
- r = next(prior_output)
- if isinstance(r, list):
- yield r[0]
- prior_output = r

- decoder_output = decoder_pipeline(
- image_embeddings=prior_output.image_embeddings,
- prompt=prompt,
- num_inference_steps=decoder_num_inference_steps,
- # timesteps=decoder_timesteps,
- guidance_scale=decoder_guidance_scale,
- negative_prompt=negative_prompt,
- generator=generator,
- output_type="pil",
- ).images

- #Save images
- for image in decoder_output:
- user_history.save_image(
- profile=profile,
- image=image,
- label=prompt,
- metadata={
- "negative_prompt": negative_prompt,
- "seed": seed,
- "width": width,
- "height": height,
- "prior_guidance_scale": prior_guidance_scale,
- "decoder_num_inference_steps": decoder_num_inference_steps,
- "decoder_guidance_scale": decoder_guidance_scale,
- "num_images_per_prompt": num_images_per_prompt,
- },
- )

- yield decoder_output[0]
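Note that the deleted generate() is a generator: each yield pushes an intermediate preview to the output component before the final image arrives. Below is a minimal sketch of that streaming pattern using standard Gradio generator support; progressive() and its placeholder images are illustrative, not from the commit, and streaming requires queuing, which the old app enabled via queue().

```python
# Sketch: a generator handler updates the bound output component on every yield.
import gradio as gr
from PIL import Image

def progressive(steps):
    for i in range(int(steps)):
        # Stand-in for a denoising step producing a preview image.
        yield Image.new("RGB", (64, 64), (0, 0, min(255, 30 * (i + 1))))
    yield Image.new("RGB", (64, 64), (0, 200, 0))  # final image

with gr.Blocks() as sketch:
    s = gr.Slider(1, 8, value=4, step=1, label="Steps")
    img = gr.Image(label="Result")
    gr.Button("Run").click(progressive, inputs=s, outputs=img)

# sketch.queue().launch()  # queuing is required for streaming handlers
```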
 
- examples = [
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
- "An astronaut riding a green horse",
- "A mecha robot in a favela by Tarsila do Amaral",
- "The spirit of a Tamagotchi wandering in the city of Los Angeles",
- "A delicious feijoada ramen dish"
- ]

- with gr.Blocks() as demo:
- gr.Markdown(DESCRIPTION)
- gr.DuplicateButton(
- value="Duplicate Space for private use",
- elem_id="duplicate-button",
- visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
- )
- with gr.Group():
- with gr.Row():
  prompt = gr.Text(
  label="Prompt",
- show_label=False,
- max_lines=1,
  placeholder="Enter your prompt",
- container=False,
- )
- run_button = gr.Button("Run", scale=0)
- result = gr.Image(label="Result", show_label=False)
- with gr.Accordion("Advanced options", open=False):
- negative_prompt = gr.Text(
- label="Negative prompt",
- max_lines=1,
- placeholder="Enter a Negative Prompt",
- )
-
- seed = gr.Slider(
- label="Seed",
- minimum=0,
- maximum=MAX_SEED,
- step=1,
- value=0,
- )
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
- with gr.Row():
- width = gr.Slider(
- label="Width",
- minimum=1024,
- maximum=MAX_IMAGE_SIZE,
- step=512,
- value=1024,
- )
- height = gr.Slider(
- label="Height",
- minimum=1024,
- maximum=MAX_IMAGE_SIZE,
- step=512,
- value=1024,
- )
- num_images_per_prompt = gr.Slider(
- label="Number of Images",
- minimum=1,
- maximum=2,
- step=1,
- value=1,
  )
- with gr.Row():
- prior_guidance_scale = gr.Slider(
- label="Prior Guidance Scale",
- minimum=0,
- maximum=20,
- step=0.1,
- value=4.0,
- )
- prior_num_inference_steps = gr.Slider(
- label="Prior Inference Steps",
- minimum=10,
- maximum=30,
- step=1,
- value=20,
  )

- decoder_guidance_scale = gr.Slider(
- label="Decoder Guidance Scale",
  minimum=0,
- maximum=0,
- step=0.1,
- value=0.0,
- )
- decoder_num_inference_steps = gr.Slider(
- label="Decoder Inference Steps",
- minimum=4,
- maximum=12,
  step=1,
- value=10,
  )
-
- gr.Examples(
- examples=examples,
- inputs=prompt,
- outputs=result,
- fn=generate,
- cache_examples=CACHE_EXAMPLES,
- )

  inputs = [
  prompt,
@@ -253,7 +252,8 @@ with gr.Blocks() as demo:
  decoder_num_inference_steps,
  # decoder_timesteps,
  decoder_guidance_scale,
- num_images_per_prompt,
  ]
  gr.on(
  triggers=[prompt.submit, negative_prompt.submit, run_button.click],
@@ -268,12 +268,6 @@ with gr.Blocks() as demo:
  outputs=result,
  api_name="run",
  )
-
- with gr.Blocks(css="style.css") as demo_with_history:
- with gr.Tab("App"):
- demo.render()
- with gr.Tab("Past generations"):
- user_history.render()
-
  if __name__ == "__main__":
- demo_with_history.queue(max_size=20).launch()

app.py (new version)
 
  from diffusers.utils import numpy_to_pil
  from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline
  from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS
  from previewer.modules import Previewer
+ import os
+ import datetime
+ import json
+ import io
+ import argparse  # Import the argparse library
+
+ # Set up argument parser
+ parser = argparse.ArgumentParser(description="Gradio interface for text-to-image generation with optional features.")
+ parser.add_argument("--share", action="store_true", help="Enable Gradio sharing.")
+ parser.add_argument("--lowvram", action="store_true", help="Enable CPU offload for model operations.")
+ parser.add_argument("--torch_compile", action="store_true", help="Enable torch.compile for the pipelines.")
+
+ # Parse arguments
+ args = parser.parse_args()
+ share = args.share
+ ENABLE_CPU_OFFLOAD = args.lowvram  # Use the --lowvram argument to toggle ENABLE_CPU_OFFLOAD
+ USE_TORCH_COMPILE = args.torch_compile  # Use the --torch_compile argument to toggle USE_TORCH_COMPILE
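For context, a minimal runnable sketch of how these three flags behave; the parser setup mirrors the diff, while the simulated argv and the print are illustrative only:

```python
# Sketch only: mirrors the argparse setup from the commit; the simulated
# command line below is hypothetical and stands in for `python app.py --lowvram`.
import argparse

parser = argparse.ArgumentParser(description="Gradio interface for text-to-image generation with optional features.")
parser.add_argument("--share", action="store_true", help="Enable Gradio sharing.")
parser.add_argument("--lowvram", action="store_true", help="Enable CPU offload for model operations.")
parser.add_argument("--torch_compile", action="store_true", help="Enable torch.compile for the pipelines.")

args = parser.parse_args(["--lowvram"])  # simulate a launch flag
print(args.share, args.lowvram, args.torch_compile)  # -> False True False
```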
 
 
+ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+ DESCRIPTION = "<p style=\"font-size:14px\">Stable Cascade Modified By SECourses - Unofficial demo for <a href='https://huggingface.co/stabilityai/stable-cascade' target='_blank'>Stable Cascade</a>, a new high resolution text-to-image model by Stability AI, built on the Würstchen architecture.<br/> Some tips: Higher batch size works great with fast speed and low VRAM usage - Not all resolutions work, e.g. 1920x1080 fails but 1920x1152 works<br/>Supports high resolutions such as 1536x1536 very well</p>"
  if not torch.cuda.is_available():
+ DESCRIPTION += "<br/><p>Running on CPU 🥶</p>"

  MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 2048
  PREVIEW_IMAGES = True

  dtype = torch.bfloat16
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  if torch.cuda.is_available():
+ prior_pipeline = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", torch_dtype=dtype)
+ decoder_pipeline = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", torch_dtype=dtype)
+ prior_pipeline.enable_xformers_memory_efficient_attention()
+ decoder_pipeline.enable_xformers_memory_efficient_attention()

  if ENABLE_CPU_OFFLOAD:
  prior_pipeline.enable_model_cpu_offload()
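The pipelines above are deliberately not moved to the GPU at load time; with --lowvram, enable_model_cpu_offload() swaps each sub-model onto the GPU only while it runs. A minimal sketch of the trade-off, assuming a CUDA machine and the same diffusers pipeline; the low_vram variable is an illustrative stand-in for the flag:

```python
# Sketch of the VRAM trade-off behind the --lowvram flag (assumptions noted above).
import torch
from diffusers import StableCascadePriorPipeline

pipe = StableCascadePriorPipeline.from_pretrained(
    "stabilityai/stable-cascade-prior", torch_dtype=torch.bfloat16
)

low_vram = True  # hypothetical stand-in for args.lowvram
if low_vram:
    # Weights stay in CPU RAM; each sub-module is moved to the GPU only for
    # its forward pass. Slower per step, much lower peak VRAM.
    pipe.enable_model_cpu_offload()
else:
    # Whole pipeline resident on the GPU: fastest, highest VRAM use.
    pipe.to("cuda")
```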
 
  seed = random.randint(0, MAX_SEED)
  return seed

  def generate(
  prompt: str,
  negative_prompt: str = "",

  width: int = 1024,
  height: int = 1024,
  prior_num_inference_steps: int = 30,
  prior_guidance_scale: float = 4.0,
  decoder_num_inference_steps: int = 12,
  decoder_guidance_scale: float = 0.0,
+ batch_size_per_prompt: int = 2,
+ number_of_images_per_prompt: int = 1,  # New parameter
+ ) -> List[PIL.Image.Image]:
+ images = []  # Initialize an empty list to collect generated images
+ original_seed = seed  # Store the original seed value
+ for i in range(number_of_images_per_prompt):
+ if i > 0:  # Update seed for subsequent iterations
+ seed = random.randint(0, MAX_SEED)
+ generator = torch.Generator().manual_seed(seed)
+ prior_output = prior_pipeline(
+ prompt=prompt,
+ height=height,
+ width=width,
+ num_inference_steps=prior_num_inference_steps,
+ timesteps=DEFAULT_STAGE_C_TIMESTEPS,
+ negative_prompt=negative_prompt,
+ guidance_scale=prior_guidance_scale,
+ num_images_per_prompt=batch_size_per_prompt,
+ generator=generator,
+ callback=callback_prior,
+ callback_steps=callback_steps
+ )

+ if PREVIEW_IMAGES:
+ for _ in range(len(DEFAULT_STAGE_C_TIMESTEPS)):
+ r = next(prior_output)
+ prior_output = r

+ decoder_output = decoder_pipeline(
+ image_embeddings=prior_output.image_embeddings,
+ prompt=prompt,
+ num_inference_steps=decoder_num_inference_steps,
+ guidance_scale=decoder_guidance_scale,
+ negative_prompt=negative_prompt,
+ generator=generator,
+ output_type="pil",
+ ).images

+ # Append generated images to the images list
+ images.extend(decoder_output)

+ # Optionally, save each image
+ output_folder = 'outputs'
+ if not os.path.exists(output_folder):
+ os.makedirs(output_folder)
+ for image in decoder_output:
+ # Generate timestamped filename
+ timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f')
+ image_filename = f"{output_folder}/{timestamp}.png"
+ image.save(image_filename)

+ # Return the list of generated images
+ return images
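The loop above keeps the user's seed for the first pass and re-randomizes it for every later pass, so the total output count is number_of_images_per_prompt * batch_size_per_prompt. A self-contained sketch of just that scheme; generate_batch is a hypothetical stand-in for one prior-plus-decoder pass:

```python
# Sketch of the reseeding scheme; random/MAX_SEED mirror the diff,
# generate_batch is hypothetical.
import random

MAX_SEED = 2**31 - 1

def generate_batch(seed: int, n: int) -> list:
    # Placeholder for one prior + decoder run producing n images.
    return [f"image(seed={seed}, idx={i})" for i in range(n)]

def run(seed: int, number_of_images_per_prompt: int, batch_size_per_prompt: int) -> list:
    images = []
    for i in range(number_of_images_per_prompt):
        if i > 0:  # keep the user's seed only for the first pass
            seed = random.randint(0, MAX_SEED)
        images.extend(generate_batch(seed, batch_size_per_prompt))
    return images

print(len(run(seed=0, number_of_images_per_prompt=3, batch_size_per_prompt=2)))  # -> 6
```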

+ with gr.Blocks() as app:
+ with gr.Row():
+ gr.Markdown(DESCRIPTION)
+ with gr.Row():
+ with gr.Column():
  prompt = gr.Text(
  label="Prompt",
  placeholder="Enter your prompt",
  )
+ run_button = gr.Button("Generate")
+
+ # Advanced options now directly visible
+ negative_prompt = gr.Text(
+ label="Negative prompt",
+ placeholder="Enter a Negative Prompt",
  )

+ seed = gr.Slider(
+ label="Seed",
  minimum=0,
+ maximum=MAX_SEED,
  step=1,
+ value=0,
  )
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+ with gr.Row():
+ with gr.Column():
+ width = gr.Slider(
+ label="Width",
+ minimum=512,
+ maximum=MAX_IMAGE_SIZE,
+ step=64,
+ value=1024,
+ )
+ with gr.Column():
+ height = gr.Slider(
+ label="Height",
+ minimum=512,
+ maximum=MAX_IMAGE_SIZE,
+ step=64,
+ value=1024,
+ )
+ with gr.Row():
+ with gr.Column():
+ batch_size_per_prompt = gr.Slider(
+ label="Batch Size",
+ minimum=1,
+ maximum=20,
+ step=1,
+ value=1,
+ )
+ with gr.Column():
+ number_of_images_per_prompt = gr.Slider(
+ label="Number Of Images To Generate",
+ minimum=1,
+ maximum=9999999,
+ step=1,
+ value=1,
+ )
+ with gr.Row():
+ with gr.Column():
+ prior_guidance_scale = gr.Slider(
+ label="Prior Guidance Scale (CFG)",
+ minimum=0,
+ maximum=20,
+ step=0.1,
+ value=4.0,
+ )
+ with gr.Column():
+ decoder_guidance_scale = gr.Slider(
+ label="Decoder Guidance Scale (CFG)",
+ minimum=0,
+ maximum=20,
+ step=0.1,
+ value=0.0,
+ )
+ with gr.Row():
+ with gr.Column():
+ prior_num_inference_steps = gr.Slider(
+ label="Prior Inference Steps",
+ minimum=1,
+ maximum=100,
+ step=1,
+ value=20,
+ )
+ with gr.Column():
+ decoder_num_inference_steps = gr.Slider(
+ label="Decoder Inference Steps",
+ minimum=1,
+ maximum=100,
+ step=1,
+ value=20,
+ )
+
+ with gr.Column():
+ result = gr.Gallery(label="Result", show_label=False, height=768)
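The output component changes from gr.Image to gr.Gallery because generate() now returns a list of images. A minimal sketch of that pairing using the standard Gradio API; make_images and its placeholder images are illustrative:

```python
# Sketch: a Gallery renders the list a handler returns, where an Image
# component would show only a single picture.
import gradio as gr
from PIL import Image

def make_images(n):
    # Placeholder solid-color images standing in for decoder output.
    return [Image.new("RGB", (64, 64), (40 * i % 256, 0, 0)) for i in range(int(n))]

with gr.Blocks() as sketch:
    n = gr.Slider(1, 8, value=2, step=1, label="Count")
    gallery = gr.Gallery(label="Result")
    gr.Button("Run").click(make_images, inputs=n, outputs=gallery)

# sketch.launch()  # uncomment to try it
```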

  inputs = [
  prompt,

  decoder_num_inference_steps,
  # decoder_timesteps,
  decoder_guidance_scale,
+ batch_size_per_prompt,
+ number_of_images_per_prompt
  ]
  gr.on(
  triggers=[prompt.submit, negative_prompt.submit, run_button.click],

  outputs=result,
  api_name="run",
  )
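The gr.on call binds one handler to several triggers, so pressing Enter in either textbox or clicking the button all invoke generate with the same inputs. A minimal sketch of the pattern using the standard Gradio API; echo is illustrative:

```python
# Sketch of multi-trigger wiring with gr.on.
import gradio as gr

def echo(text):
    return text

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Prompt")
    btn = gr.Button("Run")
    out = gr.Textbox(label="Result")
    gr.on(
        triggers=[box.submit, btn.click],  # any of these fires the handler
        fn=echo,
        inputs=box,
        outputs=out,
    )
```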
+
  if __name__ == "__main__":
+ app.queue().launch(share=share, inbrowser=True)