import os
import random
import datetime
import argparse

import gradio as gr
import numpy as np
import torch
from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline

# Set up the command-line argument parser
parser = argparse.ArgumentParser(description="Gradio interface for text-to-image generation with optional features.")
parser.add_argument("--share", action="store_true", help="Enable Gradio sharing.")
parser.add_argument("--lowvram", action="store_true", help="Enable model CPU offload to reduce VRAM usage.")
parser.add_argument("--torch_compile", action="store_true", help="Compile the prior and decoder with torch.compile.")
parser.add_argument("--fp16", action="store_true", help="Use float16 instead of the default bfloat16.")
args = parser.parse_args()

share = args.share
ENABLE_CPU_OFFLOAD = args.lowvram
USE_TORCH_COMPILE = args.torch_compile
dtype = torch.float16 if args.fp16 else torch.bfloat16
print(f"Using dtype {dtype}")

os.environ["TOKENIZERS_PARALLELISM"] = "false"
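# A minimal usage sketch for the flags above, assuming this file is saved as
# app.py (the filename is an assumption, not part of the original script):
#
#   python app.py                      # defaults: bfloat16, pipelines fully on GPU
#   python app.py --lowvram --fp16     # CPU offload plus float16 for low-VRAM GPUs
#   python app.py --share              # additionally create a public Gradio link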

DESCRIPTION = """
Stable Cascade Modified By SECourses - an unofficial demo for Stable Cascade, a new high-resolution text-to-image model by Stability AI, built on the Würstchen architecture.

Some tips: higher batch sizes work well, generating quickly without much extra VRAM usage. Not all resolutions work, e.g. 1920x1080 fails but 1920x1152 works.

High resolutions such as 1536x1536 are supported very well.

" if not torch.cuda.is_available(): DESCRIPTION += "

Running on CPU 🥶

" MAX_SEED = np.iinfo(np.int32).max MAX_IMAGE_SIZE = 4096 PREVIEW_IMAGES = True device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): prior_pipeline = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", torch_dtype=dtype) decoder_pipeline = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", torch_dtype=dtype) prior_pipeline.enable_xformers_memory_efficient_attention() decoder_pipeline.enable_xformers_memory_efficient_attention() if ENABLE_CPU_OFFLOAD: prior_pipeline.enable_model_cpu_offload() decoder_pipeline.enable_model_cpu_offload() else: prior_pipeline.to(device) decoder_pipeline.to(device) if USE_TORCH_COMPILE: prior_pipeline.prior = torch.compile(prior_pipeline.prior, mode="reduce-overhead", fullgraph=True) decoder_pipeline.decoder = torch.compile(decoder_pipeline.decoder, mode="max-autotune", fullgraph=True) else: prior_pipeline = None decoder_pipeline = None def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed def generate( prompt: str, negative_prompt: str = "", seed: int = 0, width: int = 1024, height: int = 1024, prior_num_inference_steps: int = 30, prior_guidance_scale: float = 4.0, decoder_num_inference_steps: int = 12, decoder_guidance_scale: float = 0.0, batch_size_per_prompt: int = 2, number_of_images_per_prompt: int = 1, # New parameter ): images = [] # Initialize an empty list to collect generated images original_seed = seed # Store the original seed value for i in range(number_of_images_per_prompt): if i > 0: # Update seed for subsequent iterations seed = random.randint(0, MAX_SEED) generator = torch.Generator().manual_seed(seed) prior_output = prior_pipeline( prompt=prompt, height=height, width=width, generator=generator, negative_prompt=negative_prompt, guidance_scale=prior_guidance_scale, num_images_per_prompt=batch_size_per_prompt, num_inference_steps=prior_num_inference_steps ) decoder_output = decoder_pipeline( image_embeddings=prior_output.image_embeddings, prompt=prompt, negative_prompt=negative_prompt, guidance_scale=decoder_guidance_scale, output_type="pil", generator=generator, num_inference_steps=decoder_num_inference_steps ).images # Append generated images to the images list images.extend(decoder_output) # Optionally, save each image output_folder = 'outputs' if not os.path.exists(output_folder): os.makedirs(output_folder) for image in decoder_output: # Generate timestamped filename timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S_%f') image_filename = f"{output_folder}/{timestamp}.png" image.save(image_filename) if torch.cuda.is_available(): torch.cuda.empty_cache() # Return the list of generated images return images with gr.Blocks() as app: with gr.Row(): gr.Markdown(DESCRIPTION) with gr.Row(): with gr.Column(): prompt = gr.Text( label="Prompt", placeholder="Enter your prompt", ) run_button = gr.Button("Generate") # Advanced options now directly visible negative_prompt = gr.Text( label="Negative prompt", placeholder="Enter a Negative Prompt", ) seed = gr.Slider( label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, ) randomize_seed = gr.Checkbox(label="Randomize seed", value=True) with gr.Row(): with gr.Column(): width = gr.Slider( label="Width", minimum=512, maximum=MAX_IMAGE_SIZE, step=128, value=1024, ) with gr.Column(): height = gr.Slider( label="Height", minimum=512, maximum=MAX_IMAGE_SIZE, step=128, value=1024, ) with gr.Row(): with gr.Column(): 
                    batch_size_per_prompt = gr.Slider(
                        label="Batch Size",
                        minimum=1,
                        maximum=20,
                        step=1,
                        value=1,
                    )
                with gr.Column():
                    number_of_images_per_prompt = gr.Slider(
                        label="Number Of Images To Generate",
                        minimum=1,
                        maximum=9999999,
                        step=1,
                        value=1,
                    )
            with gr.Row():
                with gr.Column():
                    prior_guidance_scale = gr.Slider(
                        label="Prior Guidance Scale (CFG)",
                        minimum=0,
                        maximum=20,
                        step=0.1,
                        value=4.0,
                    )
                with gr.Column():
                    decoder_guidance_scale = gr.Slider(
                        label="Decoder Guidance Scale (CFG)",
                        minimum=0,
                        maximum=20,
                        step=0.1,
                        value=0.0,
                    )
            with gr.Row():
                with gr.Column():
                    prior_num_inference_steps = gr.Slider(
                        label="Prior Inference Steps",
                        minimum=1,
                        maximum=100,
                        step=1,
                        value=30,
                    )
                with gr.Column():
                    decoder_num_inference_steps = gr.Slider(
                        label="Decoder Inference Steps",
                        minimum=1,
                        maximum=100,
                        step=1,
                        value=20,
                    )
        with gr.Column():
            result = gr.Gallery(label="Result", show_label=False, height=768)

    inputs = [
        prompt,
        negative_prompt,
        seed,
        width,
        height,
        prior_num_inference_steps,
        prior_guidance_scale,
        decoder_num_inference_steps,
        decoder_guidance_scale,
        batch_size_per_prompt,
        number_of_images_per_prompt,
    ]

    # On submit or button click: randomize the seed first (if enabled), then generate
    gr.on(
        triggers=[prompt.submit, negative_prompt.submit, run_button.click],
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False,
    ).then(
        fn=generate,
        inputs=inputs,
        outputs=result,
        api_name="run",
    )

if __name__ == "__main__":
    app.queue().launch(share=share, inbrowser=True)
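# A minimal sketch of driving the exposed "run" endpoint programmatically via
# gradio_client (pip install gradio_client). The URL assumes the default local
# port, and the prompt text is an arbitrary example; the positional arguments
# mirror the `inputs` list above. The exact return format for a Gallery output
# depends on the installed Gradio version.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "a cinematic photo of a red fox",  # prompt
#       "",       # negative_prompt
#       0,        # seed
#       1024,     # width
#       1024,     # height
#       30,       # prior_num_inference_steps
#       4.0,      # prior_guidance_scale
#       20,       # decoder_num_inference_steps
#       0.0,      # decoder_guidance_scale
#       1,        # batch_size_per_prompt
#       1,        # number_of_images_per_prompt
#       api_name="/run",
#   )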