Spaces:

Nick088
/

stable-diffusion-arena

Running on Zero

App Files Files Community

Nick088 committed on Jun 22, 2024

Commit

0900c59

verified ·

1 Parent(s): ff28634

Create app.py

Browse files

Files changed (1) hide show

app.py +470 -0

app.py ADDED Viewed

	@@ -0,0 +1,470 @@

+import torch
+from diffusers import StableDiffusion3Pipeline, StableDiffusion2Pipeline, StableDiffusionXLBasePipeline
+import gradio as gr
+import os
+import random
+import transformers
+import numpy as np
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+import spaces
+HF_TOKEN = os.getenv("HF_TOKEN")
+if torch.cuda.is_available():
+    device = "cuda"
+    print("Using GPU")
+else:
+    device = "cpu"
+    print("Using CPU")
+MAX_SEED = np.iinfo(np.int32).max
+# Initialize the pipelines for each sd model
+sd3_medium_pipe = StableDiffusion3Pipeline.from_pretrained(
+    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
+)
+sd3_medium_pipe.to(device)
+sd2_1_pipe = StableDiffusion2Pipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
+)
+sd2_1_pipe.to(device)
+sdxl_pipe = StableDiffusionXLBasePipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+)
+sdxl_pipe.to(device)
+# superprompt-v1
+tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1")
+model = T5ForConditionalGeneration.from_pretrained(
+    "roborovski/superprompt-v1", device_map="auto", torch_dtype="auto"
+)
+model.to(device)
+# toggle visibility the enhanced prompt output
+def update_visibility(enhance_prompt):
+    return gr.update(visible=enhance_prompt)
+# Define the image generation function for the Arena tab
+@spaces.GPU(duration=80)
+def generate_arena_images(
+    prompt,
+    enhance_prompt,
+    negative_prompt,
+    num_inference_steps,
+    height,
+    width,
+    guidance_scale,
+    seed,
+    num_images_per_prompt,
+    model_choice_1,
+    model_choice_2,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if seed == 0:
+        seed = random.randint(1, 2**32 - 1)
+    if enhance_prompt:
+        transformers.set_seed(seed)
+        input_text = f"Expand the following prompt to add more detail: {prompt}"
+        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
+        outputs = model.generate(
+            input_ids,
+            max_new_tokens=512,
+            repetition_penalty=1.2,
+            do_sample=True,
+            temperature=0.7,
+            top_p=1,
+            top_k=50,
+        )
+        prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    generator = torch.Generator().manual_seed(seed)
+    # Generate images for both models
+    images_1 = generate_single_image(
+        prompt,
+        negative_prompt,
+        num_inference_steps,
+        height,
+        width,
+        guidance_scale,
+        seed,
+        num_images_per_prompt,
+        model_choice_1,
+        generator,
+    )
+    images_2 = generate_single_image(
+        prompt,
+        negative_prompt,
+        num_inference_steps,
+        height,
+        width,
+        guidance_scale,
+        seed,
+        num_images_per_prompt,
+        model_choice_2,
+        generator,
+    )
+    return images_1, images_2, prompt
+# Helper function to generate images for a single model
+def generate_single_image(
+    prompt,
+    negative_prompt,
+    num_inference_steps,
+    height,
+    width,
+    guidance_scale,
+    seed,
+    num_images_per_prompt,
+    model_choice,
+    generator,
+):
+    # Select the correct pipeline based on the model choice
+    if model_choice == "sd3 medium":
+        pipe = sd3_medium_pipe
+    elif model_choice == "sd2.1":
+        pipe = sd2_1_pipe
+    elif model_choice == "sdxl":
+        pipe = sdxl_pipe
+    else:
+        raise ValueError(f"Invalid model choice: {model_choice}")
+    output = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        num_inference_steps=num_inference_steps,
+        height=height,
+        width=width,
+        guidance_scale=guidance_scale,
+        generator=generator,
+        num_images_per_prompt=num_images_per_prompt,
+    ).images
+    return output
+# Define the image generation function for the Individual tab
+@spaces.GPU(duration=80)
+def generate_individual_image(
+    prompt,
+    enhance_prompt,
+    negative_prompt,
+    num_inference_steps,
+    height,
+    width,
+    guidance_scale,
+    seed,
+    num_images_per_prompt,
+    model_choice,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if seed == 0:
+        seed = random.randint(1, 2**32 - 1)
+    if enhance_prompt:
+        transformers.set_seed(seed)
+        input_text = f"Expand the following prompt to add more detail: {prompt}"
+        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
+        outputs = model.generate(
+            input_ids,
+            max_new_tokens=512,
+            repetition_penalty=1.2,
+            do_sample=True,
+            temperature=0.7,
+            top_p=1,
+            top_k=50,
+        )
+        prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    generator = torch.Generator().manual_seed(seed)
+    output = generate_single_image(
+        prompt,
+        negative_prompt,
+        num_inference_steps,
+        height,
+        width,
+        guidance_scale,
+        seed,
+        num_images_per_prompt,
+        model_choice,
+        generator,
+    )
+    return output, prompt
+# Create the Gradio interface
+examples = [
+    ["A white car racing fast to the moon.", True],
+    ["A woman in a red dress singing on top of a building.", True],
+    ["An astronaut on mars in a futuristic cyborg suit.", True],
+]
+css = """
+.gradio-container{max-width: 1000px !important}
+h1{text-align:center}
+"""
+with gr.Blocks(css=css) as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(
+                """
+            <h1 style='text-align: center'>
+            Stable Diffusion Arena
+            </h1>
+            """
+            )
+            gr.HTML(
+                """
+               Made by <a href='https://linktr.ee/Nick088' target='_blank'>Nick088</a>
+               <br> <a href="https://discord.gg/osai"> <img src="https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge" alt="Discord"> </a>
+                """
+            )
+    with gr.Tabs():
+        with gr.TabItem("Arena"):
+            with gr.Group():
+                with gr.Column():
+                    prompt = gr.Textbox(
+                        label="Prompt",
+                        info="Describe the image you want",
+                        placeholder="A cat...",
+                    )
+                    enhance_prompt = gr.Checkbox(
+                        label="Prompt Enhancement with SuperPrompt-v1", value=True
+                    )
+                    model_choice_1 = gr.Dropdown(
+                        label="Stable Diffusion Model 1",
+                        choices=["sd3 medium", "sd2.1", "sdxl"],
+                        value="sd3 medium",
+                    )
+                    model_choice_2 = gr.Dropdown(
+                        label="Stable Diffusion Model 2",
+                        choices=["sd3 medium", "sd2.1", "sdxl"],
+                        value="sd2.1",
+                    )
+                    run_button = gr.Button("Run")
+                result_1 = gr.Gallery(label="Generated Images (Model 1)", elem_id="gallery_1")
+                result_2 = gr.Gallery(label="Generated Images (Model 2)", elem_id="gallery_2")
+                better_prompt = gr.Textbox(
+                    label="Enhanced Prompt",
+                    info="The output of your enhanced prompt used for the Image Generation",
+                    visible=True,
+                )
+                enhance_prompt.change(
+                    fn=update_visibility, inputs=enhance_prompt, outputs=better_prompt
+                )
+            with gr.Accordion("Advanced options", open=False):
+                with gr.Row():
+                    negative_prompt = gr.Textbox(
+                        label="Negative Prompt",
+                        info="Describe what you don't want in the image",
+                        value="deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+                        placeholder="Ugly, bad anatomy...",
+                    )
+                with gr.Row():
+                    num_inference_steps = gr.Slider(
+                        label="Number of Inference Steps",
+                        info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                        minimum=1,
+                        maximum=50,
+                        value=25,
+                        step=1,
+                    )
+                    guidance_scale = gr.Slider(
+                        label="Guidance Scale",
+                        info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                        minimum=0.0,
+                        maximum=10.0,
+                        value=7.5,
+                        step=0.1,
+                    )
+                with gr.Row():
+                    width = gr.Slider(
+                        label="Width",
+                        info="Width of the Image",
+                        minimum=256,
+                        maximum=1344,
+                        step=32,
+                        value=1024,
+                    )
+                    height = gr.Slider(
+                        label="Height",
+                        info="Height of the Image",
+                        minimum=256,
+                        maximum=1344,
+                        step=32,
+                        value=1024,
+                    )
+                with gr.Row():
+                    seed = gr.Slider(
+                        value=42,
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        label="Seed",
+                        info="A starting point to initiate the generation process, put 0 for a random one",
+                    )
+                    num_images_per_prompt = gr.Slider(
+                        label="Images Per Prompt",
+                        info="Number of Images to generate with the settings",
+                        minimum=1,
+                        maximum=4,
+                        step=1,
+                        value=2,
+                    )
+            gr.Examples(
+                examples=examples,
+                inputs=[prompt, enhance_prompt],
+                outputs=[result_1, result_2, better_prompt],
+                fn=generate_arena_images,
+            )
+            gr.on(
+                triggers=[
+                    prompt.submit,
+                    run_button.click,
+                ],
+                fn=generate_arena_images,
+                inputs=[
+                    prompt,
+                    enhance_prompt,
+                    negative_prompt,
+                    num_inference_steps,
+                    width,
+                    height,
+                    guidance_scale,
+                    seed,
+                    num_images_per_prompt,
+                    model_choice_1,
+                    model_choice_2,
+                ],
+                outputs=[result_1, result_2, better_prompt],
+            )
+        with gr.TabItem("Individual"):
+            with gr.Group():
+                with gr.Column():
+                    prompt = gr.Textbox(
+                        label="Prompt",
+                        info="Describe the image you want",
+                        placeholder="A cat...",
+                    )
+                    enhance_prompt = gr.Checkbox(
+                        label="Prompt Enhancement with SuperPrompt-v1", value=True
+                    )
+                    model_choice = gr.Dropdown(
+                        label="Stable Diffusion Model",
+                        choices=["sd3 medium", "sd2.1", "sdxl"],
+                        value="sd3 medium",
+                    )
+                    run_button = gr.Button("Run")
+                result = gr.Gallery(label="Generated AI Images", elem_id="gallery")
+                better_prompt = gr.Textbox(
+                    label="Enhanced Prompt",
+                    info="The output of your enhanced prompt used for the Image Generation",
+                    visible=True,
+                )
+                enhance_prompt.change(
+                    fn=update_visibility, inputs=enhance_prompt, outputs=better_prompt
+                )
+            with gr.Accordion("Advanced options", open=False):
+                with gr.Row():
+                    negative_prompt = gr.Textbox(
+                        label="Negative Prompt",
+                        info="Describe what you don't want in the image",
+                        value="deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+                        placeholder="Ugly, bad anatomy...",
+                    )
+                with gr.Row():
+                    num_inference_steps = gr.Slider(
+                        label="Number of Inference Steps",
+                        info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                        minimum=1,
+                        maximum=50,
+                        value=25,
+                        step=1,
+                    )
+                    guidance_scale = gr.Slider(
+                        label="Guidance Scale",
+                        info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                        minimum=0.0,
+                        maximum=10.0,
+                        value=7.5,
+                        step=0.1,
+                    )
+                with gr.Row():
+                    width = gr.Slider(
+                        label="Width",
+                        info="Width of the Image",
+                        minimum=256,
+                        maximum=1344,
+                        step=32,
+                        value=1024,
+                    )
+                    height = gr.Slider(
+                        label="Height",
+                        info="Height of the Image",
+                        minimum=256,
+                        maximum=1344,
+                        step=32,
+                        value=1024,
+                    )
+                with gr.Row():
+                    seed = gr.Slider(
+                        value=42,
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        label="Seed",
+                        info="A starting point to initiate the generation process, put 0 for a random one",
+                    )
+                    num_images_per_prompt = gr.Slider(
+                        label="Images Per Prompt",
+                        info="Number of Images to generate with the settings",
+                        minimum=1,
+                        maximum=4,
+                        step=1,
+                        value=2,
+                    )
+            gr.Examples(
+                examples=examples,
+                inputs=[prompt, enhance_prompt],
+                outputs=[result, better_prompt],
+                fn=generate_individual_image,
+            )
+            gr.on(
+                triggers=[
+                    prompt.submit,
+                    run_button.click,
+                ],
+                fn=generate_individual_image,
+                inputs=[
+                    prompt,
+                    enhance_prompt,
+                    negative_prompt,
+                    num_inference_steps,
+                    width,
+                    height,
+                    guidance_scale,
+                    seed,
+                    num_images_per_prompt,
+                    model_choice,
+                ],
+                outputs=[result, better_prompt],
+            )
+demo.queue().launch(share=False)