Spaces:

hostin
/

txt2img

Runtime error

File size: 4,897 Bytes

# @title 📸 Image Generation (multimodel + Gradio Web Interface)

from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from gradio.components import Gallery
from PIL import Image
from IPython.display import display
import os
from datetime import datetime
import gradio as gr

# Function to generate and display images
def generate_and_display_images(model_selection, scenery, style, height, width, num_images=2, n_steps=50, high_noise_frac=0.5, guidance_scale=2.6, negative_prompt="", seed=None):
    """Generate images with the selected diffusion model and save them as JPEGs.

    The prompt sent to the model is ``"Scenery: {scenery}; Style: {style}"``.
    Each generated image is written to the current working directory as
    ``{seed}_{timestamp}_{index}.jpg``.

    Args:
        model_selection: Hugging Face model id to load.
        scenery: Free-text description of the scene.
        style: Free-text description of the visual style.
        height: Output height in pixels (coerced to ``int``).
        width: Output width in pixels (coerced to ``int``).
        num_images: How many images to generate (coerced to ``int``).
        n_steps: Number of inference steps (coerced to ``int``).
        high_noise_frac: Passed as ``denoising_end``; only used when the
            model id contains ``"refiner"``.
        guidance_scale: Classifier-free guidance scale.
        negative_prompt: Text the model should steer away from.
        seed: Integer (or integer-like string) seed. ``None`` or a blank
            string picks a random seed.

    Returns:
        A list with the file path of every saved image, in generation order.

    Raises:
        gr.Error: If ``seed`` is given but cannot be converted to an integer.
    """
    seed_is_blank = seed is None or (isinstance(seed, str) and not seed.strip())
    if seed_is_blank:
        seed = torch.randint(low=0, high=2**32, size=(1,)).item()
    else:
        try:
            seed = int(seed)
        except (TypeError, ValueError):
            # Raise instead of returning a message: the return value feeds an
            # image gallery, which cannot render an error string.
            raise gr.Error("Invalid seed value. Seed must be an integer.") from None
    torch.manual_seed(seed)

    # Numeric UI inputs can arrive as floats (e.g. 1.0); range() and the
    # pipelines need real integers.
    height = int(height)
    width = int(width)
    num_images = int(num_images)
    n_steps = int(n_steps)

    # Half precision only where CUDA is available; CPU stays in float32.
    if torch.cuda.is_available():
        device = "cuda"
        dtype = torch.float16
    else:
        device = "cpu"
        dtype = torch.float32

    prompt = f"Scenery: {scenery}; Style: {style}"

    # No `use_auth_token=True`: it is deprecated and makes loading fail
    # outright when no Hugging Face token is stored on the machine. A stored
    # token is still picked up automatically when one exists.
    if model_selection == "dreamlike-art/dreamlike-photoreal-2.0":
        pipeline = StableDiffusionPipeline.from_pretrained(model_selection, torch_dtype=dtype)
    else:
        pipeline = DiffusionPipeline.from_pretrained(model_selection, torch_dtype=dtype)
    pipeline = pipeline.to(device)

    use_refiner_branch = "refiner" in model_selection

    generated_images = []
    for _ in range(num_images):
        if use_refiner_branch:
            # NOTE(review): the actual refiner pass is still not implemented.
            # The previous code requested output_type="latent" and then tried
            # to .save() the resulting tensor; ask for the default (PIL)
            # output so the result can at least be written to disk.
            result = pipeline(prompt=prompt, num_inference_steps=n_steps, denoising_end=high_noise_frac)
        else:
            result = pipeline(
                prompt=prompt,
                num_inference_steps=n_steps,
                guidance_scale=guidance_scale,
                negative_prompt=negative_prompt,
                height=height,
                width=width,
            )
        generated_images.append(result.images[0])

    # Save images and return file paths for Gradio display.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_paths = []
    for i, image in enumerate(generated_images):
        filename = f"{seed}_{timestamp}_{i}.jpg"
        image.save(filename)
        file_paths.append(filename)

    return file_paths

# Define Gradio interface
# Input order must match the positional parameters of
# generate_and_display_images.
iface = gr.Interface(
    fn=generate_and_display_images,
    inputs=[
        gr.components.Dropdown(
            value="dreamlike-art/dreamlike-photoreal-2.0",
            choices=[
                "stabilityai/sdxl-turbo",
                "stabilityai/stable-diffusion-xl-base-1.0",
                "runwayml/stable-diffusion-v1-5",
                "dreamlike-art/dreamlike-photoreal-2.0",
                "Kardbord/stable-diffusion-v1-5-unsafe",
            ],
            label="Model Selection",
        ),
        gr.components.Textbox(label="Scenery", placeholder="Describe the scenery you want in the image"),
        gr.components.Textbox(label="Style", placeholder="Describe the style of the image (e.g., photorealistic, liminal, dark)"),
        # Stable Diffusion pipelines reject sizes that are not multiples of 8,
        # so the sliders only offer valid values.
        gr.components.Slider(minimum=64, maximum=2048, step=8, value=1024, label="Height"),
        gr.components.Slider(minimum=64, maximum=2048, step=8, value=576, label="Width"),
        # precision=0 makes the component deliver an int; the callback uses
        # this value as a loop count.
        gr.components.Number(value=1, precision=0, label="Number of Images"),
        # At least one step is needed; zero steps cannot produce a schedule.
        gr.components.Slider(minimum=1, maximum=60, step=1, value=20, label="Number of Inference Steps"),
        gr.components.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.16, label="High Noise Fraction"),
        gr.components.Slider(minimum=0.0, maximum=10.0, step=0.1, value=8, label="Guidance Scale"),
        gr.components.Textbox(value="", label="Negative Prompt"),
        gr.components.Textbox(value=None, label="Seed (Optional)"),
    ],
    outputs=Gallery(label="Generated Images"),
    examples=[
        [
            "dreamlike-art/dreamlike-photoreal-2.0",
            "scenery : melting flesh",
            "style : (((photorealistic))), liminal, cryptic, cinematic, highly detailed, sharp focus, dark, creepy, weirdcore",
            1024,
            576,
            10,
            30,
            0.16,
            8,
            "2D || naked || Low Quality || text logos || watermarks || signatures || out of frame || jpeg artifacts || ugly || poorly drawn || extra limbs || extra hands || extra feet || backwards limbs || extra fingers || extra toes || unrealistic, incorrect, bad anatomy || cut off body pieces || strange body positions || impossible body positioning || Mismatched eyes || cross eyed || crooked face || crooked lips || unclear || undefined || mutations || deformities || off center || poor_composition || duplicate faces, blurry, blurred, unclear, deformed anatomy, deformed face, crazy eyes, bad hands, deformed body",
            None,
        ]
    ],
    title="Image Generation Tool",
    description="Generate images using various diffusion models."
)

# Launch the interface
iface.launch(share=True, debug=True)