import os import sys import gradio as gr import torch import random import numpy as np from PIL import Image # Setup and model loading os.chdir('/content') !git clone -b totoro2 https://github.com/camenduru/ComfyUI /content/TotoroUI os.chdir('/content/TotoroUI') # Create requirements.txt if it doesn't exist requirements_content = """torch torchsde einops diffusers accelerate xformers==0.0.26.post1 gradio""" with open("requirements.txt", "w") as f: f.write(requirements_content) # Install dependencies from requirements.txt !pip install -r requirements.txt # Install aria2 !apt -y install -qq aria2 # Download model weights !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/adamo1139/stable-diffusion-3-medium-ungated/resolve/main/sd3_medium_incl_clips_t5xxlfp8.safetensors -d /content/TotoroUI/model -o sd3_medium_incl_clips_t5xxlfp8.safetensors # Add TotoroUI to sys.path sys.path.append('/content/TotoroUI') # Import after adding to sys.path import node_helpers from totoro.sd import load_checkpoint_guess_config import nodes # Check for GPU availability and CUDA use_cuda = torch.cuda.is_available() model_patcher, clip, vae, clipvision = load_checkpoint_guess_config( "/content/TotoroUI/model/sd3_medium_incl_clips_t5xxlfp8.safetensors", output_vae=True, output_clip=True, embedding_directory=None ) def zero_out(conditioning): c = [] for t in conditioning: d = t[1].copy() if "pooled_output" in d: d["pooled_output"] = torch.zeros_like(d["pooled_output"]) n = [torch.zeros_like(t[0]), d] c.append(n) return (c, ) def generate_image(prompt, negative_prompt, steps): with torch.inference_mode(): latent = {"samples": torch.ones([1, 16, 1024 // 8, 1024 // 8]) * 0.0609} cond, pooled = clip.encode_from_tokens(clip.tokenize(prompt), return_pooled=True) cond = [[cond, {"pooled_output": pooled}]] n_cond, n_pooled = clip.encode_from_tokens(clip.tokenize(negative_prompt), return_pooled=True) n_cond = [[n_cond, {"pooled_output": n_pooled}]] n_cond1 = node_helpers.conditioning_set_values(n_cond, {"start_percent": 0, "end_percent": 0.1}) n_cond2 = zero_out(n_cond) n_cond2 = node_helpers.conditioning_set_values(n_cond2[0], {"start_percent": 0.1, "end_percent": 1.0}) n_cond = n_cond1 + n_cond2 seed = random.randint(0, 18446744073709551615) sample = nodes.common_ksampler( model=model_patcher, seed=seed, steps=steps, cfg=4.5, sampler_name="dpmpp_2m", scheduler="sgm_uniform", positive=cond, negative=n_cond, latent=latent, denoise=1 ) sample = sample[0]["samples"].to(torch.float16) if use_cuda: vae.first_stage_model.cuda() decoded = vae.decode_tiled(sample).detach() return Image.fromarray(np.array(decoded*255, dtype=np.uint8)[0]) # Gradio interface interface = gr.Interface( fn=generate_image, inputs=[ gr.Textbox(label="Prompt"), gr.Textbox(label="Negative Prompt"), gr.Slider(label="Steps", minimum=1, maximum=200, step=1, default=28) ], outputs=gr.Image(label="Generated Image") ) interface.launch()