Spaces:

bsjd
/

sd3

Runtime error

App Files Files Community

sd3 / app.py

bsjd

Create app.py

42dfc4e verified over 1 year ago

raw

history blame contribute delete

3.38 kB

	import os
	import sys
	import gradio as gr
	import torch
	import random
	import numpy as np
	from PIL import Image

	# Setup and model loading
	#
	# The notebook-style "!command" shell escapes are IPython-only syntax and raise
	# SyntaxError when this file runs as a plain Python script, so every external
	# command below is executed through subprocess with an argument list (no shell).
	import shutil
	import subprocess


	def _run(command):
	    """Run an external command given as an argv list.

	    Raises subprocess.CalledProcessError when the command exits non-zero, so
	    a failed setup step stops the script right away instead of surfacing
	    later as a missing module or missing file.
	    """
	    subprocess.run(command, check=True)


	# Make sure the working root exists before changing into it.
	os.makedirs('/content', exist_ok=True)
	os.chdir('/content')

	# "git clone" fails on an existing non-empty target, so skip it on reruns.
	if not os.path.isdir('/content/TotoroUI'):
	    _run([
	        "git", "clone", "-b", "totoro2",
	        "https://github.com/camenduru/ComfyUI", "/content/TotoroUI",
	    ])
	os.chdir('/content/TotoroUI')

	# Write requirements.txt (mode "w" overwrites any existing file of that name)
	requirements_content = "\n".join([
	    "torch",
	    "torchsde",
	    "einops",
	    "diffusers",
	    "accelerate",
	    "xformers==0.0.26.post1",
	    "gradio",
	])

	with open("requirements.txt", "w") as f:
	    f.write(requirements_content)

	# Install dependencies from requirements.txt with the running interpreter's pip
	_run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

	# Install aria2 unless the aria2c binary is already on PATH
	if shutil.which("aria2c") is None:
	    _run(["apt", "-y", "install", "-qq", "aria2"])

	# Download model weights (-c continues a partially downloaded file)
	_run([
	    "aria2c", "--console-log-level=error", "-c",
	    "-x", "16", "-s", "16", "-k", "1M",
	    "https://huggingface.co/adamo1139/stable-diffusion-3-medium-ungated/resolve/main/sd3_medium_incl_clips_t5xxlfp8.safetensors",
	    "-d", "/content/TotoroUI/model",
	    "-o", "sd3_medium_incl_clips_t5xxlfp8.safetensors",
	])

	# Add TotoroUI to sys.path
	sys.path.append('/content/TotoroUI')

	# Import after adding to sys.path
	import node_helpers
	from totoro.sd import load_checkpoint_guess_config
	import nodes

	# Record once whether a CUDA device is visible; generate_image reads this flag
	use_cuda = torch.cuda.is_available()

	# Load the checkpoint a single time at import. The loader's result is unpacked
	# into the model patcher, CLIP, VAE and CLIP-vision objects used further down.
	checkpoint_file = "/content/TotoroUI/model/sd3_medium_incl_clips_t5xxlfp8.safetensors"
	model_patcher, clip, vae, clipvision = load_checkpoint_guess_config(
	    checkpoint_file,
	    output_vae=True,
	    output_clip=True,
	    embedding_directory=None,
	)

	def zero_out(conditioning):
	    """Return a zero-filled copy of a conditioning list.

	    Each entry of *conditioning* is read as ``entry[0]`` (a tensor) and
	    ``entry[1]`` (a dict). The tensor is replaced by zeros of the same
	    shape and dtype; the dict is shallow-copied and, when it has a
	    ``"pooled_output"`` key, that value is replaced by zeros as well.
	    The input list, its tensors and its dicts are left unmodified.

	    Returns:
	        A one-element tuple ``(entries,)`` where ``entries`` is the list of
	        ``[zero_tensor, dict_copy]`` pairs, in the input order.
	    """
	    def _blank(entry):
	        # Copy first so the caller's dict keeps its original pooled output.
	        extras = entry[1].copy()
	        if "pooled_output" in extras:
	            extras["pooled_output"] = torch.zeros_like(extras["pooled_output"])
	        return [torch.zeros_like(entry[0]), extras]

	    return ([_blank(entry) for entry in conditioning],)

	def generate_image(prompt, negative_prompt, steps):
	    """Sample one image for *prompt* and return it as a PIL image.

	    Args:
	        prompt: Text that is tokenized and encoded as the positive
	            conditioning.
	        negative_prompt: Text encoded as the negative conditioning. The
	            encoded form is tagged with start_percent 0 / end_percent 0.1,
	            and a zeroed copy tagged 0.1 to 1.0 is appended after it.
	        steps: Number of sampler steps. Coerced to ``int`` because a UI
	            slider may hand over a float even when its step size is 1.

	    Returns:
	        A ``PIL.Image.Image`` built from the first decoded sample.
	    """
	    steps = int(steps)

	    with torch.inference_mode():
	        # Constant-filled starting latent: 1 sample, 16 channels, 1024 // 8 per side.
	        # NOTE(review): 0.0609 is kept from the original; its meaning is not
	        # shown in this file - presumably the model's empty-latent value, confirm.
	        latent = {"samples": torch.ones([1, 16, 1024 // 8, 1024 // 8]) * 0.0609}

	        cond, pooled = clip.encode_from_tokens(clip.tokenize(prompt), return_pooled=True)
	        cond = [[cond, {"pooled_output": pooled}]]

	        n_cond, n_pooled = clip.encode_from_tokens(clip.tokenize(negative_prompt), return_pooled=True)
	        n_cond = [[n_cond, {"pooled_output": n_pooled}]]

	        # Real negative conditioning for the 0-0.1 range, zeroed copy for the rest.
	        n_cond1 = node_helpers.conditioning_set_values(n_cond, {"start_percent": 0, "end_percent": 0.1})
	        n_cond2 = zero_out(n_cond)
	        n_cond2 = node_helpers.conditioning_set_values(n_cond2[0], {"start_percent": 0.1, "end_percent": 1.0})
	        n_cond = n_cond1 + n_cond2

	        # Fresh random seed per call, drawn from the full unsigned 64-bit range.
	        seed = random.randint(0, 18446744073709551615)

	        sample = nodes.common_ksampler(
	            model=model_patcher,
	            seed=seed,
	            steps=steps,
	            cfg=4.5,
	            sampler_name="dpmpp_2m",
	            scheduler="sgm_uniform",
	            positive=cond,
	            negative=n_cond,
	            latent=latent,
	            denoise=1,
	        )

	        sample = sample[0]["samples"].to(torch.float16)

	        # Only the device move is conditional; decoding must run on CPU-only hosts too.
	        if use_cuda:
	            vae.first_stage_model.cuda()
	        decoded = vae.decode_tiled(sample).detach()

	        # Move to host memory before the NumPy conversion: NumPy cannot read a
	        # CUDA tensor, and .cpu() is a no-op when the tensor is already on CPU.
	        pixels = np.array((decoded * 255).cpu(), dtype=np.uint8)
	        return Image.fromarray(pixels[0])

	# Gradio interface
	#
	# The slider's initial position is passed as ``value``; the pre-3.0 keyword
	# ``default`` is not accepted by current Gradio releases, and the
	# requirements above install Gradio unpinned.
	interface = gr.Interface(
	    fn=generate_image,
	    inputs=[
	        gr.Textbox(label="Prompt"),
	        gr.Textbox(label="Negative Prompt"),
	        gr.Slider(label="Steps", minimum=1, maximum=200, step=1, value=28),
	    ],
	    outputs=gr.Image(label="Generated Image"),
	)

	interface.launch()