Spaces:

artificialguybr
/

Pixart-Sigma

Running on Zero

App Files Files Community

Pixart-Sigma / app.py

artificialguybr

Update app.py

ecd6341 verified 3 months ago

raw

history blame

2.63 kB

	import gradio as gr
	import spaces
	import torch
	from diffusers import Transformer2DModel
	from scripts.diffusers_patches import pixart_sigma_init_patched_inputs, PixArtSigmaPipeline

	# The patch below replaces Transformer2DModel._init_patched_inputs, so the
	# installed diffusers build must already expose that hook. Raise explicitly
	# rather than `assert`: assertions are removed when Python runs with -O and
	# the check would silently disappear.
	if not getattr(Transformer2DModel, "_init_patched_inputs", False):
	    raise RuntimeError("Need to Upgrade diffusers: pip install git+https://github.com/huggingface/diffusers")
	# Install the patched initialiser imported from scripts.diffusers_patches.
	Transformer2DModel._init_patched_inputs = pixart_sigma_init_patched_inputs

	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	# Half precision is only kept for the CUDA path. diffusers warns that
	# pipelines loaded in float16 cannot run on the CPU, so the CPU fallback
	# loads the weights in float32 instead.
	weight_dtype = torch.float16 if device.type == "cuda" else torch.float32

	# Load the transformer checkpoint on its own so it can be handed to the
	# pipeline below in place of whatever transformer that repo would supply.
	transformer = Transformer2DModel.from_pretrained(
	    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
	    subfolder="transformer",
	    torch_dtype=weight_dtype,
	    use_safetensors=True,
	)
	# Remaining pipeline components come from the second repository; both loads
	# share the same dtype so the modules stay consistent.
	pipe = PixArtSigmaPipeline.from_pretrained(
	    "PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers",
	    transformer=transformer,
	    torch_dtype=weight_dtype,
	    use_safetensors=True,
	)
	pipe.to(device)

	@spaces.GPU(duration=90)
	def generate(prompt, negative_prompt, num_inference_steps, guidance_scale, height, width):
	    """Run the module-level ``pipe`` once and return the first generated image.

	    Args:
	        prompt: Text describing the desired image.
	        negative_prompt: Text describing what the image should avoid.
	        num_inference_steps: Number of denoising steps; coerced to ``int``.
	        guidance_scale: Guidance strength forwarded unchanged to the pipeline.
	        height: Output height in pixels; coerced to ``int``.
	        width: Output width in pixels; coerced to ``int``.

	    Returns:
	        The first element of the pipeline result's ``images`` sequence.
	    """
	    # UI sliders and API clients may deliver whole numbers as floats
	    # (e.g. 1024.0). Step counts and image dimensions are used for integer
	    # arithmetic and tensor shapes downstream, so normalise them here.
	    result = pipe(
	        prompt,
	        negative_prompt=negative_prompt,
	        num_inference_steps=int(num_inference_steps),
	        guidance_scale=guidance_scale,
	        height=int(height),
	        width=int(width),
	    )
	    return result.images[0]

	def _build_interface():
	    """Assemble the Gradio UI that drives ``generate``.

	    Widgets are created in the same order as ``generate``'s parameters so
	    that Gradio passes each value to the matching argument positionally.
	    """
	    prompt_box = gr.Text(label="Prompt")
	    negative_box = gr.Text(label="Negative Prompt")
	    steps_slider = gr.Slider(minimum=10, maximum=100, value=30, step=1, label="Number of Inference Steps")
	    guidance_slider = gr.Slider(minimum=1, maximum=20, value=6, step=0.1, label="Guidance Scale")
	    height_slider = gr.Slider(minimum=64, maximum=1024, value=1024, step=64, label="Height")
	    width_slider = gr.Slider(minimum=64, maximum=1024, value=1024, step=64, label="Width")
	    result_view = gr.Image(label="Generated Image")

	    return gr.Interface(
	        fn=generate,
	        inputs=[
	            prompt_box,
	            negative_box,
	            steps_slider,
	            guidance_slider,
	            height_slider,
	            width_slider,
	        ],
	        outputs=result_view,
	        title="PixArt Sigma Image Generation",
	        description="""Generate high-fidelity 4K images from text prompts using PixArt-Sigma, a state-of-the-art diffusion model.

	PixArt-Sigma achieves superior image quality and alignment with prompts compared to previous models like [PixArt-alpha](https://github.com/PixArt-alpha/PixArt-sigma). It does so efficiently, evolving from PixArt-alpha through a process termed weak-to-strong training - leveraging higher quality data and an improved attention mechanism.

	With just 0.6 billion parameters, PixArt-Sigma reaches new heights in text-to-image generation. Output stunning, intricate 4K images for posters, wallpapers, concept art, and more. Guide the model with descriptive prompts and fine-tune parameters like guidance scale and number of inference steps.
	""",
	    )


	# Build the UI once at import time and start serving it.
	interface = _build_interface()
	interface.launch()