catvton-flux-try-on

Running on Zero

App Files Files Community

catvton-flux-try-on / app.py

xiaozaa

using lora version for spaces zeroGPU

4fb0ca5 29 days ago

raw

history blame

7.39 kB

	import spaces

	import gradio as gr
	from tryon_inference import run_inference
	import os
	import numpy as np
	from PIL import Image
	import tempfile
	import torch
	from diffusers import FluxTransformer2DModel, FluxFillPipeline

	import shutil

	def find_cuda():
	    """Locate the CUDA toolkit installation directory.

	    The lookup order is:

	    1. The ``CUDA_HOME`` environment variable, then ``CUDA_PATH``. Each
	       variable is checked on its own, and only accepted when it points at
	       an existing directory.
	    2. The grandparent directory of the ``nvcc`` executable found on
	       ``PATH`` (i.e. ``<root>/bin/nvcc`` -> ``<root>``).

	    Returns:
	        The installation path as a string, or ``None`` when nothing was found.
	    """
	    # Check the variables one at a time so that a stale CUDA_HOME does not
	    # hide a perfectly valid CUDA_PATH.
	    for env_var in ('CUDA_HOME', 'CUDA_PATH'):
	        cuda_home = os.environ.get(env_var)
	        # A toolkit root has to be a directory; a plain file is not usable.
	        if cuda_home and os.path.isdir(cuda_home):
	            return cuda_home

	    # Search for the nvcc executable in the system's PATH
	    nvcc_path = shutil.which('nvcc')
	    if nvcc_path:
	        # Remove the 'bin/nvcc' part to get the CUDA installation path
	        return os.path.dirname(os.path.dirname(nvcc_path))

	    return None

	# Report whether a CUDA toolkit could be located. The result is only
	# printed; nothing below in this section reads it again.
	cuda_path = find_cuda()
	print(
	    f"CUDA installation found at: {cuda_path}"
	    if cuda_path
	    else "CUDA installation not found"
	)

	device = torch.device('cuda')

	# Fetch the try-on LoRA weights together with their alpha values.
	print("Start loading LoRA weights")
	state_dict, network_alphas = FluxFillPipeline.lora_state_dict(
	    pretrained_model_name_or_path_or_dict="xiaozaa/catvton-flux-lora-alpha",  ## The tryon Lora weights
	    weight_name="pytorch_lora_weights.safetensors",
	    return_alphas=True,
	)

	# Every key has to name either a LoRA tensor or a DoRA scale; a single
	# key that is neither marks the checkpoint as invalid.
	is_correct_format = not any(
	    "lora" not in key and "dora_scale" not in key for key in state_dict
	)
	if not is_correct_format:
	    raise ValueError("Invalid LoRA checkpoint.")

	# Load the base fill pipeline in bfloat16 and move it to the GPU device.
	print('Loading diffusion model ...')
	pipe = FluxFillPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-Fill-dev",
	    torch_dtype=torch.bfloat16,
	)
	pipe = pipe.to(device)

	# Inject the LoRA weights into the pipeline's transformer.
	FluxFillPipeline.load_lora_into_transformer(
	    state_dict=state_dict,
	    network_alphas=network_alphas,
	    transformer=pipe.transformer,
	)

	print('Loading Finished!')

	@spaces.GPU
	def gradio_inference(
	    image_data,
	    garment,
	    num_steps=50,
	    guidance_scale=30.0,
	    seed=-1,
	    width=768,
	    height=1024
	):
	    """Wrapper function for Gradio interface.

	    Validates the UI inputs, turns the drawn layer into a binary mask,
	    writes person image, mask and garment to a temporary directory and
	    calls ``run_inference`` on those files.

	    Args:
	        image_data: Value of the image editor component; a mapping whose
	            ``"background"`` entry is the person image and whose first
	            ``"layers"`` entry is the drawn mask layer.
	        garment: Garment image (an object with a ``save`` method).
	        num_steps: Forwarded to ``run_inference`` as ``num_steps``.
	        guidance_scale: Forwarded to ``run_inference`` as ``guidance_scale``.
	        seed: Forwarded to ``run_inference`` as ``seed``.
	        width: First element of the ``size`` tuple given to ``run_inference``.
	        height: Second element of the ``size`` tuple given to ``run_inference``.

	    Returns:
	        The second element of the pair returned by ``run_inference``.

	    Raises:
	        gr.Error: If the person image, the mask or the garment is missing,
	            if the mask is empty, or if ``run_inference`` fails.
	    """
	    # Validate up front so the user sees an actionable message instead of a
	    # raw TypeError / IndexError / AttributeError.
	    image = image_data.get("background") if image_data else None
	    if image is None:
	        raise gr.Error("Please upload a model image.")

	    layers = image_data.get("layers") or []
	    if not layers or layers[0] is None:
	        raise gr.Error("Please draw a mask over the clothing area.")

	    if garment is None:
	        raise gr.Error("Please upload a garment image.")

	    # Convert the first layer to a binary mask: a pixel counts as "not drawn"
	    # when every channel is below 10.
	    mask_array = np.array(layers[0])
	    dark = mask_array < 10
	    # A single-channel layer yields a 2-D array and has no channel axis.
	    is_black = dark if mask_array.ndim == 2 else np.all(dark, axis=2)
	    if is_black.all():
	        # Nothing was painted, so there is no region to fill.
	        raise gr.Error("The mask is empty. Please draw a mask over the clothing area.")
	    mask = Image.fromarray(((~is_black) * 255).astype(np.uint8))

	    # Use temporary directory
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        # Save inputs to temp directory
	        temp_image = os.path.join(tmp_dir, "image.png")
	        temp_mask = os.path.join(tmp_dir, "mask.png")
	        temp_garment = os.path.join(tmp_dir, "garment.png")

	        image.save(temp_image)
	        mask.save(temp_mask)
	        garment.save(temp_garment)

	        try:
	            # Run inference
	            _, tryon_result = run_inference(
	                pipe=pipe,
	                image_path=temp_image,
	                mask_path=temp_mask,
	                garment_path=temp_garment,
	                num_steps=num_steps,
	                guidance_scale=guidance_scale,
	                seed=seed,
	                size=(width, height)
	            )
	            return tryon_result
	        except Exception as e:
	            # Surface the failure in the UI while keeping the original
	            # traceback attached for the server log.
	            raise gr.Error(f"Error during inference: {str(e)}") from e

	# Build the Gradio UI and bind it to the name `demo`.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# nesting of the `with` layout blocks below cannot be confirmed from here.
	with gr.Blocks() as demo:
	gr.Markdown("""
	# CATVTON FLUX Virtual Try-On Demo (by using LoRA weights)
	Upload a model image, draw a mask, and a garment image to generate virtual try-on results.

	[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/xiaozaa/catvton-flux-alpha)
	[![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/nftblackmagic/catvton-flux)
	""")

	# gr.Video("example/github.mp4", label="Demo Video: How to use the tool")

	with gr.Column():
	with gr.Row():
	with gr.Column():
	# Person image input with a drawable mask; this is the `image_data`
	# argument of gradio_inference (see the click wiring at the bottom).
	image_input = gr.ImageMask(
	label="Model Image (Click 'Edit' and draw mask over the clothing area)",
	type="pil",
	height=600,
	width=300
	)
	# Bundled sample person images that can be loaded into image_input.
	gr.Examples(
	examples=[
	["./example/person/00008_00.jpg"],
	["./example/person/00055_00.jpg"],
	["./example/person/00057_00.jpg"],
	["./example/person/00067_00.jpg"],
	["./example/person/00069_00.jpg"],
	],
	inputs=[image_input],
	label="Person Images",
	)
	with gr.Column():
	# Garment image input; this is the `garment` argument of gradio_inference.
	garment_input = gr.Image(label="Garment Image", type="pil", height=600, width=300)
	# Bundled sample garment images that can be loaded into garment_input.
	gr.Examples(
	examples=[
	["./example/garment/04564_00.jpg"],
	["./example/garment/00055_00.jpg"],
	["./example/garment/00396_00.jpg"],
	["./example/garment/00067_00.jpg"],
	["./example/garment/00069_00.jpg"],
	],
	inputs=[garment_input],
	label="Garment Images",
	)
	with gr.Column():
	# Output component that receives the value returned by gradio_inference.
	tryon_output = gr.Image(label="Try-On Result", height=600, width=300)

	# Generation parameters; each slider is passed to gradio_inference by the
	# click handler below.
	with gr.Row():
	# The UI starts at 30 steps, while gradio_inference's own default is 50.
	num_steps = gr.Slider(
	minimum=1,
	maximum=100,
	value=30,
	step=1,
	label="Number of Steps"
	)
	guidance_scale = gr.Slider(
	minimum=1.0,
	maximum=50.0,
	value=30.0,
	step=0.5,
	label="Guidance Scale"
	)
	# Per the label, -1 requests a random seed; the handling itself lives
	# outside this section.
	seed = gr.Slider(
	minimum=-1,
	maximum=2147483647,
	step=1,
	value=-1,
	label="Seed (-1 for random)"
	)
	# Output size, selectable in steps of 64 between 256 and 1024.
	width = gr.Slider(
	minimum=256,
	maximum=1024,
	step=64,
	value=768,
	label="Width"
	)
	height = gr.Slider(
	minimum=256,
	maximum=1024,
	step=64,
	value=1024,
	label="Height"
	)


	submit_btn = gr.Button("Generate Try-On", variant="primary")


	# Static usage notes shown to the user.
	with gr.Row():
	gr.Markdown("""
	### Notes:
	- The model is trained on VITON-HD dataset. It focuses on the woman upper body try-on generation.
	- The mask should indicate the region where the garment will be placed.
	- The garment image should be on a clean background.
	- The model is not perfect. It may generate some artifacts.
	- The model is slow. Please be patient.
	- The model is just for research purpose.
	""")

	# Wire the button to gradio_inference. The inputs are listed in the same
	# order as that function's positional parameters, and the call is also
	# registered under the API name "try-on".
	submit_btn.click(
	fn=gradio_inference,
	inputs=[
	image_input,
	garment_input,
	num_steps,
	guidance_scale,
	seed,
	width,
	height
	],
	outputs=[tryon_output],
	api_name="try-on"
	)


	# Start the Gradio app.
	demo.launch()