Spaces:

Junjie96
/

UniPortrait

Running

App Files Files Community

UniPortrait / app.py

Junjie96

Upload 38 files

dbac7c5 verified 4 months ago

raw

history blame

21 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	# @Time : 2024-07-31
	# @Author : Junjie He
	import gradio as gr

	from src.process import (
	text_to_single_id_generation_process,
	text_to_multi_id_generation_process,
	image_to_single_id_generation_process,
	)


	def text_to_single_id_generation_block():
	"""Build the Gradio widgets for the "Text-to-Single-ID Generation" tab.

	Creates the prompt/option inputs, the ID / mix / style image inputs, an
	output gallery and an examples table, then binds the Run button to
	``text_to_single_id_generation_process``. Returns nothing.

	Reads the globals ``text_to_single_id_description`` and
	``text_to_single_id_tips``, and is invoked inside a ``gr.TabItem``
	context by the ``__main__`` section of this file.

	NOTE(review): the indentation of this copy has been flattened, so the
	nesting of the ``with`` layout contexts below cannot be read off the
	text. Restore it from the upstream file before running.
	"""
	# Section heading plus the quick-start and tips HTML snippets.
	gr.Markdown("## Text-to-Single-ID Generation")
	gr.HTML(text_to_single_id_description)
	gr.HTML(text_to_single_id_tips)
	with gr.Row():
	# scale=1 column: text prompts, resolution choice and the Run button.
	with gr.Column(scale=1, min_width=100):
	prompt = gr.Textbox(value="", label='Prompt', lines=2)
	negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
	# Choices are "HxW" strings, as stated by the label.
	image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
	label="Image Resolution (HxW)")
	run_button = gr.Button(value="Run")

	with gr.Accordion("Advanced Options", open=True):
	# Upper bound is 2**31 - 1; per the label, -1 requests a random seed.
	seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

	faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
	face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0,
	step=0.01, value=0.1)

	style_scale = gr.Slider(label="style_scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)

	use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

	# scale=3 column: identity image, supplementary/mix identities, style image.
	with gr.Column(scale=3, min_width=100):
	with gr.Row(equal_height=False):
	pil_faceid = gr.Image(type="pil", label="ID Image")
	with gr.Accordion("ID Supplements", open=True):
	with gr.Row():
	# Multiple extra ID images, delivered as file paths (type="filepath").
	pil_supp_faceids = gr.File(file_count="multiple", file_types=["image"],
	type="filepath", label="Additional ID Images")
	with gr.Row():
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_1 = gr.Image(type="pil", label="Mix ID 1")
	mix_scale_1 = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_2 = gr.Image(type="pil", label="Mix ID 2")
	mix_scale_2 = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
	pil_style = gr.Image(type="pil", label="Style")

	with gr.Row():
	# Hidden image: it is only listed as the last `gr.Examples` input so the
	# output path in each example row has a component to be loaded into.
	example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
	result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4, preview=True,
	format="png")
	with gr.Row():
	# Each example row is [prompt, ID image, style image, example output],
	# matching the order of `inputs` passed to `gr.Examples` below.
	examples = [
	[
	"A young man with short black hair, wearing a black hoodie with a hood, was paired with a blue denim jacket with yellow details.",
	"assets/examples/1-newton.jpg",
	"assets/No-Image-Placeholder.png",
	"assets/examples/1-output-1.png",
	],
	[
	"A young man with short black hair, wearing a black hoodie with a hood, was paired with a blue denim jacket with yellow details.",
	"assets/examples/1-newton.jpg",
	"assets/examples/1-style-1.jpg",
	"assets/examples/1-output-2.png",
	],
	]
	gr.Examples(
	label="Examples",
	examples=examples,
	inputs=[prompt, pil_faceid, pil_style, example_output],
	)
	# Components whose values are handed to the process function on click.
	# NOTE(review): presumably the order must match the parameter order of
	# `text_to_single_id_generation_process` — verify against src/process.py.
	ips = [
	pil_faceid, pil_supp_faceids,
	pil_mix_faceid_1, mix_scale_1,
	pil_mix_faceid_2, mix_scale_2,
	faceid_scale, face_structure_scale,
	prompt, negative_prompt,
	pil_style, style_scale,
	seed, image_resolution, use_sr,
	]
	run_button.click(fn=text_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


	def text_to_multi_id_generation_block():
	"""Build the Gradio widgets for the "Text-to-Multi-ID Generation" tab.

	Creates the prompt/option inputs, two sets of identity inputs ("First ID"
	and "Second ID", each with supplementary and mix images), a style image,
	an output gallery and two examples tables, then binds the Run button to
	``text_to_multi_id_generation_process``. Returns nothing.

	Reads the globals ``text_to_multi_id_description`` and
	``text_to_multi_id_tips``, and is invoked inside a ``gr.TabItem`` context
	by the ``__main__`` section of this file.

	NOTE(review): the indentation of this copy has been flattened, so the
	nesting of the ``with`` layout contexts below cannot be read off the
	text. Restore it from the upstream file before running.
	"""
	# Section heading plus the quick-start and tips HTML snippets.
	gr.Markdown("## Text-to-Multi-ID Generation")
	gr.HTML(text_to_multi_id_description)
	gr.HTML(text_to_multi_id_tips)
	with gr.Row():
	# scale=1 column: text prompts, resolution choice and the Run button.
	with gr.Column(scale=1, min_width=100):
	prompt = gr.Textbox(value="", label='Prompt', lines=2)
	negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
	# Choices are "HxW" strings, as stated by the label.
	image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
	label="Image Resolution (HxW)")
	run_button = gr.Button(value="Run")

	with gr.Accordion("Advanced Options", open=True):
	# Upper bound is 2**31 - 1; per the label, -1 requests a random seed.
	seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

	faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
	face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0,
	step=0.01, value=0.3)
	style_scale = gr.Slider(label="style_scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)

	use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

	# scale=3 column: first identity, second identity and style image inputs.
	with gr.Column(scale=3, min_width=100):
	with gr.Row(equal_height=False):
	# --- First identity -------------------------------------------------
	with gr.Column(scale=1, min_width=100):
	pil_faceid_1st = gr.Image(type="pil", label="First ID")
	with gr.Accordion("First ID Supplements", open=False):
	with gr.Row():
	# Multiple extra ID images, delivered as file paths (type="filepath").
	pil_supp_faceids_1st = gr.File(file_count="multiple", file_types=["image"],
	type="filepath", label="Additional ID Images")
	with gr.Row():
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_1_1st = gr.Image(type="pil", label="Mix ID 1")
	mix_scale_1_1st = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01,
	value=0.0)
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_2_1st = gr.Image(type="pil", label="Mix ID 2")
	mix_scale_2_1st = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01,
	value=0.0)
	# --- Second identity (same widget set as the first) -----------------
	with gr.Column(scale=1, min_width=100):
	pil_faceid_2nd = gr.Image(type="pil", label="Second ID")
	with gr.Accordion("Second ID Supplements", open=False):
	with gr.Row():
	pil_supp_faceids_2nd = gr.File(file_count="multiple", file_types=["image"],
	type="filepath", label="Additional ID Images")
	with gr.Row():
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_1_2nd = gr.Image(type="pil", label="Mix ID 1")
	mix_scale_1_2nd = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01,
	value=0.0)
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_2_2nd = gr.Image(type="pil", label="Mix ID 2")
	mix_scale_2_2nd = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01,
	value=0.0)
	# --- Style reference --------------------------------------------------
	with gr.Column(scale=1, min_width=100):
	pil_style = gr.Image(type="pil", label="Style")

	with gr.Row():
	# Hidden image: it is only listed as the last `gr.Examples` input so the
	# output path in each example row has a component to be loaded into.
	example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
	result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4, preview=True,
	format="png")
	with gr.Row():
	# Each example row is [prompt, first ID, second ID, style image, example
	# output], matching `inputs` below. The first prompt is written in Chinese.
	examples = [
	[
	"两个女人在欢笑和快乐中被捕捉到，他们的脸上洋溢着真挚的幸福，背景是日落时分的宁静海滩。这幅画以柔和的风格描绘，捕捉了这一刻的温暖和宁静。",
	"assets/examples/2-stylegan2-ffhq-0100.png",
	"assets/examples/2-stylegan2-ffhq-0293.png",
	"assets/No-Image-Placeholder.png",
	"assets/examples/2-output-1.png",
	],
	[
	"The two female models are drinking coffee. The background was off-white.",
	"assets/examples/2-stylegan2-ffhq-0100.png",
	"assets/examples/2-stylegan2-ffhq-0293.png",
	"assets/examples/2-style-1.jpg",
	"assets/examples/2-output-2.png",
	],
	]
	gr.Examples(
	label="Examples",
	examples=examples,
	inputs=[prompt, pil_faceid_1st, pil_faceid_2nd, pil_style, example_output],
	)
	with gr.Row():
	# Second examples table (rebinding `examples`): each row is [prompt,
	# first ID, first ID supplements, second ID, second ID supplements,
	# example output]; the supplement entries are lists of file paths.
	examples = [
	[
	"Two men in an American poster.",
	"assets/examples/Trump-1.jpg",
	["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
	"assets/examples/Biden-1.jpg",
	["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
	"assets/examples/2-output-4.png",
	],
	[
	"Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
	"assets/examples/Trump-1.jpg",
	["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
	"assets/examples/Biden-1.jpg",
	["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
	"assets/examples/2-output-3.png",
	],
	]
	gr.Examples(
	label="Examples (Multiple References)",
	examples=examples,
	inputs=[prompt, pil_faceid_1st, pil_supp_faceids_1st, pil_faceid_2nd, pil_supp_faceids_2nd, example_output],
	)
	# Components whose values are handed to the process function on click.
	# NOTE(review): presumably the order must match the parameter order of
	# `text_to_multi_id_generation_process` — verify against src/process.py.
	ips = [
	pil_faceid_1st, pil_supp_faceids_1st,
	pil_mix_faceid_1_1st, mix_scale_1_1st,
	pil_mix_faceid_2_1st, mix_scale_2_1st,
	pil_faceid_2nd, pil_supp_faceids_2nd,
	pil_mix_faceid_1_2nd, mix_scale_1_2nd,
	pil_mix_faceid_2_2nd, mix_scale_2_2nd,
	faceid_scale, face_structure_scale,
	prompt, negative_prompt,
	pil_style, style_scale,
	seed, image_resolution, use_sr,
	]
	run_button.click(fn=text_to_multi_id_generation_process, inputs=ips, outputs=[result_gallery])


	def image_to_single_id_generation_block():
	"""Build the Gradio widgets for the "Image-to-Single-ID Generation" tab.

	Creates the option inputs, the portrait-reference and ID image inputs
	(with supplementary and mix images), an output gallery and an examples
	table, then binds the Run button to
	``image_to_single_id_generation_process``. Unlike the text-to-ID tabs,
	this one defines no prompt or negative-prompt textbox. Returns nothing.

	Reads the globals ``image_to_single_id_description`` and
	``image_to_single_id_tips``, and is invoked inside a ``gr.TabItem``
	context by the ``__main__`` section of this file.

	NOTE(review): the indentation of this copy has been flattened, so the
	nesting of the ``with`` layout contexts below cannot be read off the
	text. Restore it from the upstream file before running.
	"""
	# Section heading plus the quick-start and tips HTML snippets.
	gr.Markdown("## Image-to-Single-ID Generation")
	gr.HTML(image_to_single_id_description)
	gr.HTML(image_to_single_id_tips)
	with gr.Row():
	# scale=1 column: resolution choice, Run button and advanced options.
	with gr.Column(scale=1, min_width=100):
	# Choices are "HxW" strings, as stated by the label.
	image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
	label="Image Resolution (HxW)")
	run_button = gr.Button(value="Run")

	with gr.Accordion("Advanced Options", open=True):
	# Upper bound is 2**31 - 1; per the label, -1 requests a random seed.
	seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

	# `style_scale` is shown to the user as "Reference Scale" in this tab.
	style_scale = gr.Slider(label="Reference Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
	faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
	face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0, step=0.01,
	value=0.3)

	use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

	# scale=3 column: portrait reference, identity image and ID supplements.
	with gr.Column(scale=3, min_width=100):
	with gr.Row(equal_height=False):
	# `pil_style` holds the portrait reference image in this tab.
	pil_style = gr.Image(type="pil", label="Portrait Reference")
	pil_faceid = gr.Image(type="pil", label="ID Image")
	with gr.Accordion("ID Supplements", open=True):
	with gr.Row():
	# Multiple extra ID images, delivered as file paths (type="filepath").
	pil_supp_faceids = gr.File(file_count="multiple", file_types=["image"],
	type="filepath", label="Additional ID Images")
	with gr.Row():
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_1 = gr.Image(type="pil", label="Mix ID 1")
	mix_scale_1 = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
	with gr.Column(scale=1, min_width=100):
	pil_mix_faceid_2 = gr.Image(type="pil", label="Mix ID 2")
	mix_scale_2 = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
	with gr.Row():
	with gr.Column(scale=3, min_width=100):
	# Hidden image: it is only listed as the last `gr.Examples` input so the
	# output path in each example row has a component to be loaded into.
	example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
	result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4,
	preview=True, format="png")
	with gr.Row():
	# Each example row is [portrait reference, ID image, Face ID Scale,
	# Face Structure Scale, example output], matching `inputs` below.
	examples = [
	[
	"assets/examples/3-style-1.png",
	"assets/examples/3-stylegan2-ffhq-0293.png",
	0.7,
	0.3,
	"assets/examples/3-output-1.png",
	],
	[
	"assets/examples/3-style-1.png",
	"assets/examples/3-stylegan2-ffhq-0293.png",
	0.6,
	0.0,
	"assets/examples/3-output-2.png",
	],
	[
	"assets/examples/3-style-2.jpg",
	"assets/examples/3-stylegan2-ffhq-0381.png",
	0.7,
	0.3,
	"assets/examples/3-output-3.png",
	],
	[
	"assets/examples/3-style-3.jpg",
	"assets/examples/3-stylegan2-ffhq-0381.png",
	0.6,
	0.0,
	"assets/examples/3-output-4.png",
	],
	]
	gr.Examples(
	label="Examples",
	examples=examples,
	inputs=[pil_style, pil_faceid, faceid_scale, face_structure_scale, example_output],
	)
	# Components whose values are handed to the process function on click.
	# NOTE(review): presumably the order must match the parameter order of
	# `image_to_single_id_generation_process` — verify against src/process.py.
	ips = [
	pil_faceid, pil_supp_faceids,
	pil_mix_faceid_1, mix_scale_1,
	pil_mix_faceid_2, mix_scale_2,
	faceid_scale, face_structure_scale,
	pil_style, style_scale,
	seed, image_resolution, use_sr,
	]
	run_button.click(fn=image_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


	# ---------------------------------------------------------------------------
	# Static HTML / Markdown snippets shown by the demo.
	#
	# These are defined at module level rather than under the ``__main__`` guard
	# because the ``*_generation_block`` builders read the description/tips
	# strings as globals. With the strings under the guard, importing this module
	# and calling a builder raised ``NameError``. Running the file as a script
	# behaves exactly as before.
	# ---------------------------------------------------------------------------

	# Page header with badge links.
	# NOTE(review): the arXiv URL and badge still contain the "xxxx.xxxxx"
	# placeholder — TODO replace with the real identifier.
	title = r"""
	<div style="text-align: center;">
	<h1> UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization </h1>
	<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
	<a href="https://arxiv.org/pdf/xxxx.xxxxx"><img src="https://img.shields.io/badge/arXiv-xxxx.xxxxx-red"></a>

	<a href='https://aigcdesigngroup.github.io/UniPortrait-Page/'><img src='https://img.shields.io/badge/Project_Page-UniPortrait-green' alt='Project Page'></a>

	<a href="https://github.com/junjiehe96/UniPortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
	</div>
	</br>
	</div>
	"""

	# Short introduction rendered directly under the header.
	title_description = r"""
	This is the <b>official 🤗 Gradio demo</b> for <a href='https://arxiv.org/pdf/xxxx.xxxxx' target='_blank'><b>UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization</b></a>.<br>
	The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊
	"""

	# Quick-start and tips text read by text_to_single_id_generation_block().
	text_to_single_id_description = r"""🚀🚀🚀Quick start:<br>
	1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the <b>Run</b> button.<br>
	2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
	"""

	text_to_single_id_tips = r"""💡💡💡Tips:<br>
	1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
	2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
	3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
	"""

	# Quick-start and tips text read by text_to_multi_id_generation_block().
	text_to_multi_id_description = r"""🚀🚀🚀Quick start:<br>
	1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the <b>Run</b> button.<br>
	2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
	"""

	text_to_multi_id_tips = r"""💡💡💡Tips:<br>
	1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
	2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
	3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).<br>
	"""

	# Quick-start and tips text read by image_to_single_id_generation_block().
	image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the <b>Run</b> button. 🤗<br>"""

	image_to_single_id_tips = r"""💡💡💡Tips:<br>
	1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
	2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".<br>
	3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
	"""

	# NOTE(review): `citation` is never passed to any component below, so it is
	# not displayed. Its BibTeX entry is for InstantID, not for this project —
	# TODO supply the correct entry before rendering it.
	citation = r"""
	---
	📝 Citation
	<br>
	If our work is helpful for your research or applications, please cite us via:
	```bibtex
	@article{wang2024instantid,
	title={InstantID: Zero-shot Identity-Preserving Generation in Seconds},
	author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony},
	journal={arXiv preprint arXiv:2401.07519},
	year={2024}
	}
	```
	📧 Contact
	<br>
	If you have any questions, please feel free to open an issue or directly reach us out at <b>he_junjie@zju.edu.cn</b>.
	"""


	if __name__ == "__main__":
	    # Assemble the three-tab demo; queue() is enabled on the Blocks app.
	    block = gr.Blocks(title="UniPortrait").queue()
	    with block:
	        gr.HTML(title)
	        gr.HTML(title_description)

	        # One tab per capability; each builder creates its own widgets
	        # and wires its own Run button.
	        with gr.TabItem("Text-to-Single-ID"):
	            text_to_single_id_generation_block()

	        with gr.TabItem("Text-to-Multi-ID"):
	            text_to_multi_id_generation_block()

	        with gr.TabItem("Image-to-Single-ID (Stylization)"):
	            image_to_single_id_generation_block()

	    block.launch(share=True)
	    # Alternative launch configurations kept from the original:
	    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
	    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])