UniPortrait / app.py
Junjie96's picture
Upload 38 files
dbac7c5 verified
raw
history blame
21 kB
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2024-07-31
# @Author : Junjie He
import gradio as gr
from src.process import (
text_to_single_id_generation_process,
text_to_multi_id_generation_process,
image_to_single_id_generation_process,
)
def text_to_single_id_generation_block():
    """Lay out the Text-to-Single-ID tab and hook up its Run button.

    Meant to be called inside an open ``gr.Blocks`` context (the script's
    ``__main__`` section calls it inside a ``gr.TabItem``). The function reads
    the globals ``text_to_single_id_description`` and
    ``text_to_single_id_tips``, so both must be defined before it is called.
    Returns nothing; its effect is the components and the click handler it
    registers.
    """
    # Keyword bundles shared by several widgets below.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_column = dict(scale=1, min_width=100)
    resolution_choices = ["768x512", "512x512", "512x768"]

    # Example rows: [prompt, ID image, style image, sample output].
    hoodie_prompt = (
        "A young man with short black hair, wearing a black hoodie with a hood, was paired with a blue denim jacket with yellow details."
    )
    example_rows = [
        [
            hoodie_prompt,
            "assets/examples/1-newton.jpg",
            "assets/No-Image-Placeholder.png",
            "assets/examples/1-output-1.png",
        ],
        [
            hoodie_prompt,
            "assets/examples/1-newton.jpg",
            "assets/examples/1-style-1.jpg",
            "assets/examples/1-output-2.png",
        ],
    ]

    gr.Markdown("## Text-to-Single-ID Generation")
    gr.HTML(text_to_single_id_description)
    gr.HTML(text_to_single_id_tips)

    with gr.Row():
        # Left side: prompts, resolution, Run button and tuning controls.
        with gr.Column(**narrow_column):
            prompt_box = gr.Textbox(label="Prompt", value="", lines=2)
            negative_box = gr.Textbox(label="Negative Prompt", value="nsfw")
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=resolution_choices,
                value="512x512",
            )
            run_btn = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    value=-1,
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                )
                id_strength = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_strength = gr.Slider(label="Face Structure Scale", value=0.1, **unit_slider)
                style_strength = gr.Slider(label="style_scale", value=0.7, **unit_slider)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right side: reference images on top, results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                id_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_id_files = gr.File(
                            label="Additional ID Images",
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                        )
                    with gr.Row():
                        with gr.Column(**narrow_column):
                            mix_image_a = gr.Image(type="pil", label="Mix ID 1")
                            mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                        with gr.Column(**narrow_column):
                            mix_image_b = gr.Image(type="pil", label="Mix ID 2")
                            mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
                style_image = gr.Image(type="pil", label="Style")
            with gr.Row():
                # Invisible image: it is only a target for the example table's
                # output column and is not passed to the generation function.
                hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label="Output",
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[prompt_box, id_image, style_image, hidden_example_output],
        )

    # Inputs are handed to the handler in exactly this order; keep it in sync
    # with text_to_single_id_generation_process.
    handler_inputs = [
        id_image, extra_id_files,
        mix_image_a, mix_weight_a,
        mix_image_b, mix_weight_b,
        id_strength, structure_strength,
        prompt_box, negative_box,
        style_image, style_strength,
        seed_slider, resolution_menu, upscale_toggle,
    ]
    run_btn.click(
        fn=text_to_single_id_generation_process,
        inputs=handler_inputs,
        outputs=[gallery],
    )
def text_to_multi_id_generation_block():
    """Lay out the Text-to-Multi-ID tab (two identities) and hook up Run.

    Meant to be called inside an open ``gr.Blocks`` context (the script's
    ``__main__`` section calls it inside a ``gr.TabItem``). The function reads
    the globals ``text_to_multi_id_description`` and ``text_to_multi_id_tips``,
    so both must be defined before it is called. Returns nothing; its effect
    is the components and the click handler it registers.
    """
    # Keyword bundles shared by several widgets below.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_column = dict(scale=1, min_width=100)
    resolution_choices = ["768x512", "512x512", "512x768"]

    # Example rows, one reference per identity:
    # [prompt, first ID, second ID, style image, sample output].
    first_face = "assets/examples/2-stylegan2-ffhq-0100.png"
    second_face = "assets/examples/2-stylegan2-ffhq-0293.png"
    single_reference_rows = [
        [
            "两个女人在欢笑和快乐中被捕捉到,他们的脸上洋溢着真挚的幸福,背景是日落时分的宁静海滩。这幅画以柔和的风格描绘,捕捉了这一刻的温暖和宁静。",
            first_face,
            second_face,
            "assets/No-Image-Placeholder.png",
            "assets/examples/2-output-1.png",
        ],
        [
            "The two female models are drinking coffee. The background was off-white.",
            first_face,
            second_face,
            "assets/examples/2-style-1.jpg",
            "assets/examples/2-output-2.png",
        ],
    ]

    # Example rows with supplementary references per identity:
    # [prompt, first ID, first ID extras, second ID, second ID extras, sample output].
    multi_reference_rows = [
        [
            "Two men in an American poster.",
            "assets/examples/Trump-1.jpg",
            ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
            "assets/examples/Biden-1.jpg",
            ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
            "assets/examples/2-output-4.png",
        ],
        [
            "Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
            "assets/examples/Trump-1.jpg",
            ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
            "assets/examples/Biden-1.jpg",
            ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
            "assets/examples/2-output-3.png",
        ],
    ]

    gr.Markdown("## Text-to-Multi-ID Generation")
    gr.HTML(text_to_multi_id_description)
    gr.HTML(text_to_multi_id_tips)

    with gr.Row():
        # Left side: prompts, resolution, Run button and tuning controls.
        with gr.Column(**narrow_column):
            prompt_box = gr.Textbox(label="Prompt", value="", lines=2)
            negative_box = gr.Textbox(label="Negative Prompt", value="nsfw")
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=resolution_choices,
                value="512x512",
            )
            run_btn = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    value=-1,
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                )
                id_strength = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_strength = gr.Slider(label="Face Structure Scale", value=0.3, **unit_slider)
                style_strength = gr.Slider(label="style_scale", value=0.7, **unit_slider)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right side: one column per identity plus a style column, results below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                # First identity and its optional extras.
                with gr.Column(**narrow_column):
                    first_id_image = gr.Image(type="pil", label="First ID")
                    with gr.Accordion("First ID Supplements", open=False):
                        with gr.Row():
                            first_extra_files = gr.File(
                                label="Additional ID Images",
                                file_count="multiple",
                                file_types=["image"],
                                type="filepath",
                            )
                        with gr.Row():
                            with gr.Column(**narrow_column):
                                first_mix_image_a = gr.Image(type="pil", label="Mix ID 1")
                                first_mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                            with gr.Column(**narrow_column):
                                first_mix_image_b = gr.Image(type="pil", label="Mix ID 2")
                                first_mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
                # Second identity and its optional extras.
                with gr.Column(**narrow_column):
                    second_id_image = gr.Image(type="pil", label="Second ID")
                    with gr.Accordion("Second ID Supplements", open=False):
                        with gr.Row():
                            second_extra_files = gr.File(
                                label="Additional ID Images",
                                file_count="multiple",
                                file_types=["image"],
                                type="filepath",
                            )
                        with gr.Row():
                            with gr.Column(**narrow_column):
                                second_mix_image_a = gr.Image(type="pil", label="Mix ID 1")
                                second_mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                            with gr.Column(**narrow_column):
                                second_mix_image_b = gr.Image(type="pil", label="Mix ID 2")
                                second_mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
                with gr.Column(**narrow_column):
                    style_image = gr.Image(type="pil", label="Style")
            with gr.Row():
                # Invisible image: it is only a target for the example tables'
                # output column and is not passed to the generation function.
                hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label="Output",
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=single_reference_rows,
            inputs=[prompt_box, first_id_image, second_id_image, style_image, hidden_example_output],
        )

    with gr.Row():
        gr.Examples(
            label="Examples (Multiple References)",
            examples=multi_reference_rows,
            inputs=[
                prompt_box,
                first_id_image,
                first_extra_files,
                second_id_image,
                second_extra_files,
                hidden_example_output,
            ],
        )

    # Inputs are handed to the handler in exactly this order; keep it in sync
    # with text_to_multi_id_generation_process.
    handler_inputs = [
        first_id_image, first_extra_files,
        first_mix_image_a, first_mix_weight_a,
        first_mix_image_b, first_mix_weight_b,
        second_id_image, second_extra_files,
        second_mix_image_a, second_mix_weight_a,
        second_mix_image_b, second_mix_weight_b,
        id_strength, structure_strength,
        prompt_box, negative_box,
        style_image, style_strength,
        seed_slider, resolution_menu, upscale_toggle,
    ]
    run_btn.click(
        fn=text_to_multi_id_generation_process,
        inputs=handler_inputs,
        outputs=[gallery],
    )
def image_to_single_id_generation_block():
    """Lay out the Image-to-Single-ID tab and hook up its Run button.

    Meant to be called inside an open ``gr.Blocks`` context (the script's
    ``__main__`` section calls it inside a ``gr.TabItem``). The function reads
    the globals ``image_to_single_id_description`` and
    ``image_to_single_id_tips``, so both must be defined before it is called.
    Unlike the text-driven tabs there is no prompt input here; a portrait
    reference image is collected instead. Returns nothing; its effect is the
    components and the click handler it registers.
    """
    # Keyword bundles shared by several widgets below.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_column = dict(scale=1, min_width=100)
    resolution_choices = ["768x512", "512x512", "512x768"]

    # Example rows:
    # [portrait reference, ID image, Face ID Scale, Face Structure Scale, sample output].
    example_rows = [
        ["assets/examples/3-style-1.png", "assets/examples/3-stylegan2-ffhq-0293.png", 0.7, 0.3,
         "assets/examples/3-output-1.png"],
        ["assets/examples/3-style-1.png", "assets/examples/3-stylegan2-ffhq-0293.png", 0.6, 0.0,
         "assets/examples/3-output-2.png"],
        ["assets/examples/3-style-2.jpg", "assets/examples/3-stylegan2-ffhq-0381.png", 0.7, 0.3,
         "assets/examples/3-output-3.png"],
        ["assets/examples/3-style-3.jpg", "assets/examples/3-stylegan2-ffhq-0381.png", 0.6, 0.0,
         "assets/examples/3-output-4.png"],
    ]

    gr.Markdown("## Image-to-Single-ID Generation")
    gr.HTML(image_to_single_id_description)
    gr.HTML(image_to_single_id_tips)

    with gr.Row():
        # Left side: resolution, Run button and tuning controls.
        with gr.Column(**narrow_column):
            resolution_menu = gr.Dropdown(
                label="Image Resolution (HxW)",
                choices=resolution_choices,
                value="512x512",
            )
            run_btn = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    value=-1,
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                )
                reference_strength = gr.Slider(label="Reference Scale", value=0.7, **unit_slider)
                id_strength = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_strength = gr.Slider(label="Face Structure Scale", value=0.3, **unit_slider)
                upscale_toggle = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right side: reference images on top, results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                portrait_reference = gr.Image(type="pil", label="Portrait Reference")
                id_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_id_files = gr.File(
                            label="Additional ID Images",
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                        )
                    with gr.Row():
                        with gr.Column(**narrow_column):
                            mix_image_a = gr.Image(type="pil", label="Mix ID 1")
                            mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                        with gr.Column(**narrow_column):
                            mix_image_b = gr.Image(type="pil", label="Mix ID 2")
                            mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
            with gr.Row():
                with gr.Column(scale=3, min_width=100):
                    # Invisible image: it is only a target for the example
                    # table's output column and is not passed to the
                    # generation function.
                    hidden_example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                    gallery = gr.Gallery(
                        label="Output",
                        show_label=True,
                        elem_id="gallery",
                        columns=4,
                        preview=True,
                        format="png",
                    )

    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[portrait_reference, id_image, id_strength, structure_strength, hidden_example_output],
        )

    # Inputs are handed to the handler in exactly this order; keep it in sync
    # with image_to_single_id_generation_process.
    handler_inputs = [
        id_image, extra_id_files,
        mix_image_a, mix_weight_a,
        mix_image_b, mix_weight_b,
        id_strength, structure_strength,
        portrait_reference, reference_strength,
        seed_slider, resolution_menu, upscale_toggle,
    ]
    run_btn.click(
        fn=image_to_single_id_generation_process,
        inputs=handler_inputs,
        outputs=[gallery],
    )
# ---------------------------------------------------------------------------
# Page text.
#
# These strings are defined at module scope rather than under the ``__main__``
# guard because the three ``*_generation_block`` builders read the
# ``*_description`` / ``*_tips`` names as globals. Keeping them here means the
# builders also work when this module is imported instead of run as a script.
# ---------------------------------------------------------------------------

# Header banner with badge links.
# NOTE(review): the arXiv links still carry the "xxxx.xxxxx" placeholder.
title = r"""
<div style="text-align: center;">
<h1> UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization </h1>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<a href="https://arxiv.org/pdf/xxxx.xxxxx"><img src="https://img.shields.io/badge/arXiv-xxxx.xxxxx-red"></a>
&nbsp;
<a href='https://aigcdesigngroup.github.io/UniPortrait-Page/'><img src='https://img.shields.io/badge/Project_Page-UniPortrait-green' alt='Project Page'></a>
&nbsp;
<a href="https://github.com/junjiehe96/UniPortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
</div>
</br>
</div>
"""

# Short introduction shown under the banner.
title_description = r"""
This is the <b>official 🤗 Gradio demo</b> for <a href='https://arxiv.org/pdf/xxxx.xxxxx' target='_blank'><b>UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization</b></a>.<br>
The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊
"""

# Text for the Text-to-Single-ID tab.
text_to_single_id_description = r"""🚀🚀🚀Quick start:<br>
1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the <b>Run</b> button.<br>
2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
"""
text_to_single_id_tips = r"""💡💡💡Tips:<br>
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
"""

# Text for the Text-to-Multi-ID tab.
text_to_multi_id_description = r"""🚀🚀🚀Quick start:<br>
1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the <b>Run</b> button.<br>
2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
"""
text_to_multi_id_tips = r"""💡💡💡Tips:<br>
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).<br>
"""

# Text for the Image-to-Single-ID tab.
image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the <b>Run</b> button. 🤗<br>"""
image_to_single_id_tips = r"""💡💡💡Tips:<br>
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".<br>
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
"""

# Citation / contact footer (Markdown).
# NOTE(review): nothing in this section renders this string, and the BibTeX
# entry below is the InstantID paper rather than UniPortrait -- confirm the
# intended citation before displaying it.
citation = r"""
---
📝 **Citation**
<br>
If our work is helpful for your research or applications, please cite us via:
```bibtex
@article{wang2024instantid,
title={InstantID: Zero-shot Identity-Preserving Generation in Seconds},
author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony},
journal={arXiv preprint arXiv:2401.07519},
year={2024}
}
```
📧 **Contact**
<br>
If you have any questions, please feel free to open an issue or directly reach us out at <b>he_junjie@zju.edu.cn</b>.
"""

if __name__ == "__main__":
    # Assemble the page: banner, introduction, then one tab per capability.
    block = gr.Blocks(title="UniPortrait").queue()
    with block:
        gr.HTML(title)
        gr.HTML(title_description)

        with gr.TabItem("Text-to-Single-ID"):
            text_to_single_id_generation_block()
        with gr.TabItem("Text-to-Multi-ID"):
            text_to_multi_id_generation_block()
        with gr.TabItem("Image-to-Single-ID (Stylization)"):
            image_to_single_id_generation_block()

    # share=True asks Gradio to create a public share link for the demo.
    block.launch(share=True)
    # Alternative launch configurations kept for reference:
    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])