File size: 21,002 Bytes
dbac7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2024-07-31
# @Author  : Junjie He
import gradio as gr

from src.process import (
    text_to_single_id_generation_process,
    text_to_multi_id_generation_process,
    image_to_single_id_generation_process,
)


def text_to_single_id_generation_block() -> None:
    """Build the "Text-to-Single-ID Generation" UI and wire its Run button.

    The function only creates Gradio components and registers one click
    handler; it has no parameters and returns nothing. In this file it is
    called inside a ``gr.TabItem`` context within ``gr.Blocks``.

    It reads ``text_to_single_id_description`` and ``text_to_single_id_tips``
    as module globals, so both names must be defined before this is called.
    """
    gr.Markdown("## Text-to-Single-ID Generation")
    gr.HTML(text_to_single_id_description)
    gr.HTML(text_to_single_id_tips)
    with gr.Row():
        # Narrow column (scale=1): text inputs, resolution, Run button and sliders.
        with gr.Column(scale=1, min_width=100):
            prompt = gr.Textbox(value="", label='Prompt', lines=2)
            negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
            image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
                                           label="Image Resolution (HxW)")
            run_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

                faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
                face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0,
                                                 step=0.01, value=0.1)

                # NOTE(review): this label is the raw identifier "style_scale", while the
                # sibling sliders use Title Case labels — confirm whether that is intended.
                style_scale = gr.Slider(label="style_scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)

                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Wide column (scale=3): reference images on top, output gallery below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                pil_faceid = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        # Multi-file upload restricted to images; configured with type="filepath".
                        pil_supp_faceids = gr.File(file_count="multiple", file_types=["image"],
                                                   type="filepath", label="Additional ID Images")
                    with gr.Row():
                        # Two optional "mix" identities, each with its own scale (default 0.0).
                        with gr.Column(scale=1, min_width=100):
                            pil_mix_faceid_1 = gr.Image(type="pil", label="Mix ID 1")
                            mix_scale_1 = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
                        with gr.Column(scale=1, min_width=100):
                            pil_mix_faceid_2 = gr.Image(type="pil", label="Mix ID 2")
                            mix_scale_2 = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
                pil_style = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Hidden image: it is only referenced as a column of gr.Examples below
                # and is not part of the click handler's inputs or outputs.
                example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4, preview=True,
                                            format="png")
    with gr.Row():
        # Each example row follows the order of `inputs` passed to gr.Examples:
        # prompt, ID image, style image, example output.
        examples = [
            [
                "A young man with short black hair, wearing a black hoodie with a hood, was paired with a blue denim jacket with yellow details.",
                "assets/examples/1-newton.jpg",
                "assets/No-Image-Placeholder.png",
                "assets/examples/1-output-1.png",
            ],
            [
                "A young man with short black hair, wearing a black hoodie with a hood, was paired with a blue denim jacket with yellow details.",
                "assets/examples/1-newton.jpg",
                "assets/examples/1-style-1.jpg",
                "assets/examples/1-output-2.png",
            ],
        ]
        # No `fn` is given, so the examples table is not bound to the generation function here.
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[prompt, pil_faceid, pil_style, example_output],
        )
    # Components handed to the click handler, in this order.
    # Presumably this must match the positional parameter order of
    # text_to_single_id_generation_process — verify against src.process.
    ips = [
        pil_faceid, pil_supp_faceids,
        pil_mix_faceid_1, mix_scale_1,
        pil_mix_faceid_2, mix_scale_2,
        faceid_scale, face_structure_scale,
        prompt, negative_prompt,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=text_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


def text_to_multi_id_generation_block() -> None:
    """Build the "Text-to-Multi-ID Generation" UI and wire its Run button.

    The function only creates Gradio components, two example tables and one
    click handler; it has no parameters and returns nothing. In this file it
    is called inside a ``gr.TabItem`` context within ``gr.Blocks``.

    It reads ``text_to_multi_id_description`` and ``text_to_multi_id_tips``
    as module globals, so both names must be defined before this is called.
    """
    gr.Markdown("## Text-to-Multi-ID Generation")
    gr.HTML(text_to_multi_id_description)
    gr.HTML(text_to_multi_id_tips)
    with gr.Row():
        # Narrow column (scale=1): text inputs, resolution, Run button and sliders.
        with gr.Column(scale=1, min_width=100):
            prompt = gr.Textbox(value="", label='Prompt', lines=2)
            negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
            image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
                                           label="Image Resolution (HxW)")
            run_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

                faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
                face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0,
                                                 step=0.01, value=0.3)
                # NOTE(review): this label is the raw identifier "style_scale", while the
                # sibling sliders use Title Case labels — confirm whether that is intended.
                style_scale = gr.Slider(label="style_scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)

                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Wide column (scale=3): first ID, second ID and style side by side, gallery below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                # First identity: main image plus a collapsed supplements accordion.
                with gr.Column(scale=1, min_width=100):
                    pil_faceid_1st = gr.Image(type="pil", label="First ID")
                    with gr.Accordion("First ID Supplements", open=False):
                        with gr.Row():
                            pil_supp_faceids_1st = gr.File(file_count="multiple", file_types=["image"],
                                                           type="filepath", label="Additional ID Images")
                        with gr.Row():
                            with gr.Column(scale=1, min_width=100):
                                pil_mix_faceid_1_1st = gr.Image(type="pil", label="Mix ID 1")
                                mix_scale_1_1st = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01,
                                                            value=0.0)
                            with gr.Column(scale=1, min_width=100):
                                pil_mix_faceid_2_1st = gr.Image(type="pil", label="Mix ID 2")
                                mix_scale_2_1st = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01,
                                                            value=0.0)
                # Second identity: same structure as the first one.
                with gr.Column(scale=1, min_width=100):
                    pil_faceid_2nd = gr.Image(type="pil", label="Second ID")
                    with gr.Accordion("Second ID Supplements", open=False):
                        with gr.Row():
                            pil_supp_faceids_2nd = gr.File(file_count="multiple", file_types=["image"],
                                                           type="filepath", label="Additional ID Images")
                        with gr.Row():
                            with gr.Column(scale=1, min_width=100):
                                pil_mix_faceid_1_2nd = gr.Image(type="pil", label="Mix ID 1")
                                mix_scale_1_2nd = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01,
                                                            value=0.0)
                            with gr.Column(scale=1, min_width=100):
                                pil_mix_faceid_2_2nd = gr.Image(type="pil", label="Mix ID 2")
                                mix_scale_2_2nd = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01,
                                                            value=0.0)
                with gr.Column(scale=1, min_width=100):
                    pil_style = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Hidden image: it is only referenced as a column of the gr.Examples tables
                # below and is not part of the click handler's inputs or outputs.
                example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4, preview=True,
                                            format="png")
    with gr.Row():
        # First examples table. Row order follows its `inputs`:
        # prompt, first ID, second ID, style image, example output.
        examples = [
            [
                "两个女人在欢笑和快乐中被捕捉到,他们的脸上洋溢着真挚的幸福,背景是日落时分的宁静海滩。这幅画以柔和的风格描绘,捕捉了这一刻的温暖和宁静。",
                "assets/examples/2-stylegan2-ffhq-0100.png",
                "assets/examples/2-stylegan2-ffhq-0293.png",
                "assets/No-Image-Placeholder.png",
                "assets/examples/2-output-1.png",
            ],
            [
                "The two female models are drinking coffee. The background was off-white.",
                "assets/examples/2-stylegan2-ffhq-0100.png",
                "assets/examples/2-stylegan2-ffhq-0293.png",
                "assets/examples/2-style-1.jpg",
                "assets/examples/2-output-2.png",
            ],
        ]
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[prompt, pil_faceid_1st, pil_faceid_2nd, pil_style, example_output],
        )
    with gr.Row():
        # Second examples table (`examples` is rebound). Row order follows its `inputs`:
        # prompt, first ID, first ID supplements (list of paths), second ID,
        # second ID supplements (list of paths), example output. No style image column.
        examples = [
            [
                "Two men in an American poster.",
                "assets/examples/Trump-1.jpg",
                ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
                "assets/examples/Biden-1.jpg",
                ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
                "assets/examples/2-output-4.png",
            ],
            [
                "Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
                "assets/examples/Trump-1.jpg",
                ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
                "assets/examples/Biden-1.jpg",
                ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
                "assets/examples/2-output-3.png",
            ],
        ]
        gr.Examples(
            label="Examples (Multiple References)",
            examples=examples,
            inputs=[prompt, pil_faceid_1st, pil_supp_faceids_1st, pil_faceid_2nd, pil_supp_faceids_2nd, example_output],
        )
    # Components handed to the click handler, in this order: all first-ID inputs,
    # then all second-ID inputs, then the shared generation settings.
    # Presumably this must match the positional parameter order of
    # text_to_multi_id_generation_process — verify against src.process.
    ips = [
        pil_faceid_1st, pil_supp_faceids_1st,
        pil_mix_faceid_1_1st, mix_scale_1_1st,
        pil_mix_faceid_2_1st, mix_scale_2_1st,
        pil_faceid_2nd, pil_supp_faceids_2nd,
        pil_mix_faceid_1_2nd, mix_scale_1_2nd,
        pil_mix_faceid_2_2nd, mix_scale_2_2nd,
        faceid_scale, face_structure_scale,
        prompt, negative_prompt,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=text_to_multi_id_generation_process, inputs=ips, outputs=[result_gallery])


def image_to_single_id_generation_block() -> None:
    """Build the "Image-to-Single-ID Generation" UI and wire its Run button.

    Unlike the text-driven tabs in this file, this one creates no prompt or
    negative-prompt textbox; the inputs are a portrait reference image and
    ID image(s). The function has no parameters and returns nothing. In this
    file it is called inside a ``gr.TabItem`` context within ``gr.Blocks``.

    It reads ``image_to_single_id_description`` and ``image_to_single_id_tips``
    as module globals, so both names must be defined before this is called.
    """
    gr.Markdown("## Image-to-Single-ID Generation")
    gr.HTML(image_to_single_id_description)
    gr.HTML(image_to_single_id_tips)
    with gr.Row():
        # Narrow column (scale=1): resolution, Run button and sliders.
        with gr.Column(scale=1, min_width=100):
            image_resolution = gr.Dropdown(choices=["768x512", "512x512", "512x768"], value="512x512",
                                           label="Image Resolution (HxW)")
            run_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed = gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)

                # The variable is named style_scale but is shown to the user as "Reference Scale".
                style_scale = gr.Slider(label="Reference Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
                faceid_scale = gr.Slider(label="Face ID Scale", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
                face_structure_scale = gr.Slider(label="Face Structure Scale", minimum=0.0, maximum=1.0, step=0.01,
                                                 value=0.3)

                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Wide column (scale=3): reference/ID images on top, output gallery below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                # The variable is named pil_style but is shown to the user as "Portrait Reference".
                pil_style = gr.Image(type="pil", label="Portrait Reference")
                pil_faceid = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        pil_supp_faceids = gr.File(file_count="multiple", file_types=["image"],
                                                   type="filepath", label="Additional ID Images")
                    with gr.Row():
                        with gr.Column(scale=1, min_width=100):
                            pil_mix_faceid_1 = gr.Image(type="pil", label="Mix ID 1")
                            mix_scale_1 = gr.Slider(label="Mix Scale 1", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
                        with gr.Column(scale=1, min_width=100):
                            pil_mix_faceid_2 = gr.Image(type="pil", label="Mix ID 2")
                            mix_scale_2 = gr.Slider(label="Mix Scale 2", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
            with gr.Row():
                with gr.Column(scale=3, min_width=100):
                    # Hidden image: it is only referenced as a column of gr.Examples below
                    # and is not part of the click handler's inputs or outputs.
                    example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
                    result_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=4,
                                                preview=True, format="png")
    with gr.Row():
        # Each example row follows the order of `inputs` passed to gr.Examples:
        # portrait reference, ID image, Face ID Scale, Face Structure Scale, example output.
        examples = [
            [
                "assets/examples/3-style-1.png",
                "assets/examples/3-stylegan2-ffhq-0293.png",
                0.7,
                0.3,
                "assets/examples/3-output-1.png",
            ],
            [
                "assets/examples/3-style-1.png",
                "assets/examples/3-stylegan2-ffhq-0293.png",
                0.6,
                0.0,
                "assets/examples/3-output-2.png",
            ],
            [
                "assets/examples/3-style-2.jpg",
                "assets/examples/3-stylegan2-ffhq-0381.png",
                0.7,
                0.3,
                "assets/examples/3-output-3.png",
            ],
            [
                "assets/examples/3-style-3.jpg",
                "assets/examples/3-stylegan2-ffhq-0381.png",
                0.6,
                0.0,
                "assets/examples/3-output-4.png",
            ],
        ]
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[pil_style, pil_faceid, faceid_scale, face_structure_scale, example_output],
        )
    # Components handed to the click handler, in this order.
    # Presumably this must match the positional parameter order of
    # image_to_single_id_generation_process — verify against src.process.
    ips = [
        pil_faceid, pil_supp_faceids,
        pil_mix_faceid_1, mix_scale_1,
        pil_mix_faceid_2, mix_scale_2,
        faceid_scale, face_structure_scale,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=image_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


# ---------------------------------------------------------------------------
# Static page text.
#
# These strings live at module level rather than under the ``__main__`` guard
# because the ``*_generation_block`` builders in this file read the
# ``*_description`` and ``*_tips`` names as globals. Defining them only when
# the file runs as a script made the builders raise ``NameError`` whenever the
# module was imported instead. The string contents are unchanged.
# ---------------------------------------------------------------------------

# Page header: project title plus arXiv / project page / GitHub badges.
title = r"""
            <div style="text-align: center;">
                <h1> UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization </h1>
                <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
                    <a href="https://arxiv.org/pdf/xxxx.xxxxx"><img src="https://img.shields.io/badge/arXiv-xxxx.xxxxx-red"></a>
                    &nbsp;
                    <a href='https://aigcdesigngroup.github.io/UniPortrait-Page/'><img src='https://img.shields.io/badge/Project_Page-UniPortrait-green' alt='Project Page'></a>
                    &nbsp;
                    <a href="https://github.com/junjiehe96/UniPortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
                </div>
                </br>
            </div>
        """

# Short introduction rendered directly below the header.
title_description = r"""
        This is the <b>official 🤗 Gradio demo</b> for <a href='https://arxiv.org/pdf/xxxx.xxxxx' target='_blank'><b>UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization</b></a>.<br>
        The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊
        """

# Help text read by text_to_single_id_generation_block().
text_to_single_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

text_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

# Help text read by text_to_multi_id_generation_block().
text_to_multi_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

text_to_multi_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

# Help text read by image_to_single_id_generation_block().
image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the <b>Run</b> button. 🤗<br>"""

image_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

# NOTE(review): `citation` is defined but never rendered by the page built
# below, and the BibTeX entry it contains is the InstantID paper rather than
# UniPortrait — confirm the intended citation before displaying it.
citation = r"""
        ---
        📝 **Citation**
        <br>
        If our work is helpful for your research or applications, please cite us via:
        ```bibtex
        @article{wang2024instantid,
          title={InstantID: Zero-shot Identity-Preserving Generation in Seconds},
          author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony},
          journal={arXiv preprint arXiv:2401.07519},
          year={2024}
        }
        ```
        📧 **Contact**
        <br>
        If you have any questions, please feel free to open an issue or directly reach us out at <b>he_junjie@zju.edu.cn</b>.
        """


if __name__ == "__main__":
    # Assemble the page: header, introduction, then one tab per capability.
    block = gr.Blocks(title="UniPortrait").queue()
    with block:
        gr.HTML(title)
        gr.HTML(title_description)

        with gr.TabItem("Text-to-Single-ID"):
            text_to_single_id_generation_block()

        with gr.TabItem("Text-to-Multi-ID"):
            text_to_multi_id_generation_block()

        with gr.TabItem("Image-to-Single-ID (Stylization)"):
            image_to_single_id_generation_block()

    # Launches with share=True; the commented lines below are alternative
    # launch configurations kept from the original script.
    block.launch(share=True)
    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])