|
|
|
|
|
from __future__ import annotations |
|
|
|
import argparse |
|
import pathlib |
|
import torch |
|
import gradio as gr |
|
|
|
from vtoonify_model import Model |
|
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the demo's command-line options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what a call without arguments
            has always done.

    Returns:
        A namespace with ``device`` (str, default ``"cpu"``), ``theme``
        (str or ``None``), ``share`` (bool, default ``False``), ``port``
        (int or ``None``) and ``enable_queue`` (bool, default ``True``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--theme", type=str)
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--port", type=int)
    # Negative switch: passing --disable-queue stores False into
    # ``enable_queue``, so the attribute defaults to True.
    parser.add_argument("--disable-queue", dest="enable_queue", action="store_false")
    return parser.parse_args(argv)
|
|
|
|
|
# HTML banner passed to gr.Markdown at the top of the page.
# Markup fixes relative to the previous text: the paragraph is closed with
# </p> (it used <p/>), the video element is closed with </video> (it used
# </videos>), and the <source> URL is kept on one line instead of being split
# by a newline inside the src attribute.
DESCRIPTION = """
<div align=center>
<h1 style="font-weight: 900; margin-bottom: 7px;">
Portrait Style Transfer with <a href="https://github.com/williamyang1991/VToonify">VToonify</a>
</h1>
<p>For faster inference without waiting in queue, you may duplicate the space and use the GPU setting.
<br/>
<a href="https://huggingface.co/spaces/PKUWilliamYang/VToonify?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
<video id="video" width=50% controls="" preload="none" poster="https://repository-images.githubusercontent.com/534480768/53715b0f-a2df-4daa-969c-0e74c102d339">
<source id="mp4" src="https://user-images.githubusercontent.com/18130694/189483939-0fc4a358-fb34-43cc-811a-b22adb820d57.mp4" type="video/mp4">
</video>
</div>
"""
|
# Visitor-badge snippet rendered at the very bottom of the page.
FOOTER = (
    "<div align=center>"
    '<img id="visitor-badge" alt="visitor badge" '
    'src="https://visitor-badge.laobi.icu/badge?page_id=williamyang1991/VToonify" />'
    "</div>"
)

# Markdown shown below the demo: star request, citation, license and contact.
ARTICLE = r"""
If VToonify is helpful, please help to ⭐ the <a href='https://github.com/williamyang1991/VToonify' target='_blank'>Github Repo</a>. Thanks!
[![GitHub Stars](https://img.shields.io/github/stars/williamyang1991/VToonify?style=social)](https://github.com/williamyang1991/VToonify)
---
📝 **Citation**
If our work is useful for your research, please consider citing:
```bibtex
@article{yang2022Vtoonify,
title={VToonify: Controllable High-Resolution Portrait Video Style Transfer},
author={Yang, Shuai and Jiang, Liming and Liu, Ziwei and Loy, Chen Change},
journal={ACM Transactions on Graphics (TOG)},
volume={41},
number={6},
articleno={203},
pages={1--15},
year={2022},
publisher={ACM New York, NY, USA},
doi={10.1145/3550454.3555437},
}
```

📋 **License**
This project is licensed under <a rel="license" href="https://github.com/williamyang1991/VToonify/blob/main/LICENSE.md">S-Lab License 1.0</a>.
Redistribution and use for non-commercial purposes should follow this license.

📧 **Contact**
If you have any questions, please feel free to reach me out at <b>williamyang@pku.edu.cn</b>.
"""
|
|
|
|
|
def update_slider(choice: str | None) -> dict:
    """Return the slider update that matches the selected style type.

    Args:
        choice: Name of the selected style type. Anything that is not a
            string (for example ``None``) is handled like a style name that
            does not end in ``-d``.

    Returns:
        The result of ``gr.Slider.update``: a 0..1 range when ``choice`` ends
        in ``-d``, otherwise a range whose minimum and maximum are both 0.5.
        The value is set to 0.5 in both cases.
    """
    # isinstance also accepts str subclasses, unlike an exact type comparison.
    adjustable = isinstance(choice, str) and choice.endswith("-d")
    if adjustable:
        return gr.Slider.update(maximum=1, minimum=0, value=0.5)
    # Minimum and maximum coincide, so 0.5 is the only value in range.
    return gr.Slider.update(maximum=0.5, minimum=0.5, value=0.5)
|
|
|
|
|
def set_example_image(example: list) -> dict:
    """Build the image update that carries the first entry of ``example``."""
    chosen = example[0]
    return gr.Image.update(value=chosen)
|
|
|
|
|
def set_example_video(example: list) -> tuple[dict]:
    """Build the video update that carries the first entry of ``example``.

    Args:
        example: Indexable sample row; only element 0 is used.

    Returns:
        A one-element tuple wrapping the ``gr.Video.update`` result. The
        tuple shape is what this function has always returned; only the
        return annotation (previously ``dict``) is corrected to say so.
    """
    video_update = gr.Video.update(value=example[0])
    return (video_update,)
|
|
|
|
|
# Paths of the clips offered as clickable video examples.
sample_video = [
    "./vtoonify/data/529_2.mp4",
    "./vtoonify/data/7154235.mp4",
    "./vtoonify/data/651.mp4",
    "./vtoonify/data/908.mp4",
]

# Component used as the single column of the examples dataset.
sample_vid = gr.Video(label="Video file")

# Created at module level; it is placed into the page layout later through
# example_videos.render().
example_videos = gr.components.Dataset(
    label="Video Examples",
    type="values",
    components=[sample_vid],
    # One single-element row per clip.
    samples=[[clip] for clip in sample_video],
)
|
|
|
|
|
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# the demo no longer requests "cuda" unconditionally on hosts without one.
model = Model(device="cuda" if torch.cuda.is_available() else "cpu")
|
|
|
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    # ------------------------------------------------------------------
    # Step 1: pick a style type and load the matching model.
    # ------------------------------------------------------------------
    with gr.Box():
        gr.Markdown(
            """## Step 1(Select Style)
- Select **Style Type**.
- Type with `-d` means it supports style degree adjustment.
- Type without `-d` usually has better toonification quality.

"""
        )
        with gr.Row():
            with gr.Column():
                gr.Markdown("Select Style Type")
                with gr.Row():
                    style_choices = [
                        "cartoon1",
                        "cartoon1-d",
                        "cartoon2-d",
                        "cartoon3-d",
                        "cartoon4",
                        "cartoon4-d",
                        "cartoon5-d",
                        "comic1-d",
                        "comic2-d",
                        "arcane1",
                        "arcane1-d",
                        "arcane2",
                        "arcane2-d",
                        "caricature1",
                        "caricature2",
                        "pixar",
                        "pixar-d",
                        "illustration1-d",
                        "illustration2-d",
                        "illustration3-d",
                        "illustration4-d",
                        "illustration5-d",
                    ]
                    style_type = gr.Radio(choices=style_choices, label="Style Type")
                    # Filled by the "Load Model" handler and fed to both
                    # toonify handlers below.
                    exstyle = gr.Variable()
                with gr.Row():
                    loadmodel_button = gr.Button("Load Model")
                with gr.Row():
                    load_info = gr.Textbox(
                        value="No model loaded.",
                        label="Process Information",
                        interactive=False,
                    )
            with gr.Column():
                gr.Markdown(
                    """Reference Styles
![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/style.jpg)"""
                )

    # ------------------------------------------------------------------
    # Step 2: upload an image/video and rescale it.
    # ------------------------------------------------------------------
    with gr.Box():
        gr.Markdown(
            """## Step 2 (Preprocess Input Image / Video)
- Drop an image/video containing a near-frontal face to the **Input Image**/**Input Video**.
- Hit the **Rescale Image**/**Rescale First Frame** button.
- Rescale the input to make it best fit the model.
- The final image result will be based on this **Rescaled Face**. Use padding parameters to adjust the background space.
- **<font color=red>Solution to [Error: no face detected!]</font>**: VToonify uses dlib.get_frontal_face_detector but sometimes it fails to detect a face. You can try several times or use other images until a face is detected, then switch back to the original image.
- For video input, further hit the **Rescale Video** button.
- The final video result will be based on this **Rescaled Video**. To avoid overload, video is cut to at most **100/300** frames for CPU/GPU, respectively.

"""
        )
        with gr.Row():
            # Padding controls: four sliders with the same range and default.
            with gr.Box():
                with gr.Column():
                    gr.Markdown(
                        """Choose the padding parameters.
![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/rescale.jpg)"""
                    )
                    padding_range = dict(minimum=128, maximum=256, value=200, step=8)
                    with gr.Row():
                        top = gr.Slider(label="top", **padding_range)
                    with gr.Row():
                        bottom = gr.Slider(label="bottom", **padding_range)
                    with gr.Row():
                        left = gr.Slider(label="left", **padding_range)
                    with gr.Row():
                        right = gr.Slider(label="right", **padding_range)

            # Raw inputs and the buttons that trigger rescaling.
            with gr.Box():
                with gr.Column():
                    gr.Markdown("Input")
                    with gr.Row():
                        input_image = gr.Image(type="filepath", label="Input Image")
                    with gr.Row():
                        preprocess_image_button = gr.Button("Rescale Image")
                    with gr.Row():
                        input_video = gr.Video(
                            label="Input Video",
                            mirror_webcam=False,
                            type="filepath",
                        )
                    with gr.Row():
                        preprocess_video0_button = gr.Button("Rescale First Frame")
                        preprocess_video1_button = gr.Button("Rescale Video")

            # Read-only previews of the rescaled inputs.
            with gr.Box():
                with gr.Column():
                    gr.Markdown("View")
                    with gr.Row():
                        input_info = gr.Textbox(
                            value="n.a.",
                            label="Process Information",
                            interactive=False,
                        )
                    with gr.Row():
                        aligned_face = gr.Image(
                            label="Rescaled Face",
                            type="numpy",
                            interactive=False,
                        )
                        # Written by every rescale handler, read by both
                        # toonify handlers.
                        instyle = gr.Variable()
                    with gr.Row():
                        aligned_video = gr.Video(
                            label="Rescaled Video",
                            type="mp4",
                            interactive=False,
                        )

        with gr.Row():
            with gr.Column():
                paths = [
                    "./vtoonify/data/pexels-andrea-piacquadio-733872.jpg",
                    "./vtoonify/data/i5R8hbZFDdc.jpg",
                    "./vtoonify/data/yRpe13BHdKw.jpg",
                    "./vtoonify/data/ILip77SbmOE.jpg",
                    "./vtoonify/data/077436.jpg",
                    "./vtoonify/data/081680.jpg",
                ]
                example_images = gr.Dataset(
                    label="Image Examples",
                    components=[input_image],
                    samples=[[image_path] for image_path in paths],
                )
            with gr.Column():
                # Place the dataset that was created at module level here.
                example_videos.render()

                def load_examples(video):
                    """Return the first element of the clicked sample row."""
                    return video[0]

                # Clicking a sample copies it into the Input Video component.
                example_videos.click(load_examples, example_videos, input_video)

    # ------------------------------------------------------------------
    # Step 3: run the style transfer.
    # ------------------------------------------------------------------
    with gr.Box():
        gr.Markdown("## Step 3 (Generate Style Transferred Image/Video)")
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """

- Adjust **Style Degree**.
- Hit **Toonify!** to toonify one frame. Hit **VToonify!** to toonify full video.
- Estimated time on 1600x1440 video of 300 frames: 1 hour (CPU); 2 mins (GPU)
"""
                )
                style_degree = gr.Slider(
                    0,
                    1,
                    value=0.5,
                    step=0.05,
                    label="Style Degree",
                )
            with gr.Column():
                gr.Markdown(
                    """![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/degree.jpg)
"""
                )
        with gr.Row():
            output_info = gr.Textbox(
                value="n.a.",
                label="Process Information",
                interactive=False,
            )
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    result_face = gr.Image(
                        label="Result Image",
                        type="numpy",
                        interactive=False,
                    )
                with gr.Row():
                    toonify_button = gr.Button("Toonify!")
            with gr.Column():
                with gr.Row():
                    result_video = gr.Video(
                        label="Result Video",
                        type="mp4",
                        interactive=False,
                    )
                with gr.Row():
                    vtoonify_button = gr.Button("VToonify!")

    gr.Markdown(ARTICLE)
    gr.Markdown(FOOTER)

    # ------------------------------------------------------------------
    # Event wiring.
    # ------------------------------------------------------------------
    loadmodel_button.click(
        fn=model.load_model,
        inputs=[style_type],
        outputs=[exstyle, load_info],
    )

    # Changing the style type re-configures the Style Degree slider.
    style_type.change(fn=update_slider, inputs=style_type, outputs=style_degree)

    # All three rescale handlers take the source followed by the four paddings.
    padding_inputs = [top, bottom, left, right]
    preprocess_image_button.click(
        fn=model.detect_and_align_image,
        inputs=[input_image, *padding_inputs],
        outputs=[aligned_face, instyle, input_info],
    )
    preprocess_video0_button.click(
        fn=model.detect_and_align_video,
        inputs=[input_video, *padding_inputs],
        outputs=[aligned_face, instyle, input_info],
    )
    preprocess_video1_button.click(
        fn=model.detect_and_align_full_video,
        inputs=[input_video, *padding_inputs],
        outputs=[aligned_video, instyle, input_info],
    )

    # Both toonify handlers take the rescaled input followed by these four.
    style_inputs = [instyle, exstyle, style_degree, style_type]
    toonify_button.click(
        fn=model.image_toonify,
        inputs=[aligned_face, *style_inputs],
        outputs=[result_face, output_info],
    )
    # NOTE(review): `video_tooniy` looks like a misspelling of "toonify", but
    # it has to match the method name on Model (imported from vtoonify_model,
    # not visible here) - confirm there before renaming.
    vtoonify_button.click(
        fn=model.video_tooniy,
        inputs=[aligned_video, *style_inputs],
        outputs=[result_video, output_info],
    )

    example_images.click(
        fn=set_example_image,
        inputs=example_images,
        outputs=example_images.components,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Enable the request queue with concurrency_count=1 and max_size=4.
demo.queue(max_size=4, concurrency_count=1)
# The port is fixed here; the options defined in parse_args() are not consulted.
demo.launch(server_port=8266)
|
|