ganeshblank committed dd6463f (parent: 45b0642)

Commit message: requirement

Files changed:
- .gitattributes    +35 -35
- .gitignore        +21  -0
- LICENSE           +21  -0
- README.md        +142 -13
- inference.py      +49  -0
- requirements.txt  +22  -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
(The hunk removes and re-adds all 35 lines with identical text; the Git LFS patterns it tracks are listed once below.)

*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,21 @@
# Byte-compiled / optimized / DLL files
__pycache__/
**/__pycache__/
*.py[cod]
**/*.py[cod]
*$py.class

# Model weights
**/*.pth
**/*.onnx

pretrained_weights/*.md
pretrained_weights/docs

# IPython notebook
*.ipynb

# Temporary files or benchmark resources
animations/*
tmp/*
.vscode/launch.json
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Kuaishou Visual Generation and Interaction Center

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,13 +1,142 @@
<h1 align="center">This is a modification of LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control that allows a video to be used as the source</h1>


<br>
<div align="center">
<!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
<a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/arXiv-LivePortrait-red'></a>
<a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-LivePortrait-green'></a>
<a href='https://github.com/KwaiVGI/LivePortrait'>Official LivePortrait</a>
</div>
<br>

<p align="center">
<img src="./assets/docs/showcase2.gif" alt="showcase">
<br>
🔥 For more results, visit the LivePortrait <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
</p>


## 🔥 Getting Started
### 1. Clone the code and prepare the environment
```bash
git clone https://github.com/KwaiVGI/LivePortrait
cd LivePortrait

# create the env using conda
conda create -n LivePortrait python==3.9.18
conda activate LivePortrait
# install dependencies with pip
pip install -r requirements.txt
```

### 2. Download pretrained weights

Download the pretrained weights from HuggingFace:
```bash
# you may need to run `git lfs install` first
git clone https://huggingface.co/KwaiVGI/liveportrait pretrained_weights
```
+
|
41 |
+
Or, download all pretrained weights from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
|
42 |
+
```text
|
43 |
+
pretrained_weights
|
44 |
+
├── insightface
|
45 |
+
│ └── models
|
46 |
+
│ └── buffalo_l
|
47 |
+
│ ├── 2d106det.onnx
|
48 |
+
│ └── det_10g.onnx
|
49 |
+
└── liveportrait
|
50 |
+
├── base_models
|
51 |
+
│ ├── appearance_feature_extractor.pth
|
52 |
+
│ ├── motion_extractor.pth
|
53 |
+
│ ├── spade_generator.pth
|
54 |
+
│ └── warping_module.pth
|
55 |
+
├── landmark.onnx
|
56 |
+
└── retargeting_models
|
57 |
+
└── stitching_retargeting_module.pth
|
58 |
+
```
|
59 |
+
|
60 |
+
### 3. Inference 🚀
|
61 |
+
|
62 |
+
#### Fast hands-on
|
63 |
+
```bash
|
64 |
+
python inference.py
|
65 |
+
```
|
66 |
+
|
67 |
+
If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
|
68 |
+
|
69 |
+
<p align="center">
|
70 |
+
<img src="./assets/docs/inference.gif" alt="image">
|
71 |
+
</p>
|
72 |
+
|
73 |
+
Or, you can change the input by specifying the `-s` and `-d` arguments:
|
74 |
+
|
75 |
+
```bash
|
76 |
+
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
|
77 |
+
|
78 |
+
# disable pasting back to run faster
|
79 |
+
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
|
80 |
+
|
81 |
+
# more options to see
|
82 |
+
python inference.py -h
|
83 |
+
```
|
84 |
+
|
85 |
+
For video: you can change the input by specifying the `-sd` and `-d` arguments:
|
86 |
+
|
87 |
+
```bash
|
88 |
+
python inference.py -sd assets/examples/driving/d3.mp4 -d assets/examples/driving/d0.mp4 -vd True
|
89 |
+
|
90 |
+
# disable pasting back to run faster
|
91 |
+
python inference.py -sd assets/examples/driving/d3.mp4 -d assets/examples/driving/d0.mp4 -vd True --no_flag_pasteback
|
92 |
+
|
93 |
+
```
|
94 |
+
#### Driving video auto-cropping
|
95 |
+
|
96 |
+
📕 To use your own driving video, we **recommend**:
|
97 |
+
- Crop it to a **1:1** aspect ratio (e.g., 512x512 or 256x256 pixels), or enable auto-cropping by `--flag_crop_driving_video`.
|
98 |
+
- Focus on the head area, similar to the example videos.
|
99 |
+
- Minimize shoulder movement.
|
100 |
+
- Make sure the first frame of driving video is a frontal face with **neutral expression**.
|
101 |
+
|
102 |
+
Below is a auto-cropping case by `--flag_crop_driving_video`:
|
103 |
+
```bash
|
104 |
+
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d13.mp4 --flag_crop_driving_video
|
105 |
+
```
|
106 |
+
|
107 |
+
If you find the results of auto-cropping is not well, you can modify the `--scale_crop_video`, `--vy_ratio_crop_video` options to adjust the scale and offset, or do it manually.
|
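For example (the flag names are those listed above; the numeric values are only illustrative starting points, not defaults taken from this repository):

```bash
# illustrative: enlarge the crop slightly and shift it upward while auto-cropping
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d13.mp4 \
    --flag_crop_driving_video --scale_crop_video 2.5 --vy_ratio_crop_video=-0.1
```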

#### Template making
You can also use the auto-generated `.pkl` file to speed up inference and **protect privacy**, for example:
```bash
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d5.pkl
```

**Discover more interesting results on our [Homepage](https://liveportrait.github.io)** 😊

### 4. Gradio interface 🤗

We also provide a Gradio interface for a better experience; just run:

```bash
python app.py
```

You can specify the `--server_port`, `--share`, and `--server_name` arguments to suit your needs, as in the sketch below.
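A typical launch might look like the following (the flag names come from the sentence above; the host and port values are placeholders, and `python app.py -h` lists the exact syntax):

```bash
# illustrative: serve on all interfaces at a custom port
python app.py --server_name 0.0.0.0 --server_port 8890
```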



## Acknowledgements
We would like to thank the contributors of the [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface), and [LivePortrait](https://github.com/KwaiVGI/LivePortrait) repositories for their open research and contributions.

## Citation 💖

```bibtex
@article{guo2024liveportrait,
  title   = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
  author  = {Guo, Jianzhu and Zhang, Dingyun and Liu, Xiaoqiang and Zhong, Zhizhou and Zhang, Yuan and Wan, Pengfei and Zhang, Di},
  journal = {arXiv preprint arXiv:2407.03168},
  year    = {2024}
}
```
inference.py
ADDED
@@ -0,0 +1,49 @@
# coding: utf-8

import os.path as osp
import tyro
from src.config.argument_config import ArgumentConfig
from src.config.inference_config import InferenceConfig
from src.config.crop_config import CropConfig
from src.live_portrait_pipeline import LivePortraitPipeline


def partial_fields(target_class, kwargs):
    # keep only the kwargs that correspond to attributes declared on the target config class
    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})


def fast_check_args(args: ArgumentConfig):
    if not osp.exists(args.source_image):
        raise FileNotFoundError(f"source image not found: {args.source_image}")
    if not osp.exists(args.driving_info):
        raise FileNotFoundError(f"driving info not found: {args.driving_info}")


def main():
    # set tyro theme
    tyro.extras.set_accent_color("bright_cyan")
    args = tyro.cli(ArgumentConfig)

    # fast check the args
    fast_check_args(args)

    # specify configs for inference
    inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
    crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig

    live_portrait_pipeline = LivePortraitPipeline(
        inference_cfg=inference_cfg,
        crop_cfg=crop_cfg
    )

    # run: use the video-source pipeline when a source video is given, otherwise the image-source pipeline
    if args.flag_svideo:
        live_portrait_pipeline.execute_source_video(args)
    else:
        live_portrait_pipeline.execute(args)


if __name__ == '__main__':
    main()
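The `partial_fields` helper above filters a dict of CLI arguments down to the attributes that a given config class declares, so one `ArgumentConfig` can seed both `InferenceConfig` and `CropConfig`. A self-contained sketch of the same pattern (the dataclasses here are hypothetical stand-ins, not the project's real config classes):

```python
# minimal sketch of the partial_fields pattern with hypothetical config classes
from dataclasses import dataclass


@dataclass
class RenderConfig:      # hypothetical stand-in for InferenceConfig
    fps: int = 25
    flag_pasteback: bool = True


@dataclass
class CropSketch:        # hypothetical stand-in for CropConfig
    dsize: int = 512
    scale: float = 2.3


def partial_fields(target_class, kwargs):
    # keep only the keys that the target class exposes as (default-valued) class attributes
    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})


cli_args = {"fps": 30, "dsize": 256, "unrelated_flag": True}
print(partial_fields(RenderConfig, cli_args))  # RenderConfig(fps=30, flag_pasteback=True)
print(partial_fields(CropSketch, cli_args))    # CropSketch(dsize=256, scale=2.3)
```

Note that `hasattr(target_class, k)` only sees fields that exist as class attributes, which assumes the config fields carry default values.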
requirements.txt
ADDED
@@ -0,0 +1,22 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.3.0
torchvision==0.18.0
torchaudio==2.3.0

numpy==1.26.4
pyyaml==6.0.1
opencv-python==4.10.0.84
scipy==1.13.1
imageio==2.34.2
lmdb==1.4.1
tqdm==4.66.4
rich==13.7.1
ffmpeg-python==0.2.0
onnxruntime-gpu==1.18.0
onnx==1.16.1
scikit-image==0.24.0
albumentations==1.4.10
matplotlib==3.9.0
imageio-ffmpeg==0.5.1
tyro==0.8.5
gradio==4.37.1