cyber
committed on
This view is limited to 50 files because it contains too many changes.
See raw diff
- modelscope/.DS_Store +0 -0
- modelscope/hub/.DS_Store +0 -0
- modelscope/hub/models/.DS_Store +0 -0
- modelscope/hub/models/damo/.DS_Store +0 -0
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/.mdl +0 -0
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/.msc +0 -0
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/.mv +0 -1
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/README.md +0 -114
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/configuration.json +0 -67
- modelscope/hub/models/damo/cv_ddcolor_image-colorization/pytorch_model.pt +0 -3
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/.mdl +0 -0
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/.msc +0 -0
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/README.md +0 -169
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/config.yaml +0 -38
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/config_bert.json +0 -27
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/configuration.json +0 -60
- modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/vocab.txt +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.mdl +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.msc +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.mv +0 -1
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/README.md +0 -211
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/configuration.json +0 -20
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/tf_ckpts/model-15999.data-00000-of-00001 +0 -3
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/tf_ckpts/model-15999.index +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.mdl +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.msc +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.mv +0 -1
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/README.md +0 -214
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/configuration.json +0 -20
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/tf_ckpts/model-96499.data-00000-of-00001 +0 -3
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/tf_ckpts/model-96499.index +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.mdl +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.msc +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.mv +0 -1
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/README.md +0 -216
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/configuration.json +0 -20
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/tf_ckpts/model-309999.data-00000-of-00001 +0 -3
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/tf_ckpts/model-309999.index +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.mdl +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.msc +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.mv +0 -1
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/README.md +0 -214
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/configuration.json +0 -20
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/tf_ckpts/model-122999.data-00000-of-00001 +0 -3
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/tf_ckpts/model-122999.index +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.mdl +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.msc +0 -0
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.mv +0 -1
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/README.md +0 -215
- modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/configuration.json +0 -20
modelscope/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
modelscope/hub/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
modelscope/hub/models/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
modelscope/hub/models/damo/.DS_Store
DELETED
|
Binary file (6.15 kB)
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/.mdl
DELETED
|
Binary file (57 Bytes)
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/.msc
DELETED
|
Binary file (561 Bytes)
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.02,CreatedAt:1678030594
|
|
|
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/README.md
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-colorization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-colorization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
examples:
|
| 9 |
-
- name: 1
|
| 10 |
-
inputs:
|
| 11 |
-
- name: image
|
| 12 |
-
data: git://resources/demo.jpg
|
| 13 |
-
- name: 2
|
| 14 |
-
inputs:
|
| 15 |
-
- name: image
|
| 16 |
-
data: git://resources/demo2.jpg
|
| 17 |
-
- name: 3
|
| 18 |
-
inputs:
|
| 19 |
-
- name: image
|
| 20 |
-
data: git://resources/demo3.jpg
|
| 21 |
-
inferencespec:
|
| 22 |
-
cpu: 4
|
| 23 |
-
memory: 16000
|
| 24 |
-
gpu: 1
|
| 25 |
-
gpu_memory: 16000
|
| 26 |
-
model-type:
|
| 27 |
-
- ddcolor
|
| 28 |
-
domain:
|
| 29 |
-
- cv
|
| 30 |
-
frameworks:
|
| 31 |
-
- pytorch
|
| 32 |
-
backbone:
|
| 33 |
-
- unet
|
| 34 |
-
metrics:
|
| 35 |
-
- fid
|
| 36 |
-
- colorfulness
|
| 37 |
-
customized-quickstart: False
|
| 38 |
-
finetune-support: False
|
| 39 |
-
license: Apache License 2.0
|
| 40 |
-
tags:
|
| 41 |
-
- image colorization
|
| 42 |
-
- old photo restoration
|
| 43 |
-
- DDColor
|
| 44 |
-
datasets:
|
| 45 |
-
test:
|
| 46 |
-
- modelscope/image-colorization-dataset
|
| 47 |
-
---
|
| 48 |
-
|
| 49 |
-
# DDColor 图像上色模型
|
| 50 |
-
|
| 51 |
-
该模型为黑白图像上色模型,输入一张黑白图像,实现端到端的全图上色,返回上色处理后的彩色图像。
|
| 52 |
-
|
| 53 |
-
## 模型描述
|
| 54 |
-
|
| 55 |
-
DDColor 是最新的图像上色算法,能够对输入的黑白图像生成自然生动的彩色结果。
|
| 56 |
-
|
| 57 |
-
算法整体流程如下图,使用 UNet 结构的骨干网络和图像解码器分别实现图像特征提取和特征图上采样,并利用 Transformer 结构的颜色解码器完成基于视觉语义的颜色查询,最终聚合输出彩色通道预测结果。
|
| 58 |
-
|
| 59 |
-

|
| 60 |
-
|
| 61 |
-
## 模型期望使用方式和适用范围
|
| 62 |
-
|
| 63 |
-
该模型适用于多种格式的图像输入,给定黑白图像,生成上色后的彩色图像;给定彩色图像,将自动提取灰度通道作为输入,生成重上色的图像。
|
| 64 |
-
|
| 65 |
-
### 如何使用
|
| 66 |
-
|
| 67 |
-
在 ModelScope 框架上,提供输入图片,即可以通过简单的 Pipeline 调用来使用图像上色模型。
|
| 68 |
-
|
| 69 |
-
#### 代码范例
|
| 70 |
-
|
| 71 |
-
```python
|
| 72 |
-
import cv2
|
| 73 |
-
from modelscope.outputs import OutputKeys
|
| 74 |
-
from modelscope.pipelines import pipeline
|
| 75 |
-
from modelscope.utils.constant import Tasks
|
| 76 |
-
|
| 77 |
-
img_colorization = pipeline(Tasks.image_colorization,
|
| 78 |
-
model='damo/cv_ddcolor_image-colorization')
|
| 79 |
-
img_path = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/audrey_hepburn.jpg'
|
| 80 |
-
result = img_colorization(img_path)
|
| 81 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 82 |
-
```
|
| 83 |
-
|
| 84 |
-
### 模型局限性以及可能的偏差
|
| 85 |
-
|
| 86 |
-
- 本算法模型使用自然图像数据集进行训练,对于分布外场景(例如漫画等)可能产生不恰当的上色结果;
|
| 87 |
-
- 对于低分辨率或包含明显噪声的图像,算法可能无法得到理想的生成效果。
|
| 88 |
-
|
| 89 |
-
## 训练数据介绍
|
| 90 |
-
|
| 91 |
-
模型使用公开数据集 [ImageNet](https://www.image-net.org/) 训练,其训练集包含 128 万张自然图像。
|
| 92 |
-
|
| 93 |
-
## 数据评估及结果
|
| 94 |
-
|
| 95 |
-
本算法主要在 [ImageNet](https://www.image-net.org/) 和 [COCO-Stuff](https://github.com/nightrome/cocostuff)上测试。
|
| 96 |
-
|
| 97 |
-
| Val Name | FID | Colorfulness |
|
| 98 |
-
|:-----------------:|:----:|:------------:|
|
| 99 |
-
| ImageNet (val50k) | 3.92 | 38.26 |
|
| 100 |
-
| ImageNet (val5k) | 0.96 | 38.65 |
|
| 101 |
-
| COCO-Stuff | 5.18 | 38.48 |
|
| 102 |
-
|
| 103 |
-
## 引用
|
| 104 |
-
|
| 105 |
-
如果你觉得这个模型对你有所帮助,请考虑引用下面的相关论文:
|
| 106 |
-
|
| 107 |
-
```
|
| 108 |
-
@article{kang2022ddcolor,
|
| 109 |
-
title={DDColor: Towards Photo-Realistic and Semantic-Aware Image Colorization via Dual Decoders},
|
| 110 |
-
author={Kang, Xiaoyang and Yang, Tao and Ouyang, Wenqi and Ren, Peiran and Li, Lingzhi and Xie, Xuansong},
|
| 111 |
-
journal={arXiv preprint arXiv:2212.11613},
|
| 112 |
-
year={2022}
|
| 113 |
-
}
|
| 114 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/configuration.json
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "pytorch",
|
| 3 |
-
|
| 4 |
-
"task": "image-colorization",
|
| 5 |
-
|
| 6 |
-
"pipeline": {
|
| 7 |
-
"type": "ddcolor-image-colorization"
|
| 8 |
-
},
|
| 9 |
-
|
| 10 |
-
"model": {
|
| 11 |
-
"type": "ddcolor"
|
| 12 |
-
},
|
| 13 |
-
|
| 14 |
-
"dataset": {
|
| 15 |
-
"name": "imagenet-val5k-image",
|
| 16 |
-
"dataroot_gt": "val5k/",
|
| 17 |
-
"filename_tmpl": "{}",
|
| 18 |
-
"scale": 1,
|
| 19 |
-
"gt_size": 256
|
| 20 |
-
},
|
| 21 |
-
|
| 22 |
-
"train": {
|
| 23 |
-
"dataloader": {
|
| 24 |
-
"batch_size_per_gpu": 4,
|
| 25 |
-
"workers_per_gpu": 4,
|
| 26 |
-
"shuffle": true
|
| 27 |
-
},
|
| 28 |
-
"optimizer": {
|
| 29 |
-
"type": "AdamW",
|
| 30 |
-
"lr": 1e-6,
|
| 31 |
-
"weight_decay": 0.01,
|
| 32 |
-
"betas": [0.9, 0.99]
|
| 33 |
-
},
|
| 34 |
-
"lr_scheduler": {
|
| 35 |
-
"type": "CosineAnnealingLR",
|
| 36 |
-
"T_max": 200000,
|
| 37 |
-
"eta_min": 1e-7
|
| 38 |
-
},
|
| 39 |
-
"max_epochs": 2,
|
| 40 |
-
"hooks": [{
|
| 41 |
-
"type": "CheckpointHook",
|
| 42 |
-
"interval": 1
|
| 43 |
-
},
|
| 44 |
-
{
|
| 45 |
-
"type": "TextLoggerHook",
|
| 46 |
-
"interval": 1
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"type": "IterTimerHook"
|
| 50 |
-
},
|
| 51 |
-
{
|
| 52 |
-
"type": "EvaluationHook",
|
| 53 |
-
"interval": 1
|
| 54 |
-
}
|
| 55 |
-
]
|
| 56 |
-
},
|
| 57 |
-
|
| 58 |
-
"evaluation": {
|
| 59 |
-
"dataloader": {
|
| 60 |
-
"batch_size_per_gpu": 8,
|
| 61 |
-
"workers_per_gpu": 1,
|
| 62 |
-
"shuffle": false
|
| 63 |
-
},
|
| 64 |
-
"metrics": "image-colorization-metric"
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/cv_ddcolor_image-colorization/pytorch_model.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:17c460d7e55b32a598370621d77173be59e03c24b0823f06821db23a50c263ce
|
| 3 |
-
size 911950059
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/.mdl
DELETED
|
Binary file (64 Bytes)
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/.msc
DELETED
|
Binary file (529 Bytes)
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/README.md
DELETED
|
@@ -1,169 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-captioning
|
| 4 |
-
|
| 5 |
-
widgets:
|
| 6 |
-
- task: image-captioning
|
| 7 |
-
inputs:
|
| 8 |
-
- type: image
|
| 9 |
-
name: image
|
| 10 |
-
title: 图片
|
| 11 |
-
validator:
|
| 12 |
-
max_size: 10M
|
| 13 |
-
max_resolution: 5000*5000
|
| 14 |
-
examples:
|
| 15 |
-
- name: 1
|
| 16 |
-
title: 示例1
|
| 17 |
-
inputs:
|
| 18 |
-
- name: image
|
| 19 |
-
data: http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-question-answering/visual_question_answering.png
|
| 20 |
-
inferencespec:
|
| 21 |
-
cpu: 4
|
| 22 |
-
memory: 12000
|
| 23 |
-
gpu: 1
|
| 24 |
-
gpu_memory: 16000
|
| 25 |
-
|
| 26 |
-
model-type:
|
| 27 |
-
- mplug
|
| 28 |
-
|
| 29 |
-
domain:
|
| 30 |
-
- multi-modal
|
| 31 |
-
|
| 32 |
-
frameworks:
|
| 33 |
-
- pytorch
|
| 34 |
-
|
| 35 |
-
backbone:
|
| 36 |
-
- transformer
|
| 37 |
-
|
| 38 |
-
containers:
|
| 39 |
-
|
| 40 |
-
metrics:
|
| 41 |
-
- CIDEr
|
| 42 |
-
- Bleu-4
|
| 43 |
-
|
| 44 |
-
license: Apache License 2.0
|
| 45 |
-
|
| 46 |
-
finetune-support: True
|
| 47 |
-
|
| 48 |
-
language:
|
| 49 |
-
- en
|
| 50 |
-
|
| 51 |
-
tags:
|
| 52 |
-
- transformer
|
| 53 |
-
- Alibaba
|
| 54 |
-
- volume:abs/2205.12005
|
| 55 |
-
|
| 56 |
-
datasets:
|
| 57 |
-
train:
|
| 58 |
-
- 14M image-text pairs(google cc, mscoco, vg, sbu)
|
| 59 |
-
- modelscope/coco_2014_caption
|
| 60 |
-
test:
|
| 61 |
-
- MS COCO Caption test set
|
| 62 |
-
evaluation:
|
| 63 |
-
- modelscope/coco_2014_caption
|
| 64 |
-
---
|
| 65 |
-
|
| 66 |
-
# 图像描述介绍
|
| 67 |
-
图像描述:给定一张图片,模型根据图片信息生成一句对应描述。可以应用于给一张图片配上一句文字或者打个标签的场景。本页面右侧提供了在线体验的服务,欢迎使用!注:本模型为mPLUG-图像描述的Large模型,参数量约为6亿。
|
| 68 |
-
|
| 69 |
-
## 模型描述
|
| 70 |
-
|
| 71 |
-
本任务是mPLUG,在英文图像描述MS COCO Caption数据集进行finetune的图像描述下游任务。mPLUG模型是统一理解和生成的多模态基础模型,该模型提出了基于skip-connections的高效跨模态融合框架。其中,mPLUG论文公开时在MS COCO Caption数据上达到SOTA,详见:[mPLUG: Effective and Efficient Vision-Language Learning by Cross-modal Skip-connections](https://arxiv.org/abs/2205.12005)
|
| 72 |
-
|
| 73 |
-

|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
## 期望模型使用方式以及适用范围
|
| 77 |
-
本模型主要用于为输入图片生成对应的文字描述。用户可以自行尝试各种输入图片。具体调用方式请参考代码示例。
|
| 78 |
-
|
| 79 |
-
### 如何使用
|
| 80 |
-
在安装完成MaaS-lib之后即可使用image-captioning的能力
|
| 81 |
-
|
| 82 |
-
#### 推理代码范例
|
| 83 |
-
```python
|
| 84 |
-
from modelscope.pipelines import pipeline
|
| 85 |
-
from modelscope.utils.constant import Tasks
|
| 86 |
-
|
| 87 |
-
model_id = 'damo/mplug_image-captioning_coco_large_en'
|
| 88 |
-
input_caption = 'https://alice-open.oss-cn-zhangjiakou.aliyuncs.com/mPLUG/image_captioning.png'
|
| 89 |
-
|
| 90 |
-
pipeline_caption = pipeline(Tasks.image_captioning, model=model_id)
|
| 91 |
-
result = pipeline_caption(input_caption)
|
| 92 |
-
print(result)
|
| 93 |
-
|
| 94 |
-
```
|
| 95 |
-
|
| 96 |
-
### 模型局限性以及可能的偏差
|
| 97 |
-
模型在数据集上训练,有可能产生一些偏差,请用户自行评测后决定如何使用。
|
| 98 |
-
|
| 99 |
-
## 训练数据介绍
|
| 100 |
-
本模型训练数据集是MS COCO Caption, 具体数据可以[下载](https://cocodataset.org)
|
| 101 |
-
|
| 102 |
-
## 模型训练流程
|
| 103 |
-
|
| 104 |
-
### 微调代码范例
|
| 105 |
-
|
| 106 |
-
```python
|
| 107 |
-
import tempfile
|
| 108 |
-
|
| 109 |
-
from modelscope.msdatasets import MsDataset
|
| 110 |
-
from modelscope.metainfo import Trainers
|
| 111 |
-
from modelscope.trainers import build_trainer
|
| 112 |
-
|
| 113 |
-
datadict = MsDataset.load('coco_captions_small_slice')
|
| 114 |
-
|
| 115 |
-
train_dataset = MsDataset(
|
| 116 |
-
datadict['train'].remap_columns({
|
| 117 |
-
'image:FILE': 'image',
|
| 118 |
-
'answer:Value': 'answer'
|
| 119 |
-
}).map(lambda _: {'question': 'what the picture describes?'}))
|
| 120 |
-
test_dataset = MsDataset(
|
| 121 |
-
datadict['test'].remap_columns({
|
| 122 |
-
'image:FILE': 'image',
|
| 123 |
-
'answer:Value': 'answer'
|
| 124 |
-
}).map(lambda _: {'question': 'what the picture describes?'}))
|
| 125 |
-
|
| 126 |
-
# 可以在代码修改 configuration 的配置
|
| 127 |
-
def cfg_modify_fn(cfg):
|
| 128 |
-
cfg.train.hooks = [{
|
| 129 |
-
'type': 'CheckpointHook',
|
| 130 |
-
'interval': 2
|
| 131 |
-
}, {
|
| 132 |
-
'type': 'TextLoggerHook',
|
| 133 |
-
'interval': 1
|
| 134 |
-
}, {
|
| 135 |
-
'type': 'IterTimerHook'
|
| 136 |
-
}]
|
| 137 |
-
return cfg
|
| 138 |
-
|
| 139 |
-
kwargs = dict(
|
| 140 |
-
model='damo/mplug_image-captioning_coco_large_en',
|
| 141 |
-
train_dataset=train_dataset,
|
| 142 |
-
eval_dataset=test_dataset,
|
| 143 |
-
max_epochs=2,
|
| 144 |
-
cfg_modify_fn=cfg_modify_fn,
|
| 145 |
-
work_dir=tempfile.TemporaryDirectory().name)
|
| 146 |
-
|
| 147 |
-
trainer = build_trainer(
|
| 148 |
-
name=Trainers.nlp_base_trainer, default_args=kwargs)
|
| 149 |
-
trainer.train()
|
| 150 |
-
```
|
| 151 |
-
|
| 152 |
-
## 数据评估及结果
|
| 153 |
-
mPLUG在VQA数据集,同等规模和预训练数据的模型中取得SOTA,VQA榜单上排名前列
|
| 154 |
-
|
| 155 |
-

|
| 156 |
-
|
| 157 |
-
### 相关论文以及引用信息
|
| 158 |
-
如果我们的模型对您有帮助,请您引用我们的文章:
|
| 159 |
-
```BibTeX
|
| 160 |
-
@inproceedings{li2022mplug,
|
| 161 |
-
title={mPLUG: Effective and Efficient Vision-Language Learning by Cross-modal Skip-connections},
|
| 162 |
-
author={Li, Chenliang and Xu, Haiyang and Tian, Junfeng and Wang, Wei and Yan, Ming and Bi, Bin and Ye, Jiabo and Chen, Hehong and Xu, Guohai and Cao, Zheng and Zhang, Ji and Huang, Songfang and Huang, Fei and Zhou, Jingren and Si, Luo},
|
| 163 |
-
year={2022},
|
| 164 |
-
journal={arXiv}
|
| 165 |
-
}
|
| 166 |
-
```
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/config.yaml
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
task: 'image-captioning'
|
| 2 |
-
bert_config: 'config_bert.json'
|
| 3 |
-
|
| 4 |
-
image_res: 336
|
| 5 |
-
batch_size_train: 128
|
| 6 |
-
vision_width: 1024
|
| 7 |
-
distill: True
|
| 8 |
-
clip_name: "ViT-L-14"
|
| 9 |
-
batch_size_test: 64
|
| 10 |
-
k_test: 128
|
| 11 |
-
|
| 12 |
-
alpha: 0.4
|
| 13 |
-
warm_up: True
|
| 14 |
-
|
| 15 |
-
eos: '[SEP]'
|
| 16 |
-
|
| 17 |
-
optimizer: {opt: adamW, lr1: 3e-5, lr2: 5e-6, weight_decay: 0.02}
|
| 18 |
-
schedular: {sched: cosine, lr: 3e-5, epochs: 8, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 4, cooldown_epochs: 0}
|
| 19 |
-
|
| 20 |
-
# predictor
|
| 21 |
-
min_length: 3
|
| 22 |
-
max_length: 35
|
| 23 |
-
beam_size: 5
|
| 24 |
-
add_ocr: False
|
| 25 |
-
add_object: False
|
| 26 |
-
|
| 27 |
-
# clip
|
| 28 |
-
clip_embed_dim: 768
|
| 29 |
-
clip_image_resolution: 224
|
| 30 |
-
clip_vision_layers: 24
|
| 31 |
-
clip_vision_width: 1024
|
| 32 |
-
clip_vision_patch_size: 14
|
| 33 |
-
clip_context_length: 77
|
| 34 |
-
clip_vocab_size: 49408
|
| 35 |
-
clip_transformer_width: 768
|
| 36 |
-
clip_transformer_heads: 12
|
| 37 |
-
clip_transformer_layers: 12
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/config_bert.json
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"architectures": [
|
| 3 |
-
"BertForMaskedLM"
|
| 4 |
-
],
|
| 5 |
-
"attention_probs_dropout_prob": 0.1,
|
| 6 |
-
"hidden_act": "gelu",
|
| 7 |
-
"hidden_dropout_prob": 0.1,
|
| 8 |
-
"hidden_size": 768,
|
| 9 |
-
"initializer_range": 0.02,
|
| 10 |
-
"intermediate_size": 3072,
|
| 11 |
-
"layer_norm_eps": 1e-12,
|
| 12 |
-
"max_position_embeddings": 512,
|
| 13 |
-
"model_type": "bert",
|
| 14 |
-
"num_attention_heads": 12,
|
| 15 |
-
"num_hidden_layers": 12,
|
| 16 |
-
"pad_token_id": 0,
|
| 17 |
-
"type_vocab_size": 2,
|
| 18 |
-
"vocab_size": 30522,
|
| 19 |
-
"encoder_width": 768,
|
| 20 |
-
"add_cross_attention": false,
|
| 21 |
-
"use_cache":false,
|
| 22 |
-
"gradient_checkpointing": false,
|
| 23 |
-
"text_encoder_layers": 6,
|
| 24 |
-
"fusion_layers": 6,
|
| 25 |
-
"text_decode_layers": 12,
|
| 26 |
-
"stride_layer": 6
|
| 27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/configuration.json
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "pytorch",
|
| 3 |
-
"task": "image-captioning",
|
| 4 |
-
"preprocessor": {
|
| 5 |
-
"type": "mplug-tasks-preprocessor"
|
| 6 |
-
},
|
| 7 |
-
"model": {
|
| 8 |
-
"type": "mplug"
|
| 9 |
-
},
|
| 10 |
-
"pipeline": {
|
| 11 |
-
"type": "image-captioning"
|
| 12 |
-
},
|
| 13 |
-
"train": {
|
| 14 |
-
"work_dir": "/tmp",
|
| 15 |
-
"max_epochs": 3,
|
| 16 |
-
"dataloader": {
|
| 17 |
-
"batch_size_per_gpu": 2,
|
| 18 |
-
"workers_per_gpu": 1
|
| 19 |
-
},
|
| 20 |
-
"optimizer": {
|
| 21 |
-
"type": "SGD",
|
| 22 |
-
"lr": 0.01,
|
| 23 |
-
"options": {
|
| 24 |
-
"grad_clip": {
|
| 25 |
-
"max_norm": 2.0
|
| 26 |
-
}
|
| 27 |
-
}
|
| 28 |
-
},
|
| 29 |
-
"lr_scheduler": {
|
| 30 |
-
"type": "StepLR",
|
| 31 |
-
"step_size": 2,
|
| 32 |
-
"options": {
|
| 33 |
-
"warmup": {
|
| 34 |
-
"type": "LinearWarmup",
|
| 35 |
-
"warmup_iters": 2
|
| 36 |
-
}
|
| 37 |
-
}
|
| 38 |
-
},
|
| 39 |
-
"hooks": [{
|
| 40 |
-
"type": "CheckpointHook",
|
| 41 |
-
"interval": 1
|
| 42 |
-
}, {
|
| 43 |
-
"type": "TextLoggerHook",
|
| 44 |
-
"interval": 1
|
| 45 |
-
}, {
|
| 46 |
-
"type": "IterTimerHook"
|
| 47 |
-
}, {
|
| 48 |
-
"type": "EvaluationHook",
|
| 49 |
-
"interval": 1
|
| 50 |
-
}]
|
| 51 |
-
},
|
| 52 |
-
"evaluation": {
|
| 53 |
-
"dataloader": {
|
| 54 |
-
"batch_size_per_gpu": 2,
|
| 55 |
-
"workers_per_gpu": 1,
|
| 56 |
-
"shuffle": false
|
| 57 |
-
}
|
| 58 |
-
}
|
| 59 |
-
}
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/damo/mplug_image-captioning_coco_large_en/vocab.txt
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.mdl
DELETED
|
Binary file (74 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.msc
DELETED
|
Binary file (894 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.0.1,CreatedAt:1678848973
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/README.md
DELETED
|
@@ -1,211 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-portrait-stylization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-portrait-stylization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
validator:
|
| 9 |
-
max_size: 10M
|
| 10 |
-
max_resolution: 6000*6000
|
| 11 |
-
examples:
|
| 12 |
-
- name: 1
|
| 13 |
-
inputs:
|
| 14 |
-
- name: image
|
| 15 |
-
data: https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-cartoon/cartoon.png
|
| 16 |
-
inferencespec:
|
| 17 |
-
cpu: 2
|
| 18 |
-
memory: 4000
|
| 19 |
-
gpu: 1
|
| 20 |
-
gpu_memory: 16000
|
| 21 |
-
model_type:
|
| 22 |
-
- GAN
|
| 23 |
-
domain:
|
| 24 |
-
- cv
|
| 25 |
-
frameworks:
|
| 26 |
-
- TensorFlow
|
| 27 |
-
backbone:
|
| 28 |
-
- UNet
|
| 29 |
-
metrics:
|
| 30 |
-
- realism
|
| 31 |
-
license: Apache License 2.0
|
| 32 |
-
language:
|
| 33 |
-
- ch
|
| 34 |
-
tags:
|
| 35 |
-
- portrait stylization
|
| 36 |
-
- Alibaba
|
| 37 |
-
- SIGGRAPH 2022
|
| 38 |
-
datasets:
|
| 39 |
-
test:
|
| 40 |
-
- modelscope/human_face_portrait_compound_dataset
|
| 41 |
-
---
|
| 42 |
-
|
| 43 |
-
# DCT-Net人像卡通化模型-3D
|
| 44 |
-
|
| 45 |
-
### [论文](https://arxiv.org/abs/2207.02426) | [项目主页](https://menyifang.github.io/projects/DCTNet/DCTNet.html)
|
| 46 |
-
|
| 47 |
-
输入一张人物图像,实现端到端全图卡通化转换,生成3D风格虚拟形象,返回风格化后的结果图像。
|
| 48 |
-
|
| 49 |
-
其生成效果如下所示:
|
| 50 |
-
|
| 51 |
-

|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
## 模型描述
|
| 55 |
-
|
| 56 |
-
该任务采用一种全新的域校准图像翻译模型DCT-Net(Domain-Calibrated Translation),利用小样本的风格数据,即可得到高保真、强鲁棒、易拓展的人像风格转换模型,并通过端到端推理快速得到风格转换结果。
|
| 57 |
-

|
| 58 |
-
|
| 59 |
-
## 使用方式和范围
|
| 60 |
-
|
| 61 |
-
使用方式:
|
| 62 |
-
- 支持GPU/CPU推理,在任意真实人物图像上进行直接推理;
|
| 63 |
-
|
| 64 |
-
使用范围:
|
| 65 |
-
- 包含人脸的人像照片(3通道RGB图像,支持PNG、JPG、JPEG格式),人脸分辨率大于100x100,总体图像分辨率小于3000×3000,低质人脸图像建议预先人脸增强处理。
|
| 66 |
-
|
| 67 |
-
目标场景:
|
| 68 |
-
- 艺术创作、社交娱乐、隐私保护场景,自动化生成卡通肖像。
|
| 69 |
-
|
| 70 |
-
### 如何使用
|
| 71 |
-
|
| 72 |
-
在ModelScope框架上,提供输入图片,即可以通过简单的Pipeline调用来使用人像卡通化模型。
|
| 73 |
-
|
| 74 |
-
#### 代码范例
|
| 75 |
-
|
| 76 |
-
- 模型推理(支持CPU/GPU):
|
| 77 |
-
|
| 78 |
-
```python
|
| 79 |
-
import cv2
|
| 80 |
-
from modelscope.outputs import OutputKeys
|
| 81 |
-
from modelscope.pipelines import pipeline
|
| 82 |
-
from modelscope.utils.constant import Tasks
|
| 83 |
-
|
| 84 |
-
img_cartoon = pipeline(Tasks.image_portrait_stylization,
|
| 85 |
-
model='damo/cv_unet_person-image-cartoon-3d_compound-models')
|
| 86 |
-
# 图像本地路径
|
| 87 |
-
#img_path = 'input.png'
|
| 88 |
-
# 图像url链接
|
| 89 |
-
img_path = 'https://invi-label.oss-cn-shanghai.aliyuncs.com/label/cartoon/image_cartoon.png'
|
| 90 |
-
result = img_cartoon(img_path)
|
| 91 |
-
|
| 92 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 93 |
-
print('finished!')
|
| 94 |
-
|
| 95 |
-
```
|
| 96 |
-
|
| 97 |
-
- 模型训练:
|
| 98 |
-
|
| 99 |
-
环境要求:tf1.14/15及兼容cuda,支持GPU训练
|
| 100 |
-
|
| 101 |
-
```python
|
| 102 |
-
import os
|
| 103 |
-
import unittest
|
| 104 |
-
import cv2
|
| 105 |
-
from modelscope.exporters.cv import CartoonTranslationExporter
|
| 106 |
-
from modelscope.msdatasets import MsDataset
|
| 107 |
-
from modelscope.outputs import OutputKeys
|
| 108 |
-
from modelscope.pipelines import pipeline
|
| 109 |
-
from modelscope.pipelines.base import Pipeline
|
| 110 |
-
from modelscope.trainers.cv import CartoonTranslationTrainer
|
| 111 |
-
from modelscope.utils.constant import Tasks
|
| 112 |
-
from modelscope.utils.test_utils import test_level
|
| 113 |
-
|
| 114 |
-
model_id = 'damo/cv_unet_person-image-cartoon_compound-models'
|
| 115 |
-
data_dir = MsDataset.load(
|
| 116 |
-
'dctnet_train_clipart_mini_ms',
|
| 117 |
-
namespace='menyifang',
|
| 118 |
-
split='train').config_kwargs['split_config']['train']
|
| 119 |
-
|
| 120 |
-
data_photo = os.path.join(data_dir, 'face_photo')
|
| 121 |
-
data_cartoon = os.path.join(data_dir, 'face_cartoon')
|
| 122 |
-
work_dir = 'exp_localtoon'
|
| 123 |
-
max_steps = 10
|
| 124 |
-
trainer = CartoonTranslationTrainer(
|
| 125 |
-
model=model_id,
|
| 126 |
-
work_dir=work_dir,
|
| 127 |
-
photo=data_photo,
|
| 128 |
-
cartoon=data_cartoon,
|
| 129 |
-
max_steps=max_steps)
|
| 130 |
-
trainer.train()
|
| 131 |
-
```
|
| 132 |
-
|
| 133 |
-
上述训练代码仅仅提供简单训练的范例,对大规模自定义数据,替换data_photo为真实人脸数据路径,data_cartoon为卡通风格人脸数据路径,max_steps建议设置为300000,可视化结果将存储在work_dir下;此外configuration.json(~/.cache/modelscope/hub/damo/cv_unet_person-image-cartoon_compound-models/)可以进行自定义修改;
|
| 134 |
-
|
| 135 |
-
Note: notebook预装环境下存在numpy依赖冲突,可手动更新解决:pip install numpy==1.18.5
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
- 卡通人脸数据获取
|
| 139 |
-
|
| 140 |
-
卡通人脸数据可由设计师设计/网络收集得到,在此提供一种基于[Stable-Diffusion风格预训练模型](https://modelscope.cn/models/damo/cv_cartoon_stable_diffusion_design/summary)的卡通数据生成方式
|
| 141 |
-
|
| 142 |
-
```python
|
| 143 |
-
import cv2
|
| 144 |
-
from modelscope.pipelines import pipeline
|
| 145 |
-
from modelscope.utils.constant import Tasks
|
| 146 |
-
|
| 147 |
-
pipe = pipeline(Tasks.text_to_image_synthesis, model='damo/cv_cartoon_stable_diffusion_clipart', model_revision='v1.0.0')
|
| 148 |
-
from diffusers.schedulers import EulerAncestralDiscreteScheduler
|
| 149 |
-
pipe.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.pipeline.scheduler.config)
|
| 150 |
-
output = pipe({'text': 'archer style, a portrait painting of Johnny Depp'})
|
| 151 |
-
cv2.imwrite('result.png', output['output_imgs'][0])
|
| 152 |
-
print('Image saved to result.png')
|
| 153 |
-
|
| 154 |
-
print('finished!')
|
| 155 |
-
```
|
| 156 |
-
可通过替换Johnny Depp为其他名人姓名,产生多样化风格数据,通过人脸对齐裁剪即可得到卡通人脸数据;可以通过修改pipeline的model参数指定不同风格的SD预训练模型。
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
### 模型局限性以及可能的偏差
|
| 160 |
-
|
| 161 |
-
- 低质/低分辨率人脸图像由于本身内容信息丢失严重,无法得到理想转换效果,可预先采用人脸增强模型预处理图像解决;
|
| 162 |
-
|
| 163 |
-
- 小样本数据涵盖场景有限,人脸暗光、阴影干扰可能会影响生成效果。
|
| 164 |
-
|
| 165 |
-
## 训练数据介绍
|
| 166 |
-
|
| 167 |
-
训练数据从公开数据集(COCO等)、互联网搜索人像图像,并进行标注作为训练数据。
|
| 168 |
-
|
| 169 |
-
- 真实人脸数据:[FFHQ](https://github.com/NVlabs/ffhq-dataset),常用的公开人脸数据集,包含约7万张人脸图像;
|
| 170 |
-
|
| 171 |
-
- 卡通人脸数据,互联网搜集,100+张
|
| 172 |
-
|
| 173 |
-
## 模型推理流程
|
| 174 |
-
|
| 175 |
-
### 预处理
|
| 176 |
-
|
| 177 |
-
- 人脸关键点检测
|
| 178 |
-
- 人脸提取&对齐,得到256x256大小的对齐人脸
|
| 179 |
-
|
| 180 |
-
### 推理
|
| 181 |
-
|
| 182 |
-
- 为控制推理效率,人脸及背景resize到指定大小分别推理,再背景融合得到最终效果;
|
| 183 |
-
- 亦可将整图依据人脸尺度整体缩放到合适尺寸,直接单次推理
|
| 184 |
-
|
| 185 |
-
## 数据评估及结果
|
| 186 |
-
|
| 187 |
-
使用CelebA公开人脸数据集进行评测,在FID/ID/用户偏好等指标上均达SOTA结果:
|
| 188 |
-
|
| 189 |
-
| Method | FID | ID | Pref.A | Pref.B |
|
| 190 |
-
| ------------ | ------------ | ------------ | ------------ | ------------ |
|
| 191 |
-
| CycleGAN | 57.08 | 0.55 | 7.1 | 1.4 |
|
| 192 |
-
| U-GAT-IT | 68.40 | 0.58 | 5.0 | 1.5 |
|
| 193 |
-
| Toonify | 55.27 | 0.62 | 3.7 | 4.2 |
|
| 194 |
-
| pSp | 69.38 | 0.60 | 1.6 | 2.5 |
|
| 195 |
-
| Ours | **35.92** | **0.71** | **82.6** | **90.5** |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
## 引用
|
| 199 |
-
如果该模型对你有所帮助,请引用相关的论文:
|
| 200 |
-
|
| 201 |
-
```BibTeX
|
| 202 |
-
@inproceedings{men2022domain,
|
| 203 |
-
title={DCT-Net: Domain-Calibrated Translation for Portrait Stylization},
|
| 204 |
-
author={Men, Yifang and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong},
|
| 205 |
-
journal={ACM Transactions on Graphics (TOG)},
|
| 206 |
-
volume={41},
|
| 207 |
-
number={4},
|
| 208 |
-
pages={1--9},
|
| 209 |
-
year={2022}
|
| 210 |
-
}
|
| 211 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/configuration.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "tensorflow",
|
| 3 |
-
"task": "image-portrait-stylization",
|
| 4 |
-
"pipeline": {
|
| 5 |
-
"type": "unet-person-image-cartoon"
|
| 6 |
-
},
|
| 7 |
-
"train": {
|
| 8 |
-
"num_gpus": 1,
|
| 9 |
-
"batch_size": 32,
|
| 10 |
-
"adv_train_lr": 2e-4,
|
| 11 |
-
"max_steps": 300000,
|
| 12 |
-
"logging_interval": 1000,
|
| 13 |
-
"ckpt_period_interval": 1000,
|
| 14 |
-
"resume_epoch": 15999,
|
| 15 |
-
"patch_size": 256,
|
| 16 |
-
"work_dir": "exp_localtoon",
|
| 17 |
-
"photo": "/PATH/TO/PHOTO/DIR",
|
| 18 |
-
"cartoon": "/PATH/TO/CARTOON/DIR"
|
| 19 |
-
}
|
| 20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/tf_ckpts/model-15999.data-00000-of-00001
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ae0ab60719d9b55efa5eaca236ce37ad2ee59b49033edd7bcf31b278760074a6
|
| 3 |
-
size 31380852
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-3d_compound-models/tf_ckpts/model-15999.index
DELETED
|
Binary file (11.4 kB)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.mdl
DELETED
|
Binary file (80 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.msc
DELETED
|
Binary file (894 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.0.1,CreatedAt:1678848600
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/README.md
DELETED
|
@@ -1,214 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-portrait-stylization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-portrait-stylization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
validator:
|
| 9 |
-
max_size: 10M
|
| 10 |
-
max_resolution: 6000*6000
|
| 11 |
-
examples:
|
| 12 |
-
- name: 1
|
| 13 |
-
inputs:
|
| 14 |
-
- name: image
|
| 15 |
-
data: https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-cartoon/cartoon.png
|
| 16 |
-
inferencespec:
|
| 17 |
-
cpu: 2
|
| 18 |
-
memory: 4000
|
| 19 |
-
gpu: 1
|
| 20 |
-
gpu_memory: 16000
|
| 21 |
-
model_type:
|
| 22 |
-
- GAN
|
| 23 |
-
domain:
|
| 24 |
-
- cv
|
| 25 |
-
frameworks:
|
| 26 |
-
- TensorFlow
|
| 27 |
-
backbone:
|
| 28 |
-
- UNet
|
| 29 |
-
metrics:
|
| 30 |
-
- realism
|
| 31 |
-
customized-quickstart: True
|
| 32 |
-
finetune-support: True
|
| 33 |
-
license: Apache License 2.0
|
| 34 |
-
language:
|
| 35 |
-
- ch
|
| 36 |
-
tags:
|
| 37 |
-
- portrait stylization
|
| 38 |
-
- Alibaba
|
| 39 |
-
- SIGGRAPH 2022
|
| 40 |
-
datasets:
|
| 41 |
-
test:
|
| 42 |
-
- modelscope/human_face_portrait_compound_dataset
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
# DCT-Net人像卡通化模型-艺术风
|
| 46 |
-
|
| 47 |
-
### [论文](https://arxiv.org/abs/2207.02426) | [项目主页](https://menyifang.github.io/projects/DCTNet/DCTNet.html)
|
| 48 |
-
|
| 49 |
-
输入一张人物图像,实现端到端全图卡通化转换,生成艺术风格虚拟形象,返回风格化后的结果图像。
|
| 50 |
-
|
| 51 |
-
其生成效果如下所示:
|
| 52 |
-
|
| 53 |
-

|
| 54 |
-
|
| 55 |
-
## 模型描述
|
| 56 |
-
|
| 57 |
-
该任务采用一种全新的域校准图像翻译模型DCT-Net(Domain-Calibrated Translation),利用小样本的风格数据,即可得到高保真、强鲁棒、易拓展的人像风格转换模型,并通过端到端推理快速得到风格转换结果。
|
| 58 |
-
|
| 59 |
-

|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
## 使用方式和范围
|
| 63 |
-
|
| 64 |
-
使用方式:
|
| 65 |
-
- 支持GPU/CPU推理,在任意真实人物图像上进行直接推理;
|
| 66 |
-
|
| 67 |
-
使用范围:
|
| 68 |
-
- 包含人脸的人像照片(3通道RGB图像,支持PNG、JPG、JPEG格式),人脸分辨率大于100x100,总体图像分辨率小于3000×3000,低质人脸图像建议预先人脸增强处理。
|
| 69 |
-
|
| 70 |
-
目标场景:
|
| 71 |
-
- 艺术创作、社交娱乐、隐私保护场景,自动化生成卡通肖像。
|
| 72 |
-
|
| 73 |
-
### 如何使用
|
| 74 |
-
|
| 75 |
-
在ModelScope框架上,提供输入图片,即可以通过简单的Pipeline调用来使用人像卡通化模型。
|
| 76 |
-
|
| 77 |
-
#### 代码范例
|
| 78 |
-
|
| 79 |
-
- 模型推理(支持CPU/GPU):
|
| 80 |
-
|
| 81 |
-
```python
|
| 82 |
-
import cv2
|
| 83 |
-
from modelscope.outputs import OutputKeys
|
| 84 |
-
from modelscope.pipelines import pipeline
|
| 85 |
-
from modelscope.utils.constant import Tasks
|
| 86 |
-
|
| 87 |
-
img_cartoon = pipeline(Tasks.image_portrait_stylization,
|
| 88 |
-
model='damo/cv_unet_person-image-cartoon-artstyle_compound-models')
|
| 89 |
-
# 图像本地路径
|
| 90 |
-
#img_path = 'input.png'
|
| 91 |
-
# 图像url链接
|
| 92 |
-
img_path = 'https://invi-label.oss-cn-shanghai.aliyuncs.com/label/cartoon/image_cartoon.png'
|
| 93 |
-
result = img_cartoon(img_path)
|
| 94 |
-
|
| 95 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 96 |
-
print('finished!')
|
| 97 |
-
|
| 98 |
-
```
|
| 99 |
-
|
| 100 |
-
- 模型训练:
|
| 101 |
-
|
| 102 |
-
环境要求:tf1.14/15及兼容cuda,支持GPU训练
|
| 103 |
-
|
| 104 |
-
```python
|
| 105 |
-
import os
|
| 106 |
-
import unittest
|
| 107 |
-
import cv2
|
| 108 |
-
from modelscope.exporters.cv import CartoonTranslationExporter
|
| 109 |
-
from modelscope.msdatasets import MsDataset
|
| 110 |
-
from modelscope.outputs import OutputKeys
|
| 111 |
-
from modelscope.pipelines import pipeline
|
| 112 |
-
from modelscope.pipelines.base import Pipeline
|
| 113 |
-
from modelscope.trainers.cv import CartoonTranslationTrainer
|
| 114 |
-
from modelscope.utils.constant import Tasks
|
| 115 |
-
from modelscope.utils.test_utils import test_level
|
| 116 |
-
|
| 117 |
-
model_id = 'damo/cv_unet_person-image-cartoon-artstyle_compound-models'
|
| 118 |
-
data_dir = MsDataset.load(
|
| 119 |
-
'dctnet_train_clipart_mini_ms',
|
| 120 |
-
namespace='menyifang',
|
| 121 |
-
split='train').config_kwargs['split_config']['train']
|
| 122 |
-
|
| 123 |
-
data_photo = os.path.join(data_dir, 'face_photo')
|
| 124 |
-
data_cartoon = os.path.join(data_dir, 'face_cartoon')
|
| 125 |
-
work_dir = 'exp_localtoon'
|
| 126 |
-
max_steps = 10
|
| 127 |
-
trainer = CartoonTranslationTrainer(
|
| 128 |
-
model=model_id,
|
| 129 |
-
work_dir=work_dir,
|
| 130 |
-
photo=data_photo,
|
| 131 |
-
cartoon=data_cartoon,
|
| 132 |
-
max_steps=max_steps)
|
| 133 |
-
trainer.train()
|
| 134 |
-
```
|
| 135 |
-
|
| 136 |
-
上述训练代码仅仅提供简单训练的范例,对大规模自定义数据,替换data_photo为真实人脸数据路径,data_cartoon为卡通风格人脸数据路径,max_steps建议设置为300000,可视化结果将存储在work_dir下;此外configuration.json(~/.cache/modelscope/hub/damo/cv_unet_person-image-cartoon-artstyle_compound-models/)可以进行自定义修改;
|
| 137 |
-
|
| 138 |
-
Note: notebook预装环境下存在numpy依赖冲突,可手动更新解决:pip install numpy==1.18.5
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
- 卡通人脸数据获取
|
| 142 |
-
|
| 143 |
-
卡通人脸数据可由设计师设计/网络收集得到,在此提供一种基于[Stable-Diffusion风格预训练模型](https://modelscope.cn/models/damo/cv_cartoon_stable_diffusion_design/summary)的卡通数据生成方式
|
| 144 |
-
|
| 145 |
-
```python
|
| 146 |
-
import cv2
|
| 147 |
-
from modelscope.pipelines import pipeline
|
| 148 |
-
from modelscope.utils.constant import Tasks
|
| 149 |
-
|
| 150 |
-
pipe = pipeline(Tasks.text_to_image_synthesis, model='damo/cv_cartoon_stable_diffusion_clipart', model_revision='v1.0.0')
|
| 151 |
-
from diffusers.schedulers import EulerAncestralDiscreteScheduler
|
| 152 |
-
pipe.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.pipeline.scheduler.config)
|
| 153 |
-
output = pipe({'text': 'archer style, a portrait painting of Johnny Depp'})
|
| 154 |
-
cv2.imwrite('result.png', output['output_imgs'][0])
|
| 155 |
-
print('Image saved to result.png')
|
| 156 |
-
|
| 157 |
-
print('finished!')
|
| 158 |
-
```
|
| 159 |
-
可通过替换Johnny Depp为其他名人姓名,产生多样化风格数据,通过人脸对齐裁剪即可得到卡通人脸数据;可以通过修改pipeline的model参数指定不同风格的SD预训练模型。
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
### 模型局限性以及可能的偏差
|
| 163 |
-
|
| 164 |
-
- 低质/低分辨率人脸图像由于本身内容信息丢失严重,无法得到理想转换效果,可预先采用人脸增强模型预处理图像解决;
|
| 165 |
-
|
| 166 |
-
- 艺术风格着重加强色彩、对比度等,在高品质、高对比度写真上处理效果更佳。
|
| 167 |
-
|
| 168 |
-
## 训练数据介绍
|
| 169 |
-
|
| 170 |
-
训练数据从公开数据集(COCO等)、互联网搜索人像图像,并进行标注作为训练数据。
|
| 171 |
-
|
| 172 |
-
- 真实人脸数据:[FFHQ](https://github.com/NVlabs/ffhq-dataset),常用的人脸公开数据集,包含7万张人脸图像;
|
| 173 |
-
|
| 174 |
-
- 卡通人脸数据,互联网搜集,100+张
|
| 175 |
-
|
| 176 |
-
## 模型推理流程
|
| 177 |
-
|
| 178 |
-
### 预处理
|
| 179 |
-
|
| 180 |
-
- 人脸关键点检测
|
| 181 |
-
- 人脸提取&对齐,得到256x256大小的对齐人脸
|
| 182 |
-
|
| 183 |
-
### 推理
|
| 184 |
-
|
| 185 |
-
- 为控制推理效率,人脸及背景resize到指定大小分别推理,再背景融合得到最终效果;
|
| 186 |
-
- 亦可将整图依据人脸尺度整体缩放到合适尺寸,直接单次推理
|
| 187 |
-
|
| 188 |
-
## 数据评估及结果
|
| 189 |
-
|
| 190 |
-
使用CelebA公开人脸数据集进行评测,在FID/ID/用户偏好等指标上均达SOTA结果:
|
| 191 |
-
|
| 192 |
-
| Method | FID | ID | Pref.A | Pref.B |
|
| 193 |
-
| ------------ | ------------ | ------------ | ------------ | ------------ |
|
| 194 |
-
| CycleGAN | 57.08 | 0.55 | 7.1 | 1.4 |
|
| 195 |
-
| U-GAT-IT | 68.40 | 0.58 | 5.0 | 1.5 |
|
| 196 |
-
| Toonify | 55.27 | 0.62 | 3.7 | 4.2 |
|
| 197 |
-
| pSp | 69.38 | 0.60 | 1.6 | 2.5 |
|
| 198 |
-
| Ours | **35.92** | **0.71** | **82.6** | **90.5** |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
## 引用
|
| 202 |
-
如果该模型对你有所帮助,请引用相关的论文:
|
| 203 |
-
|
| 204 |
-
```BibTeX
|
| 205 |
-
@article{men2022domain,
|
| 206 |
-
title={DCT-Net: Domain-Calibrated Translation for Portrait Stylization},
|
| 207 |
-
author={Men, Yifang and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong},
|
| 208 |
-
journal={ACM Transactions on Graphics (TOG)},
|
| 209 |
-
volume={41},
|
| 210 |
-
number={4},
|
| 211 |
-
pages={1--9},
|
| 212 |
-
year={2022}
|
| 213 |
-
}
|
| 214 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/configuration.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "tensorflow",
|
| 3 |
-
"task": "image-portrait-stylization",
|
| 4 |
-
"pipeline": {
|
| 5 |
-
"type": "unet-person-image-cartoon"
|
| 6 |
-
},
|
| 7 |
-
"train": {
|
| 8 |
-
"num_gpus": 1,
|
| 9 |
-
"batch_size": 32,
|
| 10 |
-
"adv_train_lr": 2e-4,
|
| 11 |
-
"max_steps": 300000,
|
| 12 |
-
"logging_interval": 1000,
|
| 13 |
-
"ckpt_period_interval": 1000,
|
| 14 |
-
"resume_epoch": 96499,
|
| 15 |
-
"patch_size": 256,
|
| 16 |
-
"work_dir": "exp_localtoon",
|
| 17 |
-
"photo": "/PATH/TO/PHOTO/DIR",
|
| 18 |
-
"cartoon": "/PATH/TO/CARTOON/DIR"
|
| 19 |
-
}
|
| 20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/tf_ckpts/model-96499.data-00000-of-00001
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:79091d38a0da1ff40ccf253c2bf878d6c2082adeddb720fa998a8ccda3e6bb53
|
| 3 |
-
size 5868300
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-artstyle_compound-models/tf_ckpts/model-96499.index
DELETED
|
Binary file (1.56 kB)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.mdl
DELETED
|
Binary file (81 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.msc
DELETED
|
Binary file (896 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.0.1,CreatedAt:1678850730
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/README.md
DELETED
|
@@ -1,216 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-portrait-stylization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-portrait-stylization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
validator:
|
| 9 |
-
max_size: 10M
|
| 10 |
-
max_resolution: 6000*6000
|
| 11 |
-
examples:
|
| 12 |
-
- name: 1
|
| 13 |
-
inputs:
|
| 14 |
-
- name: image
|
| 15 |
-
data: https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-cartoon/cartoon.png
|
| 16 |
-
inferencespec:
|
| 17 |
-
cpu: 2
|
| 18 |
-
memory: 4000
|
| 19 |
-
gpu: 1
|
| 20 |
-
gpu_memory: 16000
|
| 21 |
-
model_type:
|
| 22 |
-
- GAN
|
| 23 |
-
domain:
|
| 24 |
-
- cv
|
| 25 |
-
frameworks:
|
| 26 |
-
- TensorFlow
|
| 27 |
-
backbone:
|
| 28 |
-
- UNet
|
| 29 |
-
metrics:
|
| 30 |
-
- realism
|
| 31 |
-
customized-quickstart: True
|
| 32 |
-
finetune-support: True
|
| 33 |
-
license: Apache License 2.0
|
| 34 |
-
language:
|
| 35 |
-
- ch
|
| 36 |
-
tags:
|
| 37 |
-
- portrait stylization
|
| 38 |
-
- Alibaba
|
| 39 |
-
- SIGGRAPH 2022
|
| 40 |
-
datasets:
|
| 41 |
-
test:
|
| 42 |
-
- modelscope/human_face_portrait_compound_dataset
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
# DCT-Net人像卡通化模型-手绘风
|
| 46 |
-
|
| 47 |
-
### [论文](https://arxiv.org/abs/2207.02426) | [项目主页](https://menyifang.github.io/projects/DCTNet/DCTNet.html)
|
| 48 |
-
|
| 49 |
-
输入一张人物图像,实现端到端全图卡通化转换,生成手绘风格虚拟形象,返回风格化后的结果图像。
|
| 50 |
-
|
| 51 |
-
其生成效果如下所示:
|
| 52 |
-
|
| 53 |
-

|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
## 模型描述
|
| 58 |
-
|
| 59 |
-
该任务采用一种全新的域校准图像翻译模型DCT-Net(Domain-Calibrated Translation),利用小样本的风格数据,即可得到高保真、强鲁棒、易拓展的人像风格转换模型,并通过端到端推理快速得到风格转换结果。
|
| 60 |
-
|
| 61 |
-

|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
## 使用方式和范围
|
| 65 |
-
|
| 66 |
-
使用方式:
|
| 67 |
-
- 支持GPU/CPU推理,在任意真实人物图像上进行直接推理;
|
| 68 |
-
|
| 69 |
-
使用范围:
|
| 70 |
-
- 包含人脸的人像照片(3通道RGB图像,支持PNG、JPG、JPEG格式),人脸分辨率大于100x100,总体图像分辨率小于3000×3000,低质人脸图像建议预先人脸增强处理。
|
| 71 |
-
|
| 72 |
-
目标场景:
|
| 73 |
-
- 艺术创作、社交娱乐、隐私保护场景,自动化生成卡通肖像。
|
| 74 |
-
|
| 75 |
-
### 如何使用
|
| 76 |
-
|
| 77 |
-
在ModelScope框架上,提供输入图片,即可以通过简单的Pipeline调用来使用人像卡通化模型。
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
#### 代码范例
|
| 81 |
-
|
| 82 |
-
- 模型推理(支持CPU/GPU):
|
| 83 |
-
|
| 84 |
-
```python
|
| 85 |
-
import cv2
|
| 86 |
-
from modelscope.outputs import OutputKeys
|
| 87 |
-
from modelscope.pipelines import pipeline
|
| 88 |
-
from modelscope.utils.constant import Tasks
|
| 89 |
-
|
| 90 |
-
img_cartoon = pipeline(Tasks.image_portrait_stylization,
|
| 91 |
-
model='damo/cv_unet_person-image-cartoon-handdrawn_compound-models')
|
| 92 |
-
# 图像本地路径
|
| 93 |
-
#img_path = 'input.png'
|
| 94 |
-
# 图像url链接
|
| 95 |
-
img_path = 'https://invi-label.oss-cn-shanghai.aliyuncs.com/label/cartoon/image_cartoon.png'
|
| 96 |
-
result = img_cartoon(img_path)
|
| 97 |
-
|
| 98 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 99 |
-
print('finished!')
|
| 100 |
-
```
|
| 101 |
-
|
| 102 |
-
- 模型训练:
|
| 103 |
-
|
| 104 |
-
环境要求:tf1.14/15及兼容cuda,支持GPU训练
|
| 105 |
-
|
| 106 |
-
```python
|
| 107 |
-
import os
|
| 108 |
-
import unittest
|
| 109 |
-
import cv2
|
| 110 |
-
from modelscope.exporters.cv import CartoonTranslationExporter
|
| 111 |
-
from modelscope.msdatasets import MsDataset
|
| 112 |
-
from modelscope.outputs import OutputKeys
|
| 113 |
-
from modelscope.pipelines import pipeline
|
| 114 |
-
from modelscope.pipelines.base import Pipeline
|
| 115 |
-
from modelscope.trainers.cv import CartoonTranslationTrainer
|
| 116 |
-
from modelscope.utils.constant import Tasks
|
| 117 |
-
from modelscope.utils.test_utils import test_level
|
| 118 |
-
|
| 119 |
-
model_id = 'damo/cv_unet_person-image-cartoon-handdrawn_compound-models'
|
| 120 |
-
data_dir = MsDataset.load(
|
| 121 |
-
'dctnet_train_clipart_mini_ms',
|
| 122 |
-
namespace='menyifang',
|
| 123 |
-
split='train').config_kwargs['split_config']['train']
|
| 124 |
-
|
| 125 |
-
data_photo = os.path.join(data_dir, 'face_photo')
|
| 126 |
-
data_cartoon = os.path.join(data_dir, 'face_cartoon')
|
| 127 |
-
work_dir = 'exp_localtoon'
|
| 128 |
-
max_steps = 10
|
| 129 |
-
trainer = CartoonTranslationTrainer(
|
| 130 |
-
model=model_id,
|
| 131 |
-
work_dir=work_dir,
|
| 132 |
-
photo=data_photo,
|
| 133 |
-
cartoon=data_cartoon,
|
| 134 |
-
max_steps=max_steps)
|
| 135 |
-
trainer.train()
|
| 136 |
-
```
|
| 137 |
-
|
| 138 |
-
上述训练代码仅仅提供简单训练的范例,对大规模自定义数据,替换data_photo为真实人脸数据路径,data_cartoon为卡通风格人脸数据路径,max_steps建议设置为300000,可视化结果将存储在work_dir下;此外configuration.json(~/.cache/modelscope/hub/damo/cv_unet_person-image-cartoon-handdrawn_compound-models/)可以进行自定义修改;
|
| 139 |
-
|
| 140 |
-
Note: notebook预装环境下存在numpy依赖冲突,可手动更新解决:pip install numpy==1.18.5
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
- 卡通人脸数据获取
|
| 144 |
-
|
| 145 |
-
卡通人脸数据可由设计师设计/网络收集得到,在此提供一种基于[Stable-Diffusion风格预训练模型](https://modelscope.cn/models/damo/cv_cartoon_stable_diffusion_design/summary)的卡通数据生成方式
|
| 146 |
-
|
| 147 |
-
```python
|
| 148 |
-
import cv2
|
| 149 |
-
from modelscope.pipelines import pipeline
|
| 150 |
-
from modelscope.utils.constant import Tasks
|
| 151 |
-
|
| 152 |
-
pipe = pipeline(Tasks.text_to_image_synthesis, model='damo/cv_cartoon_stable_diffusion_clipart', model_revision='v1.0.0')
|
| 153 |
-
from diffusers.schedulers import EulerAncestralDiscreteScheduler
|
| 154 |
-
pipe.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.pipeline.scheduler.config)
|
| 155 |
-
output = pipe({'text': 'archer style, a portrait painting of Johnny Depp'})
|
| 156 |
-
cv2.imwrite('result.png', output['output_imgs'][0])
|
| 157 |
-
print('Image saved to result.png')
|
| 158 |
-
|
| 159 |
-
print('finished!')
|
| 160 |
-
```
|
| 161 |
-
可通过替换Johnny Depp为其他名人姓名,产生多样化风格数据,通过人脸对齐裁剪即可得到卡通人脸数据;可以通过修改pipeline的model参数指定不同风格的SD预训练模型。
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
### 模型局限性以及可能的偏差
|
| 165 |
-
|
| 166 |
-
- 低质/低分辨率人脸图像由于本身内容信息丢失严重,无法得到理想转换效果,可预先采用人脸增强模型预处理图像解决;
|
| 167 |
-
|
| 168 |
-
- 小样本数据涵盖场景有限,人脸暗光、阴影干扰可能会影响生成效果,同样不适用于黑白照片。
|
| 169 |
-
|
| 170 |
-
## 训练数据介绍
|
| 171 |
-
|
| 172 |
-
训练数据从公开数据集(COCO等)、互联网搜索人像图像,并进行标注作为训练数据。
|
| 173 |
-
|
| 174 |
-
- 真实人脸数据:[FFHQ](https://github.com/NVlabs/ffhq-dataset),常用的人脸公开数据集,包含7万张人脸图像;
|
| 175 |
-
|
| 176 |
-
- 卡通人脸数据,互联网搜集,100+张
|
| 177 |
-
|
| 178 |
-
## 模型推理流程
|
| 179 |
-
|
| 180 |
-
### 预处理
|
| 181 |
-
|
| 182 |
-
- 人脸关键点检测
|
| 183 |
-
- 人脸提取&对齐,得到256x256大小的对齐人脸
|
| 184 |
-
|
| 185 |
-
### 推理
|
| 186 |
-
|
| 187 |
-
- 为控制推理效率,人脸及背景resize到指定大小分别推理,再背景融合得到最终效果;
|
| 188 |
-
- 亦可将整图依据人脸尺度整体缩放到合适尺寸,直接单次推理
|
| 189 |
-
|
| 190 |
-
## 数据评估及结果
|
| 191 |
-
|
| 192 |
-
使用CelebA公开人脸数据集进行评测,在FID/ID/用户偏好等指标上均达SOTA结果:
|
| 193 |
-
|
| 194 |
-
| Method | FID | ID | Pref.A | Pref.B |
|
| 195 |
-
| ------------ | ------------ | ------------ | ------------ | ------------ |
|
| 196 |
-
| CycleGAN | 57.08 | 0.55 | 7.1 | 1.4 |
|
| 197 |
-
| U-GAT-IT | 68.40 | 0.58 | 5.0 | 1.5 |
|
| 198 |
-
| Toonify | 55.27 | 0.62 | 3.7 | 4.2 |
|
| 199 |
-
| pSp | 69.38 | 0.60 | 1.6 | 2.5 |
|
| 200 |
-
| Ours | **35.92** | **0.71** | **82.6** | **90.5** |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
## 引用
|
| 204 |
-
如果该模型对你有所帮助,请引用相关的论文:
|
| 205 |
-
|
| 206 |
-
```BibTeX
|
| 207 |
-
@article{men2022domain,
|
| 208 |
-
title={DCT-Net: Domain-Calibrated Translation for Portrait Stylization},
|
| 209 |
-
author={Men, Yifang and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong},
|
| 210 |
-
journal={ACM Transactions on Graphics (TOG)},
|
| 211 |
-
volume={41},
|
| 212 |
-
number={4},
|
| 213 |
-
pages={1--9},
|
| 214 |
-
year={2022}
|
| 215 |
-
}
|
| 216 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/configuration.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "tensorflow",
|
| 3 |
-
"task": "image-portrait-stylization",
|
| 4 |
-
"pipeline": {
|
| 5 |
-
"type": "unet-person-image-cartoon"
|
| 6 |
-
},
|
| 7 |
-
"train": {
|
| 8 |
-
"num_gpus": 1,
|
| 9 |
-
"batch_size": 32,
|
| 10 |
-
"adv_train_lr": 2e-4,
|
| 11 |
-
"max_steps": 300000,
|
| 12 |
-
"logging_interval": 1000,
|
| 13 |
-
"ckpt_period_interval": 1000,
|
| 14 |
-
"resume_epoch": 309999,
|
| 15 |
-
"patch_size": 256,
|
| 16 |
-
"work_dir": "exp_localtoon",
|
| 17 |
-
"photo": "/PATH/TO/PHOTO/DIR",
|
| 18 |
-
"cartoon": "/PATH/TO/CARTOON/DIR"
|
| 19 |
-
}
|
| 20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/tf_ckpts/model-309999.data-00000-of-00001
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b4a56e9c4768097b3d12d769b4c4acd08cb91bffcc50e9e0b3b3161f62e42373
|
| 3 |
-
size 31380852
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-handdrawn_compound-models/tf_ckpts/model-309999.index
DELETED
|
Binary file (11.4 kB)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.mdl
DELETED
|
Binary file (81 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.msc
DELETED
|
Binary file (896 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.0.1,CreatedAt:1678850802
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/README.md
DELETED
|
@@ -1,214 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-portrait-stylization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-portrait-stylization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
validator:
|
| 9 |
-
max_size: 10M
|
| 10 |
-
max_resolution: 6000*6000
|
| 11 |
-
examples:
|
| 12 |
-
- name: 1
|
| 13 |
-
inputs:
|
| 14 |
-
- name: image
|
| 15 |
-
data: https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-cartoon/cartoon.png
|
| 16 |
-
inferencespec:
|
| 17 |
-
cpu: 2
|
| 18 |
-
memory: 4000
|
| 19 |
-
gpu: 1
|
| 20 |
-
gpu_memory: 16000
|
| 21 |
-
model_type:
|
| 22 |
-
- GAN
|
| 23 |
-
domain:
|
| 24 |
-
- cv
|
| 25 |
-
frameworks:
|
| 26 |
-
- TensorFlow
|
| 27 |
-
backbone:
|
| 28 |
-
- UNet
|
| 29 |
-
metrics:
|
| 30 |
-
- realism
|
| 31 |
-
customized-quickstart: True
|
| 32 |
-
finetune-support: True
|
| 33 |
-
license: Apache License 2.0
|
| 34 |
-
language:
|
| 35 |
-
- ch
|
| 36 |
-
tags:
|
| 37 |
-
- portrait stylization
|
| 38 |
-
- Alibaba
|
| 39 |
-
- SIGGRAPH 2022
|
| 40 |
-
datasets:
|
| 41 |
-
test:
|
| 42 |
-
- modelscope/human_face_portrait_compound_dataset
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
# DCT-Net人像卡通化-扩散模型-插画风
|
| 46 |
-
|
| 47 |
-
### [论文](https://arxiv.org/abs/2207.02426) | [项目主页](https://menyifang.github.io/projects/DCTNet/DCTNet.html)
|
| 48 |
-
|
| 49 |
-
输入一张人物图像,实现端到端全图卡通化转换,生成插画风格虚拟形象,返回风格化后的结果图像。
|
| 50 |
-
|
| 51 |
-
其生成效果如下所示:
|
| 52 |
-
|
| 53 |
-

|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
## 模型描述
|
| 57 |
-
|
| 58 |
-
该任务采用一种全新的域校准图像翻译模型DCT-Net(Domain-Calibrated Translation),结合Stable-Diffusion扩散模型生成小样本的风格数据,即可训练得到高保真、强鲁棒、易拓展的人像风格转换模型,并通过端到端推理快速得到风格转换结果。
|
| 59 |
-

|
| 60 |
-
|
| 61 |
-
## 使用方式和范围
|
| 62 |
-
|
| 63 |
-
使用方式:
|
| 64 |
-
- 支持GPU/CPU推理,在任意真实人物图像上进行直接推理;
|
| 65 |
-
|
| 66 |
-
使用范围:
|
| 67 |
-
- 包含人脸的人像照片(3通道RGB图像,支持PNG、JPG、JPEG格式),人脸分辨率大于100x100,总体图像分辨率小于3000×3000,低质人脸图像建议预先人脸增强处理。
|
| 68 |
-
|
| 69 |
-
目标场景:
|
| 70 |
-
- 艺术创作、社交娱乐、隐私保护场景,自动化生成卡通肖像。
|
| 71 |
-
|
| 72 |
-
### 如何使用
|
| 73 |
-
|
| 74 |
-
在ModelScope框架上,提供输入图片,即可以通过简单的Pipeline调用来使用人像卡通化模型。
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
#### 代码范例
|
| 78 |
-
|
| 79 |
-
- 模型推理(支持CPU/GPU):
|
| 80 |
-
```python
|
| 81 |
-
import cv2
|
| 82 |
-
from modelscope.outputs import OutputKeys
|
| 83 |
-
from modelscope.pipelines import pipeline
|
| 84 |
-
from modelscope.utils.constant import Tasks
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
img_cartoon = pipeline(Tasks.image_portrait_stylization,
|
| 88 |
-
model='damo/cv_unet_person-image-cartoon-sd-design_compound-models', model_revision='v1.0.0')
|
| 89 |
-
# 图像本地路径
|
| 90 |
-
#img_path = 'input.png'
|
| 91 |
-
# 图像url链接
|
| 92 |
-
img_path = 'https://invi-label.oss-cn-shanghai.aliyuncs.com/label/cartoon/image_cartoon.png'
|
| 93 |
-
result = img_cartoon(img_path)
|
| 94 |
-
|
| 95 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 96 |
-
print('finished!')
|
| 97 |
-
|
| 98 |
-
```
|
| 99 |
-
|
| 100 |
-
- 模型训练:
|
| 101 |
-
|
| 102 |
-
环境要求:tf1.14/15及兼容cuda,支持GPU训练
|
| 103 |
-
|
| 104 |
-
```python
|
| 105 |
-
import os
|
| 106 |
-
import unittest
|
| 107 |
-
import cv2
|
| 108 |
-
from modelscope.exporters.cv import CartoonTranslationExporter
|
| 109 |
-
from modelscope.msdatasets import MsDataset
|
| 110 |
-
from modelscope.outputs import OutputKeys
|
| 111 |
-
from modelscope.pipelines import pipeline
|
| 112 |
-
from modelscope.pipelines.base import Pipeline
|
| 113 |
-
from modelscope.trainers.cv import CartoonTranslationTrainer
|
| 114 |
-
from modelscope.utils.constant import Tasks
|
| 115 |
-
from modelscope.utils.test_utils import test_level
|
| 116 |
-
|
| 117 |
-
model_id = 'damo/cv_unet_person-image-cartoon-sd-design_compound-models'
|
| 118 |
-
data_dir = MsDataset.load(
|
| 119 |
-
'dctnet_train_clipart_mini_ms',
|
| 120 |
-
namespace='menyifang',
|
| 121 |
-
split='train').config_kwargs['split_config']['train']
|
| 122 |
-
|
| 123 |
-
data_photo = os.path.join(data_dir, 'face_photo')
|
| 124 |
-
data_cartoon = os.path.join(data_dir, 'face_cartoon')
|
| 125 |
-
work_dir = 'exp_localtoon'
|
| 126 |
-
max_steps = 10
|
| 127 |
-
trainer = CartoonTranslationTrainer(
|
| 128 |
-
model=model_id,
|
| 129 |
-
work_dir=work_dir,
|
| 130 |
-
photo=data_photo,
|
| 131 |
-
cartoon=data_cartoon,
|
| 132 |
-
max_steps=max_steps)
|
| 133 |
-
trainer.train()
|
| 134 |
-
```
|
| 135 |
-
|
| 136 |
-
上述训练代码仅仅提供简单训练的范例,对大规模自定义数据,替换data_photo为真实人脸数据路径,data_cartoon为卡通风格人脸数据路径,max_steps建议设置为300000,可视化结果将存储在work_dir下;此外configuration.json(~/.cache/modelscope/hub/damo/cv_unet_person-image-cartoon-sd-design_compound-models/)可以进行自定义修改;
|
| 137 |
-
|
| 138 |
-
Note: notebook预装环境下存在numpy依赖冲突,可手动更新解决:pip install numpy==1.18.5
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
- 卡通人脸数据获取
|
| 142 |
-
|
| 143 |
-
卡通人脸数据可由设计师设计/网络收集得到,在此提供一种基于[Stable-Diffusion风格预训练模型](https://modelscope.cn/models/damo/cv_cartoon_stable_diffusion_design/summary)的卡通数据生成方式
|
| 144 |
-
|
| 145 |
-
```python
|
| 146 |
-
import cv2
|
| 147 |
-
from modelscope.pipelines import pipeline
|
| 148 |
-
from modelscope.utils.constant import Tasks
|
| 149 |
-
|
| 150 |
-
pipe = pipeline(Tasks.text_to_image_synthesis, model='damo/cv_cartoon_stable_diffusion_clipart', model_revision='v1.0.0')
|
| 151 |
-
from diffusers.schedulers import EulerAncestralDiscreteScheduler
|
| 152 |
-
pipe.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.pipeline.scheduler.config)
|
| 153 |
-
output = pipe({'text': 'archer style, a portrait painting of Johnny Depp'})
|
| 154 |
-
cv2.imwrite('result.png', output['output_imgs'][0])
|
| 155 |
-
print('Image saved to result.png')
|
| 156 |
-
|
| 157 |
-
print('finished!')
|
| 158 |
-
```
|
| 159 |
-
可通过替换Johnny Depp为其他名人姓名,产生多样化风格数据,通过人脸对齐裁剪即可得到卡通人脸数据;可以通过修改pipeline的model参数指定不同风格的SD预训练模型。
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
### 模型局限性以及可能的偏差
|
| 163 |
-
|
| 164 |
-
- 低质/低分辨率人脸图像由于本身内容信息丢失严重,无法得到理想转换效果,可预先采用人脸增强模型预处理图像解决;
|
| 165 |
-
|
| 166 |
-
- 小样本数据涵盖场景有限,人脸暗光、阴影干扰可能会影响生成效果。
|
| 167 |
-
|
| 168 |
-
## 训练数据介绍
|
| 169 |
-
|
| 170 |
-
训练数据从公开数据集(COCO等)、互联网搜索人像图像,并进行标注作为训练数据。
|
| 171 |
-
|
| 172 |
-
- 真实人脸数据:[FFHQ](https://github.com/NVlabs/ffhq-dataset),常用的人脸公开数据集,包含7万张人脸图像;
|
| 173 |
-
|
| 174 |
-
- 卡通人脸数据,互联网搜集,100+张
|
| 175 |
-
|
| 176 |
-
## 模型推理流程
|
| 177 |
-
|
| 178 |
-
### 预处理
|
| 179 |
-
|
| 180 |
-
- 人脸关键点检测
|
| 181 |
-
- 人脸提取&对齐,得到256x256大小的对齐人脸
|
| 182 |
-
|
| 183 |
-
### 推理
|
| 184 |
-
|
| 185 |
-
- 为控制推理效率,人脸及背景resize到指定大小分别推理,再背景融合得到最终效果;
|
| 186 |
-
- 亦可将整图依据人脸尺度整体缩放到合适尺寸,直接单次推理
|
| 187 |
-
|
| 188 |
-
## 数据评估及结果
|
| 189 |
-
|
| 190 |
-
使用CelebA公开人脸数据集进行评测,在FID/ID/用户偏好等指标上均达SOTA结果:
|
| 191 |
-
|
| 192 |
-
| Method | FID | ID | Pref.A | Pref.B |
|
| 193 |
-
| ------------ | ------------ | ------------ | ------------ | ------------ |
|
| 194 |
-
| CycleGAN | 57.08 | 0.55 | 7.1 | 1.4 |
|
| 195 |
-
| U-GAT-IT | 68.40 | 0.58 | 5.0 | 1.5 |
|
| 196 |
-
| Toonify | 55.27 | 0.62 | 3.7 | 4.2 |
|
| 197 |
-
| pSp | 69.38 | 0.60 | 1.6 | 2.5 |
|
| 198 |
-
| Ours | **35.92** | **0.71** | **82.6** | **90.5** |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
## 引用
|
| 202 |
-
如果该模型对你有所帮助,请引用相关的论文:
|
| 203 |
-
|
| 204 |
-
```BibTeX
|
| 205 |
-
@article{men2022domain,
|
| 206 |
-
title={DCT-Net: Domain-Calibrated Translation for Portrait Stylization},
|
| 207 |
-
author={Men, Yifang and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong},
|
| 208 |
-
journal={ACM Transactions on Graphics (TOG)},
|
| 209 |
-
volume={41},
|
| 210 |
-
number={4},
|
| 211 |
-
pages={1--9},
|
| 212 |
-
year={2022}
|
| 213 |
-
}
|
| 214 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/configuration.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "tensorflow",
|
| 3 |
-
"task": "image-portrait-stylization",
|
| 4 |
-
"pipeline": {
|
| 5 |
-
"type": "unet-person-image-cartoon"
|
| 6 |
-
},
|
| 7 |
-
"train": {
|
| 8 |
-
"num_gpus": 1,
|
| 9 |
-
"batch_size": 32,
|
| 10 |
-
"adv_train_lr": 2e-4,
|
| 11 |
-
"max_steps": 300000,
|
| 12 |
-
"logging_interval": 1000,
|
| 13 |
-
"ckpt_period_interval": 1000,
|
| 14 |
-
"resume_epoch": 122999,
|
| 15 |
-
"patch_size": 256,
|
| 16 |
-
"work_dir": "exp_localtoon",
|
| 17 |
-
"photo": "/PATH/TO/PHOTO/DIR",
|
| 18 |
-
"cartoon": "/PATH/TO/CARTOON/DIR"
|
| 19 |
-
}
|
| 20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/tf_ckpts/model-122999.data-00000-of-00001
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f7ea1e082436759e49fce3a6577e3233bb8748fa68e1e935441e0c3c6a913750
|
| 3 |
-
size 24492884
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sd-design_compound-models/tf_ckpts/model-122999.index
DELETED
|
Binary file (7.82 kB)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.mdl
DELETED
|
Binary file (78 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.msc
DELETED
|
Binary file (894 Bytes)
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/.mv
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Revision:v1.0.1,CreatedAt:1678850776
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/README.md
DELETED
|
@@ -1,215 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tasks:
|
| 3 |
-
- image-portrait-stylization
|
| 4 |
-
widgets:
|
| 5 |
-
- task: image-portrait-stylization
|
| 6 |
-
inputs:
|
| 7 |
-
- type: image
|
| 8 |
-
validator:
|
| 9 |
-
max_size: 10M
|
| 10 |
-
max_resolution: 6000*6000
|
| 11 |
-
examples:
|
| 12 |
-
- name: 1
|
| 13 |
-
inputs:
|
| 14 |
-
- name: image
|
| 15 |
-
data: https://modelscope.oss-cn-beijing.aliyuncs.com/demo/image-cartoon/cartoon.png
|
| 16 |
-
inferencespec:
|
| 17 |
-
cpu: 2
|
| 18 |
-
memory: 4000
|
| 19 |
-
gpu: 1
|
| 20 |
-
gpu_memory: 16000
|
| 21 |
-
model_type:
|
| 22 |
-
- GAN
|
| 23 |
-
domain:
|
| 24 |
-
- cv
|
| 25 |
-
frameworks:
|
| 26 |
-
- TensorFlow
|
| 27 |
-
backbone:
|
| 28 |
-
- UNet
|
| 29 |
-
metrics:
|
| 30 |
-
- realism
|
| 31 |
-
customized-quickstart: True
|
| 32 |
-
finetune-support: True
|
| 33 |
-
license: Apache License 2.0
|
| 34 |
-
language:
|
| 35 |
-
- ch
|
| 36 |
-
tags:
|
| 37 |
-
- portrait stylization
|
| 38 |
-
- Alibaba
|
| 39 |
-
- SIGGRAPH 2022
|
| 40 |
-
datasets:
|
| 41 |
-
test:
|
| 42 |
-
- modelscope/human_face_portrait_compound_dataset
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
# DCT-Net人像卡通化模型-素描风
|
| 46 |
-
|
| 47 |
-
### [论文](https://arxiv.org/abs/2207.02426) | [项目主页](https://menyifang.github.io/projects/DCTNet/DCTNet.html)
|
| 48 |
-
|
| 49 |
-
输入一张人物图像,实现端到端全图卡通化转换,生成素描风格虚拟形象,返回风格化后的结果图像。
|
| 50 |
-
|
| 51 |
-
其生成效果如下所示:
|
| 52 |
-
|
| 53 |
-

|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
## 模型描述
|
| 57 |
-
|
| 58 |
-
该任务采用一种全新的域校准图像翻译模型DCT-Net(Domain-Calibrated Translation),利用小样本的风格数据,即可得到高保真、强鲁棒、易拓展的人像风格转换模型,并通过端到端推理快速得到风格转换结果。
|
| 59 |
-
|
| 60 |
-

|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
## 使用方式和范围
|
| 64 |
-
|
| 65 |
-
使用方式:
|
| 66 |
-
- 支持GPU/CPU推理,在任意真实人物图像上进行直接推理;
|
| 67 |
-
|
| 68 |
-
使用范围:
|
| 69 |
-
- 包含人脸的人像照片(3通道RGB图像,支持PNG、JPG、JPEG格式),人脸分辨率大于100x100,总体图像分辨率小于3000×3000,低质人脸图像建议预先人脸增强处理。
|
| 70 |
-
|
| 71 |
-
目标场景:
|
| 72 |
-
- 艺术创作、社交娱乐、隐私保护场景,自动化生成卡通肖像。
|
| 73 |
-
|
| 74 |
-
### 如何使用
|
| 75 |
-
|
| 76 |
-
在ModelScope框架上,提供输入图片,即可以通过简单的Pipeline调用来使用人像卡通化模型。
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
#### 代码范例
|
| 80 |
-
|
| 81 |
-
- 模型推理(支持CPU/GPU):
|
| 82 |
-
|
| 83 |
-
```python
|
| 84 |
-
import cv2
|
| 85 |
-
from modelscope.outputs import OutputKeys
|
| 86 |
-
from modelscope.pipelines import pipeline
|
| 87 |
-
from modelscope.utils.constant import Tasks
|
| 88 |
-
|
| 89 |
-
img_cartoon = pipeline(Tasks.image_portrait_stylization,
|
| 90 |
-
model='damo/cv_unet_person-image-cartoon-sketch_compound-models')
|
| 91 |
-
# 图像本地路径
|
| 92 |
-
#img_path = 'input.png'
|
| 93 |
-
# 图像url链接
|
| 94 |
-
img_path = 'https://invi-label.oss-cn-shanghai.aliyuncs.com/label/cartoon/image_cartoon.png'
|
| 95 |
-
result = img_cartoon(img_path)
|
| 96 |
-
cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
|
| 97 |
-
print('finished!')
|
| 98 |
-
|
| 99 |
-
```
|
| 100 |
-
|
| 101 |
-
- 模型训练:
|
| 102 |
-
|
| 103 |
-
环境要求:tf1.14/15及兼容cuda,支持GPU训练
|
| 104 |
-
|
| 105 |
-
```python
|
| 106 |
-
import os
|
| 107 |
-
import unittest
|
| 108 |
-
import cv2
|
| 109 |
-
from modelscope.exporters.cv import CartoonTranslationExporter
|
| 110 |
-
from modelscope.msdatasets import MsDataset
|
| 111 |
-
from modelscope.outputs import OutputKeys
|
| 112 |
-
from modelscope.pipelines import pipeline
|
| 113 |
-
from modelscope.pipelines.base import Pipeline
|
| 114 |
-
from modelscope.trainers.cv import CartoonTranslationTrainer
|
| 115 |
-
from modelscope.utils.constant import Tasks
|
| 116 |
-
from modelscope.utils.test_utils import test_level
|
| 117 |
-
|
| 118 |
-
model_id = 'damo/cv_unet_person-image-cartoon-sketch_compound-models'
|
| 119 |
-
data_dir = MsDataset.load(
|
| 120 |
-
'dctnet_train_clipart_mini_ms',
|
| 121 |
-
namespace='menyifang',
|
| 122 |
-
split='train').config_kwargs['split_config']['train']
|
| 123 |
-
|
| 124 |
-
data_photo = os.path.join(data_dir, 'face_photo')
|
| 125 |
-
data_cartoon = os.path.join(data_dir, 'face_cartoon')
|
| 126 |
-
work_dir = 'exp_localtoon'
|
| 127 |
-
max_steps = 10
|
| 128 |
-
trainer = CartoonTranslationTrainer(
|
| 129 |
-
model=model_id,
|
| 130 |
-
work_dir=work_dir,
|
| 131 |
-
photo=data_photo,
|
| 132 |
-
cartoon=data_cartoon,
|
| 133 |
-
max_steps=max_steps)
|
| 134 |
-
trainer.train()
|
| 135 |
-
```
|
| 136 |
-
|
| 137 |
-
上述训练代码仅提供简单训练的范例;对大规模自定义数据,请将data_photo替换为真实人脸数据路径,将data_cartoon替换为卡通风格人脸数据路径,max_steps建议设置为300000,可视化结果将存储在work_dir下;此外,configuration.json(位于~/.cache/modelscope/hub/damo/cv_unet_person-image-cartoon-sketch_compound-models/)可以进行自定义修改。
|
| 138 |
-
|
| 139 |
-
Note: notebook预装环境下存在numpy依赖冲突,可手动更新解决:pip install numpy==1.18.5
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
- 卡通人脸数据获取
|
| 143 |
-
|
| 144 |
-
卡通人脸数据可由设计师设计/网络收集得到,在此提供一种基于[Stable-Diffusion风格预训练模型](https://modelscope.cn/models/damo/cv_cartoon_stable_diffusion_design/summary)的卡通数据生成方式
|
| 145 |
-
|
| 146 |
-
```python
|
| 147 |
-
import cv2
|
| 148 |
-
from modelscope.pipelines import pipeline
|
| 149 |
-
from modelscope.utils.constant import Tasks
|
| 150 |
-
|
| 151 |
-
pipe = pipeline(Tasks.text_to_image_synthesis, model='damo/cv_cartoon_stable_diffusion_clipart', model_revision='v1.0.0')
|
| 152 |
-
from diffusers.schedulers import EulerAncestralDiscreteScheduler
|
| 153 |
-
pipe.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.pipeline.scheduler.config)
|
| 154 |
-
output = pipe({'text': 'archer style, a portrait painting of Johnny Depp'})
|
| 155 |
-
cv2.imwrite('result.png', output['output_imgs'][0])
|
| 156 |
-
print('Image saved to result.png')
|
| 157 |
-
|
| 158 |
-
print('finished!')
|
| 159 |
-
```
|
| 160 |
-
可通过替换Johnny Depp为其他名人姓名,产生多样化风格数据,通过人脸对齐裁剪即可得到卡通人脸数据;可以通过修改pipeline的model参数指定不同风格的SD预训练模型。
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
### 模型局限性以及可能的偏差
|
| 164 |
-
|
| 165 |
-
- 低质/低分辨率人脸图像由于本身内容信息丢失严重,无法得到理想转换效果,可预先采用人脸增强模型预处理图像解决;
|
| 166 |
-
|
| 167 |
-
- 小样本数据涵盖场景有限,人脸暗光、阴影干扰可能会影响生成效果。
|
| 168 |
-
|
| 169 |
-
## 训练数据介绍
|
| 170 |
-
|
| 171 |
-
训练数据来源于公开数据集(COCO等)以及互联网搜集的人像图像,经标注后用于训练。
|
| 172 |
-
|
| 173 |
-
- 真实人脸数据:[FFHQ](https://github.com/NVlabs/ffhq-dataset),常用的人脸公开数据集,包含7万张人脸图像;
|
| 174 |
-
|
| 175 |
-
- 卡通人脸数据,互联网搜集,100+张
|
| 176 |
-
|
| 177 |
-
## 模型推理流程
|
| 178 |
-
|
| 179 |
-
### 预处理
|
| 180 |
-
|
| 181 |
-
- 人脸关键点检测
|
| 182 |
-
- 人脸提取&对齐,得到256x256大小的对齐人脸
|
| 183 |
-
|
| 184 |
-
### 推理
|
| 185 |
-
|
| 186 |
-
- 为控制推理效率,人脸及背景resize到指定大小分别推理,再背景融合得到最终效果;
|
| 187 |
-
- 亦可将整图依据人脸尺度整体缩放到合适尺寸,直接单次推理
|
| 188 |
-
|
| 189 |
-
## 数据评估及结果
|
| 190 |
-
|
| 191 |
-
使用CelebA公开人脸数据集进行评测,在FID/ID/用户偏好等指标上均达SOTA结果:
|
| 192 |
-
|
| 193 |
-
| Method | FID | ID | Pref.A | Pref.B |
|
| 194 |
-
| ------------ | ------------ | ------------ | ------------ | ------------ |
|
| 195 |
-
| CycleGAN | 57.08 | 0.55 | 7.1 | 1.4 |
|
| 196 |
-
| U-GAT-IT | 68.40 | 0.58 | 5.0 | 1.5 |
|
| 197 |
-
| Toonify | 55.27 | 0.62 | 3.7 | 4.2 |
|
| 198 |
-
| pSp | 69.38 | 0.60 | 1.6 | 2.5 |
|
| 199 |
-
| Ours | **35.92** | **0.71** | **82.6** | **90.5** |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
## 引用
|
| 203 |
-
如果该模型对你有所帮助,请引用相关的论文:
|
| 204 |
-
|
| 205 |
-
```BibTeX
|
| 206 |
-
@inproceedings{men2022domain,
|
| 207 |
-
title={DCT-Net: Domain-Calibrated Translation for Portrait Stylization},
|
| 208 |
-
author={Men, Yifang and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong},
|
| 209 |
-
journal={ACM Transactions on Graphics (TOG)},
|
| 210 |
-
volume={41},
|
| 211 |
-
number={4},
|
| 212 |
-
pages={1--9},
|
| 213 |
-
year={2022}
|
| 214 |
-
}
|
| 215 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modelscope/hub/models/iic/cv_unet_person-image-cartoon-sketch_compound-models/configuration.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"framework": "tensorflow",
|
| 3 |
-
"task": "image-portrait-stylization",
|
| 4 |
-
"pipeline": {
|
| 5 |
-
"type": "unet-person-image-cartoon"
|
| 6 |
-
},
|
| 7 |
-
"train": {
|
| 8 |
-
"num_gpus": 1,
|
| 9 |
-
"batch_size": 32,
|
| 10 |
-
"adv_train_lr": 2e-4,
|
| 11 |
-
"max_steps": 300000,
|
| 12 |
-
"logging_interval": 1000,
|
| 13 |
-
"ckpt_period_interval": 1000,
|
| 14 |
-
"resume_epoch": 28999,
|
| 15 |
-
"patch_size": 256,
|
| 16 |
-
"work_dir": "exp_localtoon",
|
| 17 |
-
"photo": "/PATH/TO/PHOTO/DIR",
|
| 18 |
-
"cartoon": "/PATH/TO/CARTOON/DIR"
|
| 19 |
-
}
|
| 20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|