Spaces:
Running
Running
update to latest version
#5
by
akhaliq
HF staff
- opened
- .pre-commit-config.yaml +12 -2
- README.md +1 -2
- app.py +126 -113
- model.py +29 -5
- pose_images/000.png +2 -2
- pose_images/001.png +2 -2
- pose_images/002.png +2 -2
- pose_images/003.png +2 -2
- pose_images/004.png +2 -2
- pose_images/005.png +2 -2
- requirements.txt +6 -7
.pre-commit-config.yaml
CHANGED
@@ -21,11 +21,11 @@ repos:
|
|
21 |
- id: docformatter
|
22 |
args: ['--in-place']
|
23 |
- repo: https://github.com/pycqa/isort
|
24 |
-
rev: 5.
|
25 |
hooks:
|
26 |
- id: isort
|
27 |
- repo: https://github.com/pre-commit/mirrors-mypy
|
28 |
-
rev: v0.
|
29 |
hooks:
|
30 |
- id: mypy
|
31 |
args: ['--ignore-missing-imports']
|
@@ -34,3 +34,13 @@ repos:
|
|
34 |
hooks:
|
35 |
- id: yapf
|
36 |
args: ['--parallel', '--in-place']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
- id: docformatter
|
22 |
args: ['--in-place']
|
23 |
- repo: https://github.com/pycqa/isort
|
24 |
+
rev: 5.10.1
|
25 |
hooks:
|
26 |
- id: isort
|
27 |
- repo: https://github.com/pre-commit/mirrors-mypy
|
28 |
+
rev: v0.812
|
29 |
hooks:
|
30 |
- id: mypy
|
31 |
args: ['--ignore-missing-imports']
|
34 |
hooks:
|
35 |
- id: yapf
|
36 |
args: ['--parallel', '--in-place']
|
37 |
+
- repo: https://github.com/kynan/nbstripout
|
38 |
+
rev: 0.5.0
|
39 |
+
hooks:
|
40 |
+
- id: nbstripout
|
41 |
+
args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
|
42 |
+
- repo: https://github.com/nbQA-dev/nbQA
|
43 |
+
rev: 1.3.1
|
44 |
+
hooks:
|
45 |
+
- id: nbqa-isort
|
46 |
+
- id: nbqa-yapf
|
README.md
CHANGED
@@ -4,10 +4,9 @@ emoji: 🏃
|
|
4 |
colorFrom: purple
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
suggested_hardware: t4-small
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
4 |
colorFrom: purple
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.0.13
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
app.py
CHANGED
@@ -2,139 +2,152 @@
|
|
2 |
|
3 |
from __future__ import annotations
|
4 |
|
|
|
5 |
import os
|
6 |
import pathlib
|
7 |
-
import random
|
8 |
-
import shlex
|
9 |
import subprocess
|
10 |
|
11 |
import gradio as gr
|
12 |
-
import numpy as np
|
13 |
|
14 |
if os.getenv('SYSTEM') == 'spaces':
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
mim.install('mmcv-full==1.5.2', is_yes=True)
|
19 |
-
|
20 |
-
with open('patch') as f:
|
21 |
-
subprocess.run(shlex.split('patch -p1'), cwd='Text2Human', stdin=f)
|
22 |
|
23 |
from model import Model
|
24 |
|
25 |
-
DESCRIPTION = '''#
|
26 |
|
|
|
27 |
You can modify sample steps and seeds. By varying seeds, you can sample different human images under the same pose, shape description, and texture description. The larger the sample steps, the better quality of the generated images. (The default value of sample steps is 256 in the original repo.)
|
28 |
|
29 |
Label image generation step can be skipped. However, in that case, the input label image must be 512x256 in size and must contain only the specified colors.
|
30 |
'''
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
Note: The outer clothing type and accessories can be omitted.''')
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
'''<upper clothing texture>, <lower clothing texture>, <outer clothing texture>
|
83 |
Note: Currently, only 5 types of textures are supported, i.e., pure color, stripe/spline, plaid/lattice, floral, denim.'''
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
)
|
119 |
-
generate_label_button.click(
|
120 |
-
fn=model.generate_label_image,
|
121 |
-
inputs=[
|
122 |
-
pose_data,
|
123 |
-
shape_text,
|
124 |
-
],
|
125 |
-
outputs=label_image,
|
126 |
-
)
|
127 |
-
generate_human_button.click(fn=randomize_seed_fn,
|
128 |
-
inputs=[seed, randomize_seed],
|
129 |
-
outputs=seed,
|
130 |
-
queue=False).then(
|
131 |
-
fn=model.generate_human,
|
132 |
inputs=[
|
133 |
label_image,
|
134 |
texture_text,
|
135 |
sample_steps,
|
136 |
seed,
|
137 |
],
|
138 |
-
outputs=result
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
from __future__ import annotations
|
4 |
|
5 |
+
import argparse
|
6 |
import os
|
7 |
import pathlib
|
|
|
|
|
8 |
import subprocess
|
9 |
|
10 |
import gradio as gr
|
|
|
11 |
|
12 |
if os.getenv('SYSTEM') == 'spaces':
|
13 |
+
subprocess.call('pip uninstall -y mmcv-full'.split())
|
14 |
+
subprocess.call('pip install mmcv-full==1.5.2'.split())
|
15 |
+
subprocess.call('git apply ../patch'.split(), cwd='Text2Human')
|
|
|
|
|
|
|
|
|
16 |
|
17 |
from model import Model
|
18 |
|
19 |
+
DESCRIPTION = '''# Text2Human
|
20 |
|
21 |
+
This is an unofficial demo for <a href="https://github.com/yumingj/Text2Human">https://github.com/yumingj/Text2Human</a>.
|
22 |
You can modify sample steps and seeds. By varying seeds, you can sample different human images under the same pose, shape description, and texture description. The larger the sample steps, the better quality of the generated images. (The default value of sample steps is 256 in the original repo.)
|
23 |
|
24 |
Label image generation step can be skipped. However, in that case, the input label image must be 512x256 in size and must contain only the specified colors.
|
25 |
'''
|
26 |
+
FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=hysts.text2human" />'
|
27 |
+
|
28 |
+
|
29 |
+
def parse_args() -> argparse.Namespace:
|
30 |
+
parser = argparse.ArgumentParser()
|
31 |
+
parser.add_argument('--device', type=str, default='cpu')
|
32 |
+
parser.add_argument('--theme', type=str)
|
33 |
+
parser.add_argument('--share', action='store_true')
|
34 |
+
parser.add_argument('--port', type=int)
|
35 |
+
parser.add_argument('--disable-queue',
|
36 |
+
dest='enable_queue',
|
37 |
+
action='store_false')
|
38 |
+
return parser.parse_args()
|
39 |
+
|
40 |
+
|
41 |
+
def set_example_image(example: list) -> dict:
|
42 |
+
return gr.Image.update(value=example[0])
|
43 |
+
|
44 |
+
|
45 |
+
def set_example_text(example: list) -> dict:
|
46 |
+
return gr.Textbox.update(value=example[0])
|
47 |
+
|
48 |
+
|
49 |
+
def main():
|
50 |
+
args = parse_args()
|
51 |
+
model = Model(args.device)
|
52 |
+
|
53 |
+
with gr.Blocks(theme=args.theme, css='style.css') as demo:
|
54 |
+
gr.Markdown(DESCRIPTION)
|
55 |
+
|
56 |
+
with gr.Row():
|
57 |
+
with gr.Column():
|
58 |
+
with gr.Row():
|
59 |
+
input_image = gr.Image(label='Input Pose Image',
|
60 |
+
type='pil',
|
61 |
+
elem_id='input-image')
|
62 |
+
pose_data = gr.Variable()
|
63 |
+
with gr.Row():
|
64 |
+
paths = sorted(pathlib.Path('pose_images').glob('*.png'))
|
65 |
+
example_images = gr.Dataset(components=[input_image],
|
66 |
+
samples=[[path.as_posix()]
|
67 |
+
for path in paths])
|
68 |
+
with gr.Row():
|
69 |
+
shape_text = gr.Textbox(
|
70 |
+
label='Shape Description',
|
71 |
+
placeholder=
|
72 |
+
'''<gender>, <sleeve length>, <length of lower clothing>, <outer clothing type>, <other accessories1>, ...
|
73 |
Note: The outer clothing type and accessories can be omitted.''')
|
74 |
+
with gr.Row():
|
75 |
+
shape_example_texts = gr.Dataset(
|
76 |
+
components=[shape_text],
|
77 |
+
samples=[['man, sleeveless T-shirt, long pants'],
|
78 |
+
['woman, short-sleeve T-shirt, short jeans']])
|
79 |
+
with gr.Row():
|
80 |
+
generate_label_button = gr.Button('Generate Label Image')
|
81 |
+
|
82 |
+
with gr.Column():
|
83 |
+
with gr.Row():
|
84 |
+
label_image = gr.Image(label='Label Image',
|
85 |
+
type='numpy',
|
86 |
+
elem_id='label-image')
|
87 |
+
with gr.Row():
|
88 |
+
texture_text = gr.Textbox(
|
89 |
+
label='Texture Description',
|
90 |
+
placeholder=
|
91 |
+
'''<upper clothing texture>, <lower clothing texture>, <outer clothing texture>
|
|
|
92 |
Note: Currently, only 5 types of textures are supported, i.e., pure color, stripe/spline, plaid/lattice, floral, denim.'''
|
93 |
+
)
|
94 |
+
with gr.Row():
|
95 |
+
texture_example_texts = gr.Dataset(
|
96 |
+
components=[texture_text],
|
97 |
+
samples=[['pure color, denim'], ['floral, stripe']])
|
98 |
+
with gr.Row():
|
99 |
+
sample_steps = gr.Slider(10,
|
100 |
+
300,
|
101 |
+
value=10,
|
102 |
+
step=10,
|
103 |
+
label='Sample Steps')
|
104 |
+
with gr.Row():
|
105 |
+
seed = gr.Slider(0, 1000000, value=0, step=1, label='Seed')
|
106 |
+
with gr.Row():
|
107 |
+
generate_human_button = gr.Button('Generate Human')
|
108 |
+
|
109 |
+
with gr.Column():
|
110 |
+
with gr.Row():
|
111 |
+
result = gr.Image(label='Result',
|
112 |
+
type='numpy',
|
113 |
+
elem_id='result-image')
|
114 |
+
|
115 |
+
gr.Markdown(FOOTER)
|
116 |
+
|
117 |
+
input_image.change(fn=model.process_pose_image,
|
118 |
+
inputs=input_image,
|
119 |
+
outputs=pose_data)
|
120 |
+
generate_label_button.click(fn=model.generate_label_image,
|
121 |
+
inputs=[
|
122 |
+
pose_data,
|
123 |
+
shape_text,
|
124 |
+
],
|
125 |
+
outputs=label_image)
|
126 |
+
generate_human_button.click(fn=model.generate_human,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
inputs=[
|
128 |
label_image,
|
129 |
texture_text,
|
130 |
sample_steps,
|
131 |
seed,
|
132 |
],
|
133 |
+
outputs=result)
|
134 |
+
example_images.click(fn=set_example_image,
|
135 |
+
inputs=example_images,
|
136 |
+
outputs=example_images.components)
|
137 |
+
shape_example_texts.click(fn=set_example_text,
|
138 |
+
inputs=shape_example_texts,
|
139 |
+
outputs=shape_example_texts.components)
|
140 |
+
texture_example_texts.click(fn=set_example_text,
|
141 |
+
inputs=texture_example_texts,
|
142 |
+
outputs=texture_example_texts.components)
|
143 |
+
|
144 |
+
demo.launch(
|
145 |
+
enable_queue=args.enable_queue,
|
146 |
+
server_port=args.port,
|
147 |
+
share=args.share,
|
148 |
+
debug=True,
|
149 |
+
)
|
150 |
+
|
151 |
+
|
152 |
+
if __name__ == '__main__':
|
153 |
+
main()
|
model.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
from __future__ import annotations
|
2 |
|
|
|
|
|
3 |
import pathlib
|
4 |
import sys
|
5 |
import zipfile
|
@@ -17,6 +19,17 @@ from utils.language_utils import (generate_shape_attributes,
|
|
17 |
from utils.options import dict_to_nonedict, parse
|
18 |
from utils.util import set_random_seed
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
COLOR_LIST = [
|
21 |
(0, 0, 0),
|
22 |
(255, 250, 250),
|
@@ -46,10 +59,9 @@ COLOR_LIST = [
|
|
46 |
|
47 |
|
48 |
class Model:
|
49 |
-
def __init__(self):
|
50 |
-
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
51 |
self.config = self._load_config()
|
52 |
-
self.config['device'] = device
|
53 |
self._download_models()
|
54 |
self.model = SampleFromPoseModel(self.config)
|
55 |
self.model.batch_size = 1
|
@@ -64,14 +76,17 @@ class Model:
|
|
64 |
model_dir = pathlib.Path('pretrained_models')
|
65 |
if model_dir.exists():
|
66 |
return
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
model_dir.mkdir()
|
70 |
with zipfile.ZipFile(path) as f:
|
71 |
f.extractall(model_dir)
|
72 |
|
73 |
@staticmethod
|
74 |
def preprocess_pose_image(image: PIL.Image.Image) -> torch.Tensor:
|
|
|
75 |
image = np.array(
|
76 |
image.resize(
|
77 |
size=(256, 512),
|
@@ -83,8 +98,14 @@ class Model:
|
|
83 |
|
84 |
@staticmethod
|
85 |
def process_mask(mask: np.ndarray) -> np.ndarray:
|
|
|
86 |
if mask.shape != (512, 256, 3):
|
87 |
return None
|
|
|
|
|
|
|
|
|
|
|
88 |
seg_map = np.full(mask.shape[:-1], -1)
|
89 |
for index, color in enumerate(COLOR_LIST):
|
90 |
seg_map[np.sum(mask == color, axis=2) == 3] = index
|
@@ -111,6 +132,7 @@ class Model:
|
|
111 |
shape_text: str) -> np.ndarray:
|
112 |
if pose_data is None:
|
113 |
return
|
|
|
114 |
self.model.feed_pose_data(pose_data)
|
115 |
shape_attributes = generate_shape_attributes(shape_text)
|
116 |
shape_attributes = torch.LongTensor(shape_attributes).unsqueeze(0)
|
@@ -124,6 +146,8 @@ class Model:
|
|
124 |
sample_steps: int, seed: int) -> np.ndarray:
|
125 |
if label_image is None:
|
126 |
return
|
|
|
|
|
127 |
mask = label_image.copy()
|
128 |
seg_map = self.process_mask(mask)
|
129 |
if seg_map is None:
|
1 |
from __future__ import annotations
|
2 |
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
import pathlib
|
6 |
import sys
|
7 |
import zipfile
|
19 |
from utils.options import dict_to_nonedict, parse
|
20 |
from utils.util import set_random_seed
|
21 |
|
22 |
+
logger = logging.getLogger(__name__)
|
23 |
+
logger.setLevel(logging.DEBUG)
|
24 |
+
logger.propagate = False
|
25 |
+
formatter = logging.Formatter(
|
26 |
+
'[%(asctime)s] %(name)s %(levelname)s: %(message)s',
|
27 |
+
datefmt='%Y-%m-%d %H:%M:%S')
|
28 |
+
handler = logging.StreamHandler(stream=sys.stdout)
|
29 |
+
handler.setLevel(logging.DEBUG)
|
30 |
+
handler.setFormatter(formatter)
|
31 |
+
logger.addHandler(handler)
|
32 |
+
|
33 |
COLOR_LIST = [
|
34 |
(0, 0, 0),
|
35 |
(255, 250, 250),
|
59 |
|
60 |
|
61 |
class Model:
|
62 |
+
def __init__(self, device: str):
|
|
|
63 |
self.config = self._load_config()
|
64 |
+
self.config['device'] = device
|
65 |
self._download_models()
|
66 |
self.model = SampleFromPoseModel(self.config)
|
67 |
self.model.batch_size = 1
|
76 |
model_dir = pathlib.Path('pretrained_models')
|
77 |
if model_dir.exists():
|
78 |
return
|
79 |
+
token = os.getenv('HF_TOKEN')
|
80 |
+
path = huggingface_hub.hf_hub_download('hysts/Text2Human',
|
81 |
+
'orig/pretrained_models.zip',
|
82 |
+
use_auth_token=token)
|
83 |
model_dir.mkdir()
|
84 |
with zipfile.ZipFile(path) as f:
|
85 |
f.extractall(model_dir)
|
86 |
|
87 |
@staticmethod
|
88 |
def preprocess_pose_image(image: PIL.Image.Image) -> torch.Tensor:
|
89 |
+
logger.debug(f'{image.size=}')
|
90 |
image = np.array(
|
91 |
image.resize(
|
92 |
size=(256, 512),
|
98 |
|
99 |
@staticmethod
|
100 |
def process_mask(mask: np.ndarray) -> np.ndarray:
|
101 |
+
logger.debug(f'{mask.shape=}')
|
102 |
if mask.shape != (512, 256, 3):
|
103 |
return None
|
104 |
+
colors = np.unique(mask.reshape(-1, 3), axis=0)
|
105 |
+
colors = set(map(tuple, colors.tolist()))
|
106 |
+
logger.debug(f'{colors=}')
|
107 |
+
logger.debug(f'{colors - set(COLOR_LIST)=}')
|
108 |
+
|
109 |
seg_map = np.full(mask.shape[:-1], -1)
|
110 |
for index, color in enumerate(COLOR_LIST):
|
111 |
seg_map[np.sum(mask == color, axis=2) == 3] = index
|
132 |
shape_text: str) -> np.ndarray:
|
133 |
if pose_data is None:
|
134 |
return
|
135 |
+
logger.debug(f'{len(shape_text)=}')
|
136 |
self.model.feed_pose_data(pose_data)
|
137 |
shape_attributes = generate_shape_attributes(shape_text)
|
138 |
shape_attributes = torch.LongTensor(shape_attributes).unsqueeze(0)
|
146 |
sample_steps: int, seed: int) -> np.ndarray:
|
147 |
if label_image is None:
|
148 |
return
|
149 |
+
logger.debug(f'{len(texture_text)=}')
|
150 |
+
logger.debug(f'{sample_steps=}')
|
151 |
mask = label_image.copy()
|
152 |
seg_map = self.process_mask(mask)
|
153 |
if seg_map is None:
|
pose_images/000.png
CHANGED
Git LFS Details
|
Git LFS Details
|
pose_images/001.png
CHANGED
Git LFS Details
|
Git LFS Details
|
pose_images/002.png
CHANGED
Git LFS Details
|
Git LFS Details
|
pose_images/003.png
CHANGED
Git LFS Details
|
Git LFS Details
|
pose_images/004.png
CHANGED
Git LFS Details
|
Git LFS Details
|
pose_images/005.png
CHANGED
Git LFS Details
|
Git LFS Details
|
requirements.txt
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
-
einops==0.
|
2 |
lpips==0.1.4
|
3 |
mmcv-full==1.5.2
|
4 |
mmsegmentation==0.24.1
|
5 |
-
numpy==1.
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
tokenizers==0.13.3
|
10 |
torch==1.11.0
|
11 |
torchvision==0.12.0
|
12 |
-
transformers==4.
|
1 |
+
einops==0.4.1
|
2 |
lpips==0.1.4
|
3 |
mmcv-full==1.5.2
|
4 |
mmsegmentation==0.24.1
|
5 |
+
numpy==1.22.3
|
6 |
+
Pillow==9.1.1
|
7 |
+
sentence-transformers==2.2.0
|
8 |
+
tokenizers==0.12.1
|
|
|
9 |
torch==1.11.0
|
10 |
torchvision==0.12.0
|
11 |
+
transformers==4.19.2
|