Upload 000.png
#1 opened by yumingj
- .pre-commit-config.yaml +12 -2
- README.md +1 -2
- app.py +128 -110
- model.py +15 -26
- pose_images/000.png +0 -0
- pose_images/001.png +0 -0
- pose_images/002.png +0 -0
- pose_images/003.png +0 -0
- pose_images/004.png +0 -0
- pose_images/005.png +0 -0
- requirements.txt +6 -7
- style.css +0 -16
.pre-commit-config.yaml
CHANGED
@@ -21,11 +21,11 @@ repos:
     -   id: docformatter
         args: ['--in-place']
 -   repo: https://github.com/pycqa/isort
-    rev: 5.
+    rev: 5.10.1
     hooks:
     -   id: isort
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.
+    rev: v0.812
     hooks:
     -   id: mypy
         args: ['--ignore-missing-imports']
@@ -34,3 +34,13 @@ repos:
     hooks:
     -   id: yapf
         args: ['--parallel', '--in-place']
+-   repo: https://github.com/kynan/nbstripout
+    rev: 0.5.0
+    hooks:
+    -   id: nbstripout
+        args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+-   repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.3.1
+    hooks:
+    -   id: nbqa-isort
+    -   id: nbqa-yapf
README.md
CHANGED
@@ -4,10 +4,9 @@ emoji: 🏃
 colorFrom: purple
 colorTo: gray
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.0.11
 app_file: app.py
 pinned: false
-suggested_hardware: t4-small
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py
CHANGED
@@ -2,139 +2,157 @@
 
 from __future__ import annotations
 
+import argparse
 import os
 import pathlib
-import random
-import shlex
 import subprocess
 
 import gradio as gr
-import numpy as np
 
 if os.getenv('SYSTEM') == 'spaces':
-    mim.install('mmcv-full==1.5.2', is_yes=True)
-    with open('patch') as f:
-        subprocess.run(shlex.split('patch -p1'), cwd='Text2Human', stdin=f)
+    subprocess.call('pip uninstall -y mmcv-full'.split())
+    subprocess.call('pip install mmcv-full==1.5.2'.split())
+    subprocess.call('git apply ../patch'.split(), cwd='Text2Human')
 
 from model import Model
 
-DESCRIPTION = '''# [Text2Human](https://github.com/yumingj/Text2Human)
-
-Label image generation step can be skipped. However, in that case, the input label image must be 512x256 in size and must contain only the specified colors.
-'''
-
-
-def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    return seed
-
-
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')
+    parser.add_argument('--theme', type=str)
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--port', type=int)
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')
+    return parser.parse_args()
+
+
+def set_example_image(example: list) -> dict:
+    return gr.Image.update(value=example[0])
+
+
+def set_example_text(example: list) -> dict:
+    return gr.Textbox.update(value=example[0])
+
+
+def main():
+    args = parse_args()
+    model = Model(args.device)
+
+    css = '''
+h1#title {
+  text-align: center;
+}
+#input-image {
+  max-height: 300px;
+}
+#label-image {
+  height: 300px;
+}
+#result-image {
+  height: 300px;
+}
+'''
+
+    with gr.Blocks(theme=args.theme, css=css) as demo:
+        gr.Markdown('''<h1 id="title">Text2Human</h1>
+
+This is an unofficial demo for <a href="https://github.com/yumingj/Text2Human">https://github.com/yumingj/Text2Human</a>.
+You can modify sample steps and seeds. By varying seeds, you can sample different human images under the same pose, shape description, and texture description. The larger the sample steps, the better quality of the generated images. (The default value of sample steps is 256 in the original repo.)</a>
+''')
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    input_image = gr.Image(label='Input Pose Image',
+                                           type='pil',
+                                           elem_id='input-image')
+                with gr.Row():
+                    paths = sorted(pathlib.Path('pose_images').glob('*.png'))
+                    example_images = gr.Dataset(components=[input_image],
+                                                samples=[[path.as_posix()]
+                                                         for path in paths])
+
+            with gr.Column():
+                with gr.Row():
+                    label_image = gr.Image(label='Label Image',
+                                           type='numpy',
+                                           elem_id='label-image')
+                with gr.Row():
+                    shape_text = gr.Textbox(
+                        label='Shape Description',
+                        placeholder=
+                        '''<gender>, <sleeve length>, <length of lower clothing>, <outer clothing type>, <other accessories1>, ...
 Note: The outer clothing type and accessories can be omitted.''')
+                with gr.Row():
+                    shape_example_texts = gr.Dataset(
+                        components=[shape_text],
+                        samples=[['man, sleeveless T-shirt, long pants'],
+                                 ['woman, short-sleeve T-shirt, short jeans']])
+                with gr.Row():
+                    generate_label_button = gr.Button('Generate Label Image')
+
+            with gr.Column():
+                with gr.Row():
+                    result = gr.Image(label='Result',
+                                      type='numpy',
+                                      elem_id='result-image')
+                with gr.Row():
+                    texture_text = gr.Textbox(
+                        label='Texture Description',
+                        placeholder=
+                        '''<upper clothing texture>, <lower clothing texture>, <outer clothing texture>
 Note: Currently, only 5 types of textures are supported, i.e., pure color, stripe/spline, plaid/lattice, floral, denim.'''
-                                  type='numpy',
-                                  elem_id='result-image')
-    input_image.change(
-        fn=model.process_pose_image,
-        inputs=input_image,
-        outputs=pose_data,
-    )
-    generate_label_button.click(
-        fn=model.generate_label_image,
-        inputs=[
-            pose_data,
-            shape_text,
-        ],
-        outputs=label_image,
-    )
-    generate_human_button.click(fn=randomize_seed_fn,
-                                inputs=[seed, randomize_seed],
-                                outputs=seed,
-                                queue=False).then(
-                                    fn=model.generate_human,
+                    )
+                with gr.Row():
+                    texture_example_texts = gr.Dataset(
+                        components=[texture_text],
+                        samples=[['pure color, denim'], ['floral, stripe']])
+                with gr.Row():
+                    sample_steps = gr.Slider(10,
+                                             300,
+                                             value=10,
+                                             step=10,
+                                             label='Sample Steps')
+                with gr.Row():
+                    seed = gr.Slider(0, 1000000, value=0, step=1, label='Seed')
+                with gr.Row():
+                    generate_human_button = gr.Button('Generate Human')
+
+        gr.Markdown(
+            '<center><img src="https://visitor-badge.glitch.me/badge?page_id=hysts.text2human" alt="visitor badge"/></center>'
+        )
+
+        input_image.change(fn=model.process_pose_image,
+                           inputs=[input_image],
+                           outputs=None)
+        generate_label_button.click(fn=model.generate_label_image,
+                                    inputs=[shape_text],
+                                    outputs=[label_image])
+        generate_human_button.click(fn=model.generate_human,
                                     inputs=[
-                                        label_image,
                                         texture_text,
                                         sample_steps,
                                         seed,
                                     ],
-                                    outputs=result
+                                    outputs=[result])
+        example_images.click(fn=set_example_image,
+                             inputs=example_images,
+                             outputs=example_images.components)
+        shape_example_texts.click(fn=set_example_text,
+                                  inputs=shape_example_texts,
+                                  outputs=shape_example_texts.components)
+        texture_example_texts.click(fn=set_example_text,
+                                    inputs=texture_example_texts,
+                                    outputs=texture_example_texts.components)
+
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+
+
+if __name__ == '__main__':
+    main()
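The example-selection wiring added above follows the usual gradio 3.x Dataset pattern: clicking a sample row passes that row to a callback, which pushes the value back into the paired component. A minimal, self-contained sketch of the same pattern, assuming the gradio 3.0.x API pinned in README.md (the component and variable names here are illustrative):

import gradio as gr


def set_example_text(example: list) -> dict:
    # gr.Dataset passes the clicked row as a list of sample values.
    return gr.Textbox.update(value=example[0])


with gr.Blocks() as demo:
    texture_text = gr.Textbox(label='Texture Description')
    texture_examples = gr.Dataset(components=[texture_text],
                                  samples=[['pure color, denim'],
                                           ['floral, stripe']])
    # Clicking a row copies the sample into the textbox, as app.py does for
    # example_images, shape_example_texts and texture_example_texts.
    texture_examples.click(fn=set_example_text,
                           inputs=texture_examples,
                           outputs=texture_examples.components)

if __name__ == '__main__':
    demo.launch()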
model.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 import pathlib
 import sys
 import zipfile
@@ -46,13 +47,11 @@ COLOR_LIST = [
 
 
 class Model:
-    def __init__(self):
-        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    def __init__(self, device: str):
         self.config = self._load_config()
-        self.config['device'] = device
+        self.config['device'] = device
         self._download_models()
         self.model = SampleFromPoseModel(self.config)
-        self.model.batch_size = 1
 
     def _load_config(self) -> dict:
         path = 'Text2Human/configs/sample_from_pose.yml'
@@ -64,8 +63,10 @@ class Model:
         model_dir = pathlib.Path('pretrained_models')
         if model_dir.exists():
             return
+        token = os.getenv('HF_TOKEN')
+        path = huggingface_hub.hf_hub_download('hysts/Text2Human',
+                                               'orig/pretrained_models.zip',
+                                               use_auth_token=token)
         model_dir.mkdir()
         with zipfile.ZipFile(path) as f:
             f.extractall(model_dir)
@@ -82,14 +83,11 @@ class Model:
         return data
 
     @staticmethod
-    def process_mask(mask:
-        if mask.shape != (512, 256, 3):
-            return None
+    def process_mask(mask: torch.Tensor) -> torch.Tensor:
         seg_map = np.full(mask.shape[:-1], -1)
         for index, color in enumerate(COLOR_LIST):
             seg_map[np.sum(mask == color, axis=2) == 3] = index
-            return None
+        assert (seg_map != -1).all()
         return seg_map
 
     @staticmethod
@@ -100,38 +98,29 @@ class Model:
         result = np.asarray(result[0, :, :, :], dtype=np.uint8)
         return result
 
-    def process_pose_image(self, pose_image: PIL.Image.Image) ->
+    def process_pose_image(self, pose_image: PIL.Image.Image) -> None:
         if pose_image is None:
             return
         data = self.preprocess_pose_image(pose_image)
         self.model.feed_pose_data(data)
-        return data
 
-    def generate_label_image(self,
-                             shape_text: str) -> np.ndarray:
-        if pose_data is None:
-            return
-        self.model.feed_pose_data(pose_data)
+    def generate_label_image(self, shape_text: str) -> np.ndarray:
         shape_attributes = generate_shape_attributes(shape_text)
         shape_attributes = torch.LongTensor(shape_attributes).unsqueeze(0)
         self.model.feed_shape_attributes(shape_attributes)
         self.model.generate_parsing_map()
         self.model.generate_quantized_segm()
         colored_segm = self.model.palette_result(self.model.segm[0].cpu())
-        return colored_segm
 
-    def generate_human(self, label_image: np.ndarray, texture_text: str,
-                       sample_steps: int, seed: int) -> np.ndarray:
-        if label_image is None:
-            return
-        mask = label_image.copy()
+        mask = colored_segm.copy()
         seg_map = self.process_mask(mask)
-        if seg_map is None:
-            return
         self.model.segm = torch.from_numpy(seg_map).unsqueeze(0).unsqueeze(
             0).to(self.model.device)
         self.model.generate_quantized_segm()
+        return colored_segm
 
+    def generate_human(self, texture_text: str, sample_steps: int,
+                       seed: int) -> np.ndarray:
         set_random_seed(seed)
 
         texture_attributes = generate_texture_attributes(texture_text)
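After this refactor, the Gradio callbacks drive Model as a stateful pipeline: the pose is fed once when the input image changes, the label step needs only the shape text, and the final step needs only the texture text, sample steps, and seed. A rough sketch of the resulting call sequence, assuming a local checkout with the pretrained weights already in pretrained_models/ (otherwise _download_models needs an HF_TOKEN) and using one of the bundled pose images:

import PIL.Image

from model import Model

model = Model(device='cpu')  # the device string is written into the config

# 1. Feed the pose once; it is kept inside SampleFromPoseModel.
pose = PIL.Image.open('pose_images/000.png')
model.process_pose_image(pose)

# 2. Build the label (parsing) map from the shape description alone.
label_image = model.generate_label_image('man, sleeveless T-shirt, long pants')

# 3. Sample the human image from the texture description.
result = model.generate_human('pure color, denim', sample_steps=10, seed=0)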
pose_images/000.png
CHANGED (Git LFS)
pose_images/001.png
CHANGED (Git LFS)
pose_images/002.png
CHANGED (Git LFS)
pose_images/003.png
CHANGED (Git LFS)
pose_images/004.png
CHANGED (Git LFS)
pose_images/005.png
CHANGED (Git LFS)
requirements.txt
CHANGED
@@ -1,12 +1,11 @@
-einops==0.
+einops==0.4.1
 lpips==0.1.4
 mmcv-full==1.5.2
 mmsegmentation==0.24.1
-numpy==1.
-tokenizers==0.13.3
+numpy==1.22.3
+Pillow==9.1.1
+sentence-transformers==2.2.0
+tokenizers==0.12.1
 torch==1.11.0
 torchvision==0.12.0
-transformers==4.
+transformers==4.19.2
style.css
DELETED
@@ -1,16 +0,0 @@
-h1 {
-  text-align: center;
-}
-#input-image {
-  max-height: 300px;
-}
-#label-image {
-  height: 300px;
-}
-#result-image {
-  height: 300px;
-}
-img#visitor-badge {
-  display: block;
-  margin: auto;
-}