.pre-commit-config.yaml CHANGED
@@ -21,11 +21,11 @@ repos:
   - id: docformatter
     args: ['--in-place']
 - repo: https://github.com/pycqa/isort
-  rev: 5.12.0
+  rev: 5.10.1
   hooks:
   - id: isort
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.991
+  rev: v0.812
   hooks:
   - id: mypy
     args: ['--ignore-missing-imports']
@@ -34,3 +34,13 @@ repos:
   hooks:
   - id: yapf
     args: ['--parallel', '--in-place']
+- repo: https://github.com/kynan/nbstripout
+  rev: 0.5.0
+  hooks:
+  - id: nbstripout
+    args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+- repo: https://github.com/nbQA-dev/nbQA
+  rev: 1.3.1
+  hooks:
+  - id: nbqa-isort
+  - id: nbqa-yapf
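The two new hooks target Jupyter notebooks: nbstripout clears outputs and drops the metadata keys listed under --extra-keys before commit, and nbQA runs isort and yapf inside notebook code cells. As a rough illustration of what the key stripping does, here is a minimal sketch in plain Python (the real hook also clears outputs and execution counts; 'notebook.ipynb' is a hypothetical path):

import json

# Keys mirroring the hook's --extra-keys argument above.
EXTRA_KEYS = ['metadata.interpreter', 'metadata.kernelspec', 'cell.metadata.pycharm']

def pop_path(obj: dict, dotted: str) -> None:
    """Remove a dotted key path such as 'metadata.kernelspec' if present."""
    *parents, leaf = dotted.split('.')
    for part in parents:
        obj = obj.get(part, {})
    obj.pop(leaf, None)

with open('notebook.ipynb') as f:  # hypothetical notebook path
    nb = json.load(f)

for key in EXTRA_KEYS:
    if key.startswith('cell.'):
        # 'cell.'-prefixed keys apply to every cell rather than the notebook root.
        for cell in nb.get('cells', []):
            pop_path(cell, key.removeprefix('cell.'))
    else:
        pop_path(nb, key)

print(json.dumps(nb, indent=1)[:200])  # stripped notebook JSON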
README.md CHANGED
@@ -4,10 +4,9 @@ emoji: 🏃
 colorFrom: purple
 colorTo: gray
 sdk: gradio
-sdk_version: 3.36.1
+sdk_version: 3.0.11
 app_file: app.py
 pinned: false
-suggested_hardware: t4-small
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py CHANGED
@@ -2,139 +2,157 @@
 
 from __future__ import annotations
 
+import argparse
 import os
 import pathlib
-import random
-import shlex
 import subprocess
 
 import gradio as gr
-import numpy as np
 
 if os.getenv('SYSTEM') == 'spaces':
-    import mim
-
-    mim.uninstall('mmcv-full', confirm_yes=True)
-    mim.install('mmcv-full==1.5.2', is_yes=True)
-
-    with open('patch') as f:
-        subprocess.run(shlex.split('patch -p1'), cwd='Text2Human', stdin=f)
+    subprocess.call('pip uninstall -y mmcv-full'.split())
+    subprocess.call('pip install mmcv-full==1.5.2'.split())
+    subprocess.call('git apply ../patch'.split(), cwd='Text2Human')
 
 from model import Model
 
-DESCRIPTION = '''# [Text2Human](https://github.com/yumingj/Text2Human)
 
-You can modify sample steps and seeds. By varying seeds, you can sample different human images under the same pose, shape description, and texture description. The larger the sample steps, the better quality of the generated images. (The default value of sample steps is 256 in the original repo.)
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')
+    parser.add_argument('--theme', type=str)
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--port', type=int)
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')
+    return parser.parse_args()
 
-Label image generation step can be skipped. However, in that case, the input label image must be 512x256 in size and must contain only the specified colors.
-'''
 
-MAX_SEED = np.iinfo(np.int32).max
+def set_example_image(example: list) -> dict:
+    return gr.Image.update(value=example[0])
 
 
-def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    return seed
+def set_example_text(example: list) -> dict:
+    return gr.Textbox.update(value=example[0])
 
 
-model = Model()
+def main():
+    args = parse_args()
+    model = Model(args.device)
 
-with gr.Blocks(css='style.css') as demo:
-    gr.Markdown(DESCRIPTION)
-
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                input_image = gr.Image(label='Input Pose Image',
-                                       type='pil',
-                                       elem_id='input-image')
-            pose_data = gr.State()
-            with gr.Row():
-                paths = sorted(pathlib.Path('pose_images').glob('*.png'))
-                gr.Examples(examples=[[path.as_posix()] for path in paths],
-                            inputs=input_image)
+    css = '''
+h1#title {
+  text-align: center;
+}
+#input-image {
+  max-height: 300px;
+}
+#label-image {
+  height: 300px;
+}
+#result-image {
+  height: 300px;
+}
+'''
 
-            with gr.Row():
-                shape_text = gr.Textbox(
-                    label='Shape Description',
-                    placeholder=
-                    '''<gender>, <sleeve length>, <length of lower clothing>, <outer clothing type>, <other accessories1>, ...
+    with gr.Blocks(theme=args.theme, css=css) as demo:
+        gr.Markdown('''<h1 id="title">Text2Human</h1>
+
+This is an unofficial demo for <a href="https://github.com/yumingj/Text2Human">https://github.com/yumingj/Text2Human</a>.
+You can modify sample steps and seeds. By varying seeds, you can sample different human images under the same pose, shape description, and texture description. The larger the sample steps, the better quality of the generated images. (The default value of sample steps is 256 in the original repo.)
+''')
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    input_image = gr.Image(label='Input Pose Image',
+                                           type='pil',
+                                           elem_id='input-image')
+                with gr.Row():
+                    paths = sorted(pathlib.Path('pose_images').glob('*.png'))
+                    example_images = gr.Dataset(components=[input_image],
+                                                samples=[[path.as_posix()]
+                                                         for path in paths])
+
+            with gr.Column():
+                with gr.Row():
+                    label_image = gr.Image(label='Label Image',
+                                           type='numpy',
+                                           elem_id='label-image')
+                with gr.Row():
+                    shape_text = gr.Textbox(
+                        label='Shape Description',
+                        placeholder=
+                        '''<gender>, <sleeve length>, <length of lower clothing>, <outer clothing type>, <other accessories1>, ...
 Note: The outer clothing type and accessories can be omitted.''')
-            with gr.Row():
-                gr.Examples(
-                    examples=[['man, sleeveless T-shirt, long pants'],
-                              ['woman, short-sleeve T-shirt, short jeans']],
-                    inputs=shape_text)
-            with gr.Row():
-                generate_label_button = gr.Button('Generate Label Image')
-
-        with gr.Column():
-            with gr.Row():
-                label_image = gr.Image(label='Label Image',
-                                       type='numpy',
-                                       elem_id='label-image')
-
-            with gr.Row():
-                texture_text = gr.Textbox(
-                    label='Texture Description',
-                    placeholder=
-                    '''<upper clothing texture>, <lower clothing texture>, <outer clothing texture>
+                with gr.Row():
+                    shape_example_texts = gr.Dataset(
+                        components=[shape_text],
+                        samples=[['man, sleeveless T-shirt, long pants'],
+                                 ['woman, short-sleeve T-shirt, short jeans']])
+                with gr.Row():
+                    generate_label_button = gr.Button('Generate Label Image')
+
+            with gr.Column():
+                with gr.Row():
+                    result = gr.Image(label='Result',
+                                      type='numpy',
+                                      elem_id='result-image')
+                with gr.Row():
+                    texture_text = gr.Textbox(
+                        label='Texture Description',
+                        placeholder=
+                        '''<upper clothing texture>, <lower clothing texture>, <outer clothing texture>
 Note: Currently, only 5 types of textures are supported, i.e., pure color, stripe/spline, plaid/lattice, floral, denim.'''
-                )
-            with gr.Row():
-                gr.Examples(examples=[
-                    ['pure color, denim'],
-                    ['floral, stripe'],
-                ],
-                            inputs=texture_text)
-            with gr.Row():
-                sample_steps = gr.Slider(label='Sample Steps',
-                                         minimum=10,
-                                         maximum=300,
-                                         step=1,
-                                         value=256)
-            with gr.Row():
-                seed = gr.Slider(label='Seed',
-                                 minimum=0,
-                                 maximum=MAX_SEED,
-                                 step=1,
-                                 value=0)
-                randomize_seed = gr.Checkbox(label='Randomize seed',
-                                             value=True)
-            with gr.Row():
-                generate_human_button = gr.Button('Generate Human')
-
-        with gr.Column():
-            with gr.Row():
-                result = gr.Image(label='Result',
-                                  type='numpy',
-                                  elem_id='result-image')
-
-    input_image.change(
-        fn=model.process_pose_image,
-        inputs=input_image,
-        outputs=pose_data,
-    )
-    generate_label_button.click(
-        fn=model.generate_label_image,
-        inputs=[
-            pose_data,
-            shape_text,
-        ],
-        outputs=label_image,
-    )
-    generate_human_button.click(fn=randomize_seed_fn,
-                                inputs=[seed, randomize_seed],
-                                outputs=seed,
-                                queue=False).then(
-                                    fn=model.generate_human,
+                    )
+                with gr.Row():
+                    texture_example_texts = gr.Dataset(
+                        components=[texture_text],
+                        samples=[['pure color, denim'], ['floral, stripe']])
+                with gr.Row():
+                    sample_steps = gr.Slider(10,
+                                             300,
+                                             value=10,
+                                             step=10,
+                                             label='Sample Steps')
+                with gr.Row():
+                    seed = gr.Slider(0, 1000000, value=0, step=1, label='Seed')
+                with gr.Row():
+                    generate_human_button = gr.Button('Generate Human')
+
+        gr.Markdown(
+            '<center><img src="https://visitor-badge.glitch.me/badge?page_id=hysts.text2human" alt="visitor badge"/></center>'
+        )
+
+        input_image.change(fn=model.process_pose_image,
+                           inputs=[input_image],
+                           outputs=None)
+        generate_label_button.click(fn=model.generate_label_image,
+                                    inputs=[shape_text],
+                                    outputs=[label_image])
+        generate_human_button.click(fn=model.generate_human,
                                     inputs=[
-                                        label_image,
                                         texture_text,
                                         sample_steps,
                                         seed,
                                     ],
-                                    outputs=result,
-                                )
-demo.queue(max_size=10).launch()
+                                    outputs=[result])
+        example_images.click(fn=set_example_image,
+                             inputs=example_images,
+                             outputs=example_images.components)
+        shape_example_texts.click(fn=set_example_text,
+                                  inputs=shape_example_texts,
+                                  outputs=shape_example_texts.components)
+        texture_example_texts.click(fn=set_example_text,
+                                    inputs=texture_example_texts,
+                                    outputs=texture_example_texts.components)
+
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+
+
+if __name__ == '__main__':
+    main()
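The rewritten app replaces gr.Examples with the lower-level gr.Dataset pattern used by Gradio 3.0.x: clicking a dataset row passes the selected sample to a callback, whose Component.update return value fills the bound component. A minimal self-contained sketch of that wiring, assuming a Gradio 3.0.x install (the label and sample texts here are illustrative):

import gradio as gr

def set_example_text(example: list) -> dict:
    # A Dataset click passes the clicked sample (a list of component values);
    # returning Textbox.update sets the value of the bound component.
    return gr.Textbox.update(value=example[0])

with gr.Blocks() as demo:
    text = gr.Textbox(label='Description')
    examples = gr.Dataset(components=[text],
                          samples=[['man, sleeveless T-shirt, long pants'],
                                   ['woman, short-sleeve T-shirt, short jeans']])
    examples.click(fn=set_example_text,
                   inputs=examples,
                   outputs=examples.components)

demo.launch()

The same wiring appears three times in app.py, once per Dataset (pose images, shape texts, texture texts).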
model.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 import pathlib
 import sys
 import zipfile
@@ -46,13 +47,11 @@ COLOR_LIST = [
 
 
 class Model:
-    def __init__(self):
-        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    def __init__(self, device: str):
         self.config = self._load_config()
-        self.config['device'] = device.type
+        self.config['device'] = device
        self._download_models()
         self.model = SampleFromPoseModel(self.config)
-        self.model.batch_size = 1
 
     def _load_config(self) -> dict:
         path = 'Text2Human/configs/sample_from_pose.yml'
@@ -64,8 +63,10 @@ class Model:
         model_dir = pathlib.Path('pretrained_models')
         if model_dir.exists():
             return
-        path = huggingface_hub.hf_hub_download('yumingj/Text2Human_SSHQ',
-                                               'pretrained_models.zip')
+        token = os.getenv('HF_TOKEN')
+        path = huggingface_hub.hf_hub_download('hysts/Text2Human',
+                                               'orig/pretrained_models.zip',
+                                               use_auth_token=token)
         model_dir.mkdir()
         with zipfile.ZipFile(path) as f:
             f.extractall(model_dir)
@@ -82,14 +83,11 @@ class Model:
         return data
 
     @staticmethod
-    def process_mask(mask: np.ndarray) -> np.ndarray:
-        if mask.shape != (512, 256, 3):
-            return None
+    def process_mask(mask: torch.Tensor) -> torch.Tensor:
         seg_map = np.full(mask.shape[:-1], -1)
         for index, color in enumerate(COLOR_LIST):
             seg_map[np.sum(mask == color, axis=2) == 3] = index
-        if not (seg_map != -1).all():
-            return None
+        assert (seg_map != -1).all()
         return seg_map
 
     @staticmethod
@@ -100,38 +98,29 @@ class Model:
         result = np.asarray(result[0, :, :, :], dtype=np.uint8)
         return result
 
-    def process_pose_image(self, pose_image: PIL.Image.Image) -> torch.Tensor:
+    def process_pose_image(self, pose_image: PIL.Image.Image) -> None:
         if pose_image is None:
             return
         data = self.preprocess_pose_image(pose_image)
         self.model.feed_pose_data(data)
-        return data
 
-    def generate_label_image(self, pose_data: torch.Tensor,
-                             shape_text: str) -> np.ndarray:
-        if pose_data is None:
-            return
-        self.model.feed_pose_data(pose_data)
+    def generate_label_image(self, shape_text: str) -> np.ndarray:
         shape_attributes = generate_shape_attributes(shape_text)
         shape_attributes = torch.LongTensor(shape_attributes).unsqueeze(0)
         self.model.feed_shape_attributes(shape_attributes)
         self.model.generate_parsing_map()
         self.model.generate_quantized_segm()
         colored_segm = self.model.palette_result(self.model.segm[0].cpu())
-        return colored_segm
 
-    def generate_human(self, label_image: np.ndarray, texture_text: str,
-                       sample_steps: int, seed: int) -> np.ndarray:
-        if label_image is None:
-            return
-        mask = label_image.copy()
+        mask = colored_segm.copy()
         seg_map = self.process_mask(mask)
-        if seg_map is None:
-            return
         self.model.segm = torch.from_numpy(seg_map).unsqueeze(0).unsqueeze(
             0).to(self.model.device)
         self.model.generate_quantized_segm()
+        return colored_segm
 
+    def generate_human(self, texture_text: str, sample_steps: int,
+                       seed: int) -> np.ndarray:
         set_random_seed(seed)
 
         texture_attributes = generate_texture_attributes(texture_text)
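With this change, all pipeline state lives inside Model: process_pose_image feeds the pose, generate_label_image derives and feeds the parsing map, and generate_human only needs the texture text, step count, and seed. A minimal headless sketch of that call order, assuming the Text2Human checkout and pretrained models are in place (pose_images/000.png is one of the bundled examples):

import PIL.Image

from model import Model

model = Model(device='cpu')

# Order matters: each step feeds state into the underlying SampleFromPoseModel.
pose = PIL.Image.open('pose_images/000.png')
model.process_pose_image(pose)  # feeds pose data
label = model.generate_label_image('man, sleeveless T-shirt, long pants')
result = model.generate_human('pure color, denim', sample_steps=10, seed=0)

PIL.Image.fromarray(result).save('result.png')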
pose_images/000.png CHANGED

Git LFS Details

  • SHA256: 3de0dcff0651ff0667b844f58a42b4c6537c86ae0cdac32068c060f5a471832e
  • Pointer size: 130 Bytes
  • Size of remote file: 47.8 kB
pose_images/001.png CHANGED

Git LFS Details

  • SHA256: 83f059e8281483a1c8848c9e190813f2e4eb56b0bfa866cf004e87f019ef4d2c
  • Pointer size: 130 Bytes
  • Size of remote file: 48.7 kB
pose_images/002.png CHANGED

Git LFS Details

  • SHA256: 0fe37d3d227a61259faa032f7430c95fb1162758545d02e1f4c6bd4cd2bc99fc
  • Pointer size: 130 Bytes
  • Size of remote file: 43.4 kB
pose_images/003.png CHANGED

Git LFS Details

  • SHA256: 83f059e8281483a1c8848c9e190813f2e4eb56b0bfa866cf004e87f019ef4d2c
  • Pointer size: 130 Bytes
  • Size of remote file: 48.7 kB
pose_images/004.png CHANGED

Git LFS Details

  • SHA256: 43a71489b88a0bfb8c3a035f62599534cdd6df7b2adb188be4da351819709de1
  • Pointer size: 130 Bytes
  • Size of remote file: 45.8 kB
pose_images/005.png CHANGED

Git LFS Details

  • SHA256: 9bd8833ace00dd3c97eb858e5b87d6803e0611fd718234699329bad7e4f906f1
  • Pointer size: 130 Bytes
  • Size of remote file: 45.7 kB
requirements.txt CHANGED
@@ -1,12 +1,11 @@
-einops==0.6.1
+einops==0.4.1
 lpips==0.1.4
 mmcv-full==1.5.2
 mmsegmentation==0.24.1
-numpy==1.23.5
-openmim==0.1.5
-Pillow==9.5.0
-sentence-transformers==2.2.2
-tokenizers==0.13.3
+numpy==1.22.3
+Pillow==9.1.1
+sentence-transformers==2.2.0
+tokenizers==0.12.1
 torch==1.11.0
 torchvision==0.12.0
-transformers==4.30.2
+transformers==4.19.2
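mmcv-full wheels are compiled against a specific torch release, so the torch==1.11.0 / mmcv-full==1.5.2 pair must move together (app.py also force-reinstalls mmcv-full at startup on Spaces). A small sketch for sanity-checking the installed pins, assuming the packages are already installed:

from importlib.metadata import version

# Pins that must move together: mmcv-full wheels are built per torch release.
for pkg in ['torch', 'torchvision', 'mmcv-full', 'mmsegmentation']:
    print(f'{pkg}=={version(pkg)}')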
style.css DELETED
@@ -1,16 +0,0 @@
-h1 {
-  text-align: center;
-}
-#input-image {
-  max-height: 300px;
-}
-#label-image {
-  height: 300px;
-}
-#result-image {
-  height: 300px;
-}
-img#visitor-badge {
-  display: block;
-  margin: auto;
-}