skytnt commited on
Commit
7aeddce
1 Parent(s): de2874f

README and Code

Browse files
.gitignore ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ lib/
17
+ lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+ MANIFEST
26
+
27
+ # PyInstaller
28
+ # Usually these files are written by a python script from a template
29
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
30
+ *.manifest
31
+ *.spec
32
+
33
+ # Installer logs
34
+ pip-log.txt
35
+ pip-delete-this-directory.txt
36
+
37
+ # Unit test / coverage reports
38
+ htmlcov/
39
+ .tox/
40
+ .nox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *.cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+
50
+ # Translations
51
+ *.mo
52
+ *.pot
53
+
54
+ # Django stuff:
55
+ *.log
56
+ local_settings.py
57
+ db.sqlite3
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ docs/_build/
68
+
69
+ # PyBuilder
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # IPython
76
+ profile_default/
77
+ ipython_config.py
78
+
79
+ # pyenv
80
+ .python-version
81
+
82
+ # celery beat schedule file
83
+ celerybeat-schedule
84
+
85
+ # SageMath parsed files
86
+ *.sage.py
87
+
88
+ # Environments
89
+ .env
90
+ .venv
91
+ env/
92
+ venv/
93
+ ENV/
94
+ env.bak/
95
+ venv.bak/
96
+
97
+ # Spyder project settings
98
+ .spyderproject
99
+ .spyproject
100
+
101
+ # Rope project settings
102
+ .ropeproject
103
+
104
+ # mkdocs documentation
105
+ /site
106
+
107
+ # mypy
108
+ .mypy_cache/
109
+ .dmypy.json
110
+ dmypy.json
111
+
112
+ # Pyre type checker
113
+ .pyre/
114
+
115
+ .idea/
README.md CHANGED
@@ -1,3 +1,23 @@
1
  ---
 
 
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - unconditional-image-generation
4
  license: apache-2.0
5
  ---
6
+
7
+ # Model info
8
+
9
+ `fbanime.pkl`: StyleGAN2 model trained with the official [StyleGAN3](https://github.com/NVlabs/stylegan3) codebase.
10
+ The training code (`networks_stylegan2.py` and `dataset.py`) was modified to support non-square resolutions.
11
+
12
+ `g_mapping.onnx`: onnx format mapping network of fbanime.pkl
13
+
14
+ `g_synthesis.onnx`: onnx format synthesis network of fbanime.pkl
15
+
16
+ `encoder.onnx`: e4e (encoder4editing) model trained with [encoder4editing-stylegan3](https://github.com/yj7082126/encoder4editing-stylegan3).
17
+ I added support for the official StyleGAN2 model and changed the backbone to the ResNet-34 used in [restyle-encoder](https://github.com/yuval-alaluf/restyle-encoder).
18
+
19
+ `waifu_dect.onnx`: YOLOv5 model trained with the official [YOLOv5](https://github.com/ultralytics/yolov5) code.
20
+
21
+ # Usage
22
+
23
+ See the [demo](https://huggingface.co/spaces/skytnt/full-body-anime-gan/blob/main/app.py) for a usage example.
code/convert-stylegan2-model.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
code/dataset.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ """Streaming images and labels from datasets created with dataset_tool.py."""
10
+
11
+ import os
12
+ import numpy as np
13
+ import zipfile
14
+ import PIL.Image
15
+ import json
16
+ import torch
17
+ import dnnlib
18
+
19
+ try:
20
+ import pyspng
21
+ except ImportError:
22
+ pyspng = None
23
+
24
+ #----------------------------------------------------------------------------
25
+
26
class Dataset(torch.utils.data.Dataset):
    """Abstract base class for image datasets with optional labels.

    Subclasses provide the actual storage by overriding `_load_raw_image()`,
    `_load_raw_labels()` and, if they hold resources, `close()`.

    Indexing returns `(image, label)` where `image` is a uint8 array whose
    shape equals `image_shape` and `label` is a float32 array (one-hot encoded
    when the raw labels are int64 class indices).
    """

    def __init__(self,
        name,                   # Name of the dataset.
        raw_shape,              # Shape of the raw image data (NCHW).
        max_size    = None,     # Artificially limit the size of the dataset. None = no limit. Applied before xflip.
        use_labels  = False,    # Enable conditioning labels? False = label dimension is zero.
        xflip       = False,    # Artificially double the size of the dataset via x-flips. Applied after max_size.
        random_seed = 0,        # Random seed to use when applying max_size.
    ):
        self._name = name
        self._raw_shape = list(raw_shape)
        self._use_labels = use_labels
        self._raw_labels = None     # Loaded lazily by _get_raw_labels().
        self._label_shape = None    # Computed lazily by the label_shape property.

        # Apply max_size: keep a seeded random subset, sorted back into raw order.
        self._raw_idx = np.arange(self._raw_shape[0], dtype=np.int64)
        if (max_size is not None) and (self._raw_idx.size > max_size):
            np.random.RandomState(random_seed).shuffle(self._raw_idx)
            self._raw_idx = np.sort(self._raw_idx[:max_size])

        # Apply xflip: the second half of the index space mirrors the first half.
        self._xflip = np.zeros(self._raw_idx.size, dtype=np.uint8)
        if xflip:
            self._raw_idx = np.tile(self._raw_idx, 2)
            self._xflip = np.concatenate([self._xflip, np.ones_like(self._xflip)])

    def _get_raw_labels(self):
        """Return the cached raw label array, loading and validating it on first use.

        When labels are disabled or the subclass returns None, a float32 array of
        shape [num_raw_images, 0] is used instead.
        """
        if self._raw_labels is None:
            self._raw_labels = self._load_raw_labels() if self._use_labels else None
            if self._raw_labels is None:
                self._raw_labels = np.zeros([self._raw_shape[0], 0], dtype=np.float32)
            assert isinstance(self._raw_labels, np.ndarray)
            assert self._raw_labels.shape[0] == self._raw_shape[0]
            assert self._raw_labels.dtype in [np.float32, np.int64]
            if self._raw_labels.dtype == np.int64:
                # int64 labels are class indices: one non-negative index per image.
                assert self._raw_labels.ndim == 1
                assert np.all(self._raw_labels >= 0)
        return self._raw_labels

    def close(self): # to be overridden by subclass
        pass

    def _load_raw_image(self, raw_idx): # to be overridden by subclass
        raise NotImplementedError

    def _load_raw_labels(self): # to be overridden by subclass
        raise NotImplementedError

    def __getstate__(self):
        # Drop the cached labels from the pickled state; they are reloaded lazily.
        return dict(self.__dict__, _raw_labels=None)

    def __del__(self):
        # Best-effort cleanup. Only ordinary errors are suppressed, so
        # KeyboardInterrupt / SystemExit are no longer silently swallowed here.
        try:
            self.close()
        except Exception:
            pass

    def __len__(self):
        return self._raw_idx.size

    def __getitem__(self, idx):
        """Return `(image, label)` for dataset index `idx`, applying the x-flip if requested."""
        image = self._load_raw_image(self._raw_idx[idx])
        assert isinstance(image, np.ndarray)
        assert list(image.shape) == self.image_shape
        assert image.dtype == np.uint8
        if self._xflip[idx]:
            assert image.ndim == 3 # CHW
            image = image[:, :, ::-1]   # Reverse the last (width) axis.
        return image.copy(), self.get_label(idx)

    def get_label(self, idx):
        """Return the label for dataset index `idx`; int64 class indices become float32 one-hot vectors."""
        label = self._get_raw_labels()[self._raw_idx[idx]]
        if label.dtype == np.int64:
            onehot = np.zeros(self.label_shape, dtype=np.float32)
            onehot[label] = 1
            label = onehot
        return label.copy()

    def get_details(self, idx):
        """Return a `dnnlib.EasyDict` with `raw_idx`, `xflip` and `raw_label` for dataset index `idx`."""
        d = dnnlib.EasyDict()
        d.raw_idx = int(self._raw_idx[idx])
        d.xflip = (int(self._xflip[idx]) != 0)
        d.raw_label = self._get_raw_labels()[d.raw_idx].copy()
        return d

    @property
    def name(self):
        return self._name

    @property
    def image_shape(self):
        # raw_shape without its leading (dataset size) dimension.
        return list(self._raw_shape[1:])

    @property
    def num_channels(self):
        assert len(self.image_shape) == 3 # CHW
        return self.image_shape[0]

    @property
    def resolution(self):
        """Image resolution as a `(height, width)` pair; images are not required to be square."""
        assert len(self.image_shape) == 3 # CHW
        return self.image_shape[1], self.image_shape[2]

    @property
    def label_shape(self):
        """Shape of a single label as a list; `[num_classes]` for int64 class-index labels."""
        if self._label_shape is None:
            raw_labels = self._get_raw_labels()
            if raw_labels.dtype == np.int64:
                self._label_shape = [int(np.max(raw_labels)) + 1]
            else:
                self._label_shape = raw_labels.shape[1:]
        return list(self._label_shape)

    @property
    def label_dim(self):
        assert len(self.label_shape) == 1
        return self.label_shape[0]

    @property
    def has_labels(self):
        return any(x != 0 for x in self.label_shape)

    @property
    def has_onehot_labels(self):
        return self._get_raw_labels().dtype == np.int64
153
+
154
+ #----------------------------------------------------------------------------
155
+
156
class ImageFolderDataset(Dataset):
    """Dataset backed by a directory tree or a zip archive of image files.

    Labels, when enabled on the base class, are read from a `dataset.json`
    file stored alongside the images.
    """

    def __init__(self,
        path,                   # Path to directory or zip.
        resolution      = None, # Ensure specific resolution: an int (square) or a (height, width) pair. None = highest available.
        **super_kwargs,         # Additional arguments for the Dataset base class.
    ):
        self._path = path
        self._zipfile = None

        if os.path.isdir(self._path):
            self._type = 'dir'
            self._all_fnames = {os.path.relpath(os.path.join(root, fname), start=self._path) for root, _dirs, files in os.walk(self._path) for fname in files}
        elif self._file_ext(self._path) == '.zip':
            self._type = 'zip'
            self._all_fnames = set(self._get_zipfile().namelist())
        else:
            raise IOError('Path must point to a directory or zip')

        # Keep only files whose extension PIL recognises; sorted for a stable index order.
        PIL.Image.init()
        self._image_fnames = sorted(fname for fname in self._all_fnames if self._file_ext(fname) in PIL.Image.EXTENSION)
        if len(self._image_fnames) == 0:
            raise IOError('No image files found in the specified path')

        name = os.path.splitext(os.path.basename(self._path))[0]
        # The shape of the first image determines the dataset's raw shape (N, C, H, W).
        raw_shape = [len(self._image_fnames)] + list(self._load_raw_image(0).shape)
        if resolution is not None:
            # Accept a single int (square images) as well as a (height, width)
            # pair, so callers written for square-only datasets keep working.
            if np.ndim(resolution) == 0:
                height = width = resolution
            else:
                height, width = resolution[0], resolution[1]
            if raw_shape[2] != height or raw_shape[3] != width:
                raise IOError('Image files do not match the specified resolution')
        super().__init__(name=name, raw_shape=raw_shape, **super_kwargs)

    @staticmethod
    def _file_ext(fname):
        """Return the lower-cased file extension of `fname`, including the leading dot."""
        return os.path.splitext(fname)[1].lower()

    def _get_zipfile(self):
        """Return the zip archive handle, opening it on first use."""
        assert self._type == 'zip'
        if self._zipfile is None:
            self._zipfile = zipfile.ZipFile(self._path)
        return self._zipfile

    def _open_file(self, fname):
        """Open `fname` (relative to the dataset root) for binary reading."""
        if self._type == 'dir':
            return open(os.path.join(self._path, fname), 'rb')
        if self._type == 'zip':
            return self._get_zipfile().open(fname, 'r')
        return None

    def close(self):
        """Close the zip archive if one is open; the handle is reset even if closing fails."""
        try:
            if self._zipfile is not None:
                self._zipfile.close()
        finally:
            self._zipfile = None

    def __getstate__(self):
        # The open zip handle is excluded from the pickled state; _get_zipfile() reopens it.
        return dict(super().__getstate__(), _zipfile=None)

    def _load_raw_image(self, raw_idx):
        """Load image number `raw_idx` and return it as a CHW array."""
        fname = self._image_fnames[raw_idx]
        with self._open_file(fname) as f:
            # Use pyspng for PNG files when it is installed, PIL otherwise.
            if pyspng is not None and self._file_ext(fname) == '.png':
                image = pyspng.load(f.read())
            else:
                image = np.array(PIL.Image.open(f))
        if image.ndim == 2:
            image = image[:, :, np.newaxis] # HW => HWC
        image = image.transpose(2, 0, 1) # HWC => CHW
        return image

    def _load_raw_labels(self):
        """Read labels from `dataset.json`; return None when the file or its labels are absent."""
        fname = 'dataset.json'
        if fname not in self._all_fnames:
            return None
        with self._open_file(fname) as f:
            labels = json.load(f)['labels']
        if labels is None:
            return None
        labels = dict(labels)
        # Label keys use forward slashes, so normalise Windows-style separators before lookup.
        labels = [labels[fname.replace('\\', '/')] for fname in self._image_fnames]
        labels = np.array(labels)
        # 1-D labels are class indices (int64); 2-D labels are float32 vectors.
        labels = labels.astype({1: np.int64, 2: np.float32}[labels.ndim])
        return labels
237
+
238
+ #----------------------------------------------------------------------------
code/networks_stylegan2.py ADDED
@@ -0,0 +1,842 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ """Network architectures from the paper
10
+ "Analyzing and Improving the Image Quality of StyleGAN".
11
+ Matches the original implementation of configs E-F by Karras et al. at
12
+ https://github.com/NVlabs/stylegan2/blob/master/training/networks_stylegan2.py"""
13
+
14
+ import numpy as np
15
+ import torch
16
+ from torch_utils import misc
17
+ from torch_utils import persistence
18
+ from torch_utils.ops import conv2d_resample
19
+ from torch_utils.ops import upfirdn2d
20
+ from torch_utils.ops import bias_act
21
+ from torch_utils.ops import fma
22
+
23
+
24
+ # ----------------------------------------------------------------------------
25
+
26
@misc.profiled_function
def normalize_2nd_moment(x, dim=1, eps=1e-8):
    """Scale `x` by the reciprocal square root of its mean square along `dim`.

    `eps` is added to the mean square before the reciprocal square root is taken.
    """
    mean_square = x.square().mean(dim=dim, keepdim=True)
    inv_rms = (mean_square + eps).rsqrt()
    return x * inv_rms
29
+
30
+
31
+ # ----------------------------------------------------------------------------
32
+
33
@misc.profiled_function
def modulated_conv2d(
    x,                      # Input tensor of shape [batch_size, in_channels, in_height, in_width].
    weight,                 # Weight tensor of shape [out_channels, in_channels, kernel_height, kernel_width].
    styles,                 # Modulation coefficients of shape [batch_size, in_channels].
    noise=None,             # Optional noise tensor to add to the output activations.
    up=1,                   # Integer upsampling factor.
    down=1,                 # Integer downsampling factor.
    padding=0,              # Padding with respect to the upsampled image.
    resample_filter=None,   # Low-pass filter to apply when resampling activations. Must be prepared beforehand by calling upfirdn2d.setup_filter().
    demodulate=True,        # Apply weight demodulation?
    flip_weight=True,       # False = convolution, True = correlation (matches torch.nn.functional.conv2d).
    fused_modconv=True,     # Perform modulation, convolution, and demodulation as a single fused operation?
):
    """Style-modulated 2D convolution with optional demodulation and additive noise.

    Runs either as one grouped convolution over per-sample weights
    (`fused_modconv=True`) or by scaling the activations before and after an
    ordinary convolution (`fused_modconv=False`).
    """
    batch_size = x.shape[0]
    out_channels, in_channels, kh, kw = weight.shape
    misc.assert_shape(weight, [out_channels, in_channels, kh, kw])  # [OIkk]
    misc.assert_shape(x, [batch_size, in_channels, None, None])     # [NIHW]
    misc.assert_shape(styles, [batch_size, in_channels])            # [NI]

    # Pre-normalize inputs to avoid FP16 overflow.
    if demodulate and x.dtype == torch.float16:
        fan_in_scale = 1 / np.sqrt(in_channels * kh * kw)
        weight_max = weight.norm(float('inf'), dim=[1, 2, 3], keepdim=True)   # max_Ikk
        weight = weight * (fan_in_scale / weight_max)
        styles = styles / styles.norm(float('inf'), dim=1, keepdim=True)      # max_I

    # Calculate per-sample weights and demodulation coefficients.
    w = None
    dcoefs = None
    if demodulate or fused_modconv:
        w = weight.unsqueeze(0) * styles.reshape(batch_size, 1, -1, 1, 1)    # [NOIkk]
    if demodulate:
        dcoefs = (w.square().sum(dim=[2, 3, 4]) + 1e-8).rsqrt()              # [NO]
        if fused_modconv:
            w = w * dcoefs.reshape(batch_size, -1, 1, 1, 1)                  # [NOIkk]

    # Execute by scaling the activations before and after the convolution.
    if not fused_modconv:
        x = x * styles.to(x.dtype).reshape(batch_size, -1, 1, 1)
        x = conv2d_resample.conv2d_resample(
            x=x, w=weight.to(x.dtype), f=resample_filter, up=up, down=down,
            padding=padding, flip_weight=flip_weight)
        if demodulate:
            out_scale = dcoefs.to(x.dtype).reshape(batch_size, -1, 1, 1)
            if noise is not None:
                x = fma.fma(x, out_scale, noise.to(x.dtype))
            else:
                x = x * out_scale
        elif noise is not None:
            x = x.add_(noise.to(x.dtype))
        return x

    # Execute as one fused op using grouped convolution.
    with misc.suppress_tracer_warnings():  # this value will be treated as a constant
        batch_size = int(batch_size)
    misc.assert_shape(x, [batch_size, in_channels, None, None])
    # Fold the batch into the channel axis so each sample is one convolution group.
    x = x.reshape(1, -1, *x.shape[2:])
    w = w.reshape(-1, in_channels, kh, kw)
    x = conv2d_resample.conv2d_resample(
        x=x, w=w.to(x.dtype), f=resample_filter, up=up, down=down,
        padding=padding, groups=batch_size, flip_weight=flip_weight)
    x = x.reshape(batch_size, -1, *x.shape[2:])
    if noise is not None:
        x = x.add_(noise)
    return x
96
+
97
+
98
+ # ----------------------------------------------------------------------------
99
+
100
@persistence.persistent_class
class FullyConnectedLayer(torch.nn.Module):
    """Linear layer with runtime weight scaling, optional bias and a named activation."""

    def __init__(self,
        in_features,            # Number of input features.
        out_features,           # Number of output features.
        bias=True,              # Apply additive bias before the activation function?
        activation='linear',    # Activation function: 'relu', 'lrelu', etc.
        lr_multiplier=1,        # Learning rate multiplier.
        bias_init=0,            # Initial value for the additive bias.
    ):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.activation = activation

        # Weights are stored divided by lr_multiplier; the gains below are applied in forward().
        initial_weight = torch.randn([out_features, in_features]) / lr_multiplier
        self.weight = torch.nn.Parameter(initial_weight)
        if bias:
            self.bias = torch.nn.Parameter(torch.full([out_features], np.float32(bias_init)))
        else:
            self.bias = None
        self.weight_gain = lr_multiplier / np.sqrt(in_features)
        self.bias_gain = lr_multiplier

    def forward(self, x):
        scaled_weight = self.weight.to(x.dtype) * self.weight_gain
        scaled_bias = self.bias
        if scaled_bias is not None:
            scaled_bias = scaled_bias.to(x.dtype)
            if self.bias_gain != 1:
                scaled_bias = scaled_bias * self.bias_gain

        # A linear layer with a bias is computed by a single addmm call.
        if scaled_bias is not None and self.activation == 'linear':
            return torch.addmm(scaled_bias.unsqueeze(0), x, scaled_weight.t())

        product = x.matmul(scaled_weight.t())
        return bias_act.bias_act(product, scaled_bias, act=self.activation)

    def extra_repr(self):
        return f'in_features={self.in_features:d}, out_features={self.out_features:d}, activation={self.activation:s}'
136
+
137
+
138
+ # ----------------------------------------------------------------------------
139
+
140
@persistence.persistent_class
class Conv2dLayer(torch.nn.Module):
    """2D convolution with optional up/downsampling, bias, activation and output clamping."""

    def __init__(self,
        in_channels,                    # Number of input channels.
        out_channels,                   # Number of output channels.
        kernel_size,                    # Width and height of the convolution kernel.
        bias=True,                      # Apply additive bias before the activation function?
        activation='linear',            # Activation function: 'relu', 'lrelu', etc.
        up=1,                           # Integer upsampling factor.
        down=1,                         # Integer downsampling factor.
        resample_filter=[1, 3, 3, 1],   # Low-pass filter to apply when resampling activations.
        conv_clamp=None,                # Clamp the output to +-X, None = disable clamping.
        channels_last=False,            # Expect the input to have memory_format=channels_last?
        trainable=True,                 # Update the weights of this layer during training?
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.activation = activation
        self.up = up
        self.down = down
        self.conv_clamp = conv_clamp
        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
        self.padding = kernel_size // 2
        self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))
        self.act_gain = bias_act.activation_funcs[activation].def_gain

        if channels_last:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format
        init_weight = torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=memory_format)
        init_bias = torch.zeros([out_channels]) if bias else None

        # Trainable layers hold parameters; frozen layers hold the same tensors as buffers.
        if trainable:
            self.weight = torch.nn.Parameter(init_weight)
            self.bias = None if init_bias is None else torch.nn.Parameter(init_bias)
        else:
            self.register_buffer('weight', init_weight)
            if init_bias is None:
                self.bias = None
            else:
                self.register_buffer('bias', init_bias)

    def forward(self, x, gain=1):
        scaled_weight = self.weight * self.weight_gain
        bias = None if self.bias is None else self.bias.to(x.dtype)
        flip_weight = (self.up == 1)  # slightly faster
        x = conv2d_resample.conv2d_resample(
            x=x, w=scaled_weight.to(x.dtype), f=self.resample_filter,
            up=self.up, down=self.down, padding=self.padding, flip_weight=flip_weight)

        # Both the activation gain and the clamp limit scale with the caller-supplied gain.
        act_gain = self.act_gain * gain
        act_clamp = None if self.conv_clamp is None else self.conv_clamp * gain
        return bias_act.bias_act(x, bias, act=self.activation, gain=act_gain, clamp=act_clamp)

    def extra_repr(self):
        return ' '.join([
            f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, activation={self.activation:s},',
            f'up={self.up}, down={self.down}'])
196
+
197
+
198
+ # ----------------------------------------------------------------------------
199
+
200
@persistence.persistent_class
class MappingNetwork(torch.nn.Module):
    """Stack of fully connected layers mapping latent `z` (and optional label `c`) to `w`.

    The output is optionally broadcast to `num_ws` copies and optionally
    truncated towards a tracked moving average of W.
    """

    def __init__(self,
        z_dim,                  # Input latent (Z) dimensionality, 0 = no latent.
        c_dim,                  # Conditioning label (C) dimensionality, 0 = no label.
        w_dim,                  # Intermediate latent (W) dimensionality.
        num_ws,                 # Number of intermediate latents to output, None = do not broadcast.
        num_layers=8,           # Number of mapping layers.
        embed_features=None,    # Label embedding dimensionality, None = same as w_dim.
        layer_features=None,    # Number of intermediate features in the mapping layers, None = same as w_dim.
        activation='lrelu',     # Activation function: 'relu', 'lrelu', etc.
        lr_multiplier=0.01,     # Learning rate multiplier for the mapping layers.
        w_avg_beta=0.998,       # Decay for tracking the moving average of W during training, None = do not track.
    ):
        super().__init__()
        self.z_dim = z_dim
        self.c_dim = c_dim
        self.w_dim = w_dim
        self.num_ws = num_ws
        self.num_layers = num_layers
        self.w_avg_beta = w_avg_beta

        if embed_features is None:
            embed_features = w_dim
        if c_dim == 0:
            embed_features = 0
        if layer_features is None:
            layer_features = w_dim
        # Feature widths: input, (num_layers - 1) hidden layers, output.
        features_list = [z_dim + embed_features] + [layer_features] * (num_layers - 1) + [w_dim]

        if c_dim > 0:
            self.embed = FullyConnectedLayer(c_dim, embed_features)
        for idx in range(num_layers):
            in_features = features_list[idx]
            out_features = features_list[idx + 1]
            layer = FullyConnectedLayer(in_features, out_features, activation=activation, lr_multiplier=lr_multiplier)
            setattr(self, f'fc{idx}', layer)

        # NOTE: the `w_avg` buffer only exists when both num_ws and w_avg_beta are set.
        if num_ws is not None and w_avg_beta is not None:
            self.register_buffer('w_avg', torch.zeros([w_dim]))

    def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, update_emas=False):
        # Embed, normalize, and concat inputs.
        x = None
        with torch.autograd.profiler.record_function('input'):
            if self.z_dim > 0:
                misc.assert_shape(z, [None, self.z_dim])
                x = normalize_2nd_moment(z.to(torch.float32))
            if self.c_dim > 0:
                misc.assert_shape(c, [None, self.c_dim])
                y = normalize_2nd_moment(self.embed(c.to(torch.float32)))
                x = torch.cat([x, y], dim=1) if x is not None else y

        # Main layers.
        for idx in range(self.num_layers):
            layer = getattr(self, f'fc{idx}')
            x = layer(x)

        # Update moving average of W.
        if update_emas and self.w_avg_beta is not None:
            with torch.autograd.profiler.record_function('update_w_avg'):
                self.w_avg.copy_(x.detach().mean(dim=0).lerp(self.w_avg, self.w_avg_beta))

        # Broadcast.
        if self.num_ws is not None:
            with torch.autograd.profiler.record_function('broadcast'):
                x = x.unsqueeze(1).repeat([1, self.num_ws, 1])

        # Apply truncation.
        if truncation_psi != 1:
            with torch.autograd.profiler.record_function('truncate'):
                assert self.w_avg_beta is not None
                if self.num_ws is None or truncation_cutoff is None:
                    x = self.w_avg.lerp(x, truncation_psi)
                else:
                    # Only the first `truncation_cutoff` broadcast latents are truncated.
                    x[:, :truncation_cutoff] = self.w_avg.lerp(x[:, :truncation_cutoff], truncation_psi)
        return x

    def extra_repr(self):
        # num_ws may be None (no broadcast), so it is formatted without the ':d'
        # integer spec, which would raise TypeError for None.
        return f'z_dim={self.z_dim:d}, c_dim={self.c_dim:d}, w_dim={self.w_dim:d}, num_ws={self.num_ws}'
280
+
281
+
282
+ # ----------------------------------------------------------------------------
283
+
284
@persistence.persistent_class
class SynthesisLayer(torch.nn.Module):
    """Style-modulated convolution layer with optional noise input, bias and activation.

    `resolution` is a (height, width) pair, so non-square layers are supported.
    """

    def __init__(self,
        in_channels,                    # Number of input channels.
        out_channels,                   # Number of output channels.
        w_dim,                          # Intermediate latent (W) dimensionality.
        resolution,                     # Resolution of this layer.
        kernel_size=3,                  # Convolution kernel size.
        up=1,                           # Integer upsampling factor.
        use_noise=True,                 # Enable noise input?
        activation='lrelu',             # Activation function: 'relu', 'lrelu', etc.
        resample_filter=[1, 3, 3, 1],   # Low-pass filter to apply when resampling activations.
        conv_clamp=None,                # Clamp the output of convolution layers to +-X, None = disable clamping.
        channels_last=False,            # Use channels_last format for the weights?
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.w_dim = w_dim
        self.resolution = resolution
        self.up = up
        self.use_noise = use_noise
        self.activation = activation
        self.conv_clamp = conv_clamp
        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
        self.padding = kernel_size // 2
        self.act_gain = bias_act.activation_funcs[activation].def_gain

        # The affine layer maps W to one modulation coefficient per input channel.
        self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1)
        if channels_last:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format
        init_weight = torch.randn([out_channels, in_channels, kernel_size, kernel_size])
        self.weight = torch.nn.Parameter(init_weight.to(memory_format=memory_format))
        if use_noise:
            height, width = resolution[0], resolution[1]
            self.register_buffer('noise_const', torch.randn([height, width]))
            self.noise_strength = torch.nn.Parameter(torch.zeros([]))
        self.bias = torch.nn.Parameter(torch.zeros([out_channels]))

    def forward(self, x, w, noise_mode='random', fused_modconv=True, gain=1):
        assert noise_mode in ['random', 'const', 'none']
        # The input is expected at this layer's resolution divided by the upsampling factor.
        in_height = self.resolution[0] // self.up
        in_width = self.resolution[1] // self.up
        misc.assert_shape(x, [None, self.in_channels, in_height, in_width])
        styles = self.affine(w)

        # Pick the noise source: fresh per-sample noise, the stored constant, or none.
        noise = None
        if self.use_noise:
            if noise_mode == 'random':
                fresh = torch.randn([x.shape[0], 1, self.resolution[0], self.resolution[1]], device=x.device)
                noise = fresh * self.noise_strength
            elif noise_mode == 'const':
                noise = self.noise_const * self.noise_strength

        flip_weight = (self.up == 1)  # slightly faster
        x = modulated_conv2d(
            x=x, weight=self.weight, styles=styles, noise=noise, up=self.up,
            padding=self.padding, resample_filter=self.resample_filter,
            flip_weight=flip_weight, fused_modconv=fused_modconv)

        act_gain = self.act_gain * gain
        act_clamp = None if self.conv_clamp is None else self.conv_clamp * gain
        return bias_act.bias_act(x, self.bias.to(x.dtype), act=self.activation, gain=act_gain, clamp=act_clamp)

    def extra_repr(self):
        return ' '.join([
            f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, w_dim={self.w_dim:d},',
            f'resolution={self.resolution[0]:d}x{self.resolution[1]:d}, up={self.up}, activation={self.activation:s}'])
348
+
349
+
350
+ # ----------------------------------------------------------------------------
351
+
352
@persistence.persistent_class
class ToRGBLayer(torch.nn.Module):
    """Style-modulated convolution without demodulation, followed by a bias and optional clamp."""

    def __init__(self, in_channels, out_channels, w_dim, kernel_size=1, conv_clamp=None, channels_last=False):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.w_dim = w_dim
        self.conv_clamp = conv_clamp
        self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1)
        if channels_last:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format
        init_weight = torch.randn([out_channels, in_channels, kernel_size, kernel_size])
        self.weight = torch.nn.Parameter(init_weight.to(memory_format=memory_format))
        self.bias = torch.nn.Parameter(torch.zeros([out_channels]))
        self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))

    def forward(self, x, w, fused_modconv=True):
        # The weight gain is folded into the styles rather than applied to the weights.
        scaled_styles = self.affine(w) * self.weight_gain
        x = modulated_conv2d(x=x, weight=self.weight, styles=scaled_styles, demodulate=False, fused_modconv=fused_modconv)
        return bias_act.bias_act(x, self.bias.to(x.dtype), clamp=self.conv_clamp)

    def extra_repr(self):
        return f'in_channels={self.in_channels:d}, out_channels={self.out_channels:d}, w_dim={self.w_dim:d}'
375
+
376
+
377
+ # ----------------------------------------------------------------------------
378
+
379
@persistence.persistent_class
class SynthesisBlock(torch.nn.Module):
    """One resolution level of the synthesis network.

    The first block (``in_channels == 0``) starts from a learned constant and
    has a single convolution (``conv1``). Every other block has ``conv0``
    (built with ``up=2``) followed by ``conv1``. A ``ToRGBLayer`` is attached
    when the block is the last one or the architecture is ``'skip'``, and a
    1x1 ``skip`` convolution is attached for non-first ``'resnet'`` blocks.

    ``num_conv`` and ``num_torgb`` count the layers that consume a latent, so
    ``forward`` expects ``ws`` to hold ``num_conv + num_torgb`` latents.
    """

    def __init__(self,
                 in_channels,  # Number of input channels, 0 = first block.
                 out_channels,  # Number of output channels.
                 w_dim,  # Intermediate latent (W) dimensionality.
                 resolution,  # Resolution of this block, indexed as (height, width).
                 img_channels,  # Number of output color channels.
                 is_last,  # Is this the last block?
                 architecture='skip',  # Architecture: 'orig', 'skip', 'resnet'.
                 resample_filter=[1, 3, 3, 1],  # Low-pass filter to apply when resampling activations.
                 conv_clamp=256,  # Clamp the output of convolution layers to +-X, None = disable clamping.
                 use_fp16=False,  # Use FP16 for this block?
                 fp16_channels_last=False,  # Use channels-last memory format with FP16?
                 fused_modconv_default=True,
                 # Default value of fused_modconv. 'inference_only' = True for inference, False for training.
                 **layer_kwargs,  # Arguments for SynthesisLayer.
                 ):
        assert architecture in ['orig', 'skip', 'resnet']
        super().__init__()
        self.in_channels = in_channels
        self.w_dim = w_dim
        self.resolution = resolution
        self.img_channels = img_channels
        self.is_last = is_last
        self.architecture = architecture
        self.use_fp16 = use_fp16
        # Channels-last layout is only used together with FP16.
        self.channels_last = (use_fp16 and fp16_channels_last)
        self.fused_modconv_default = fused_modconv_default
        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))
        self.num_conv = 0
        self.num_torgb = 0

        first_block = (in_channels == 0)
        if first_block:
            # Learned constant input of shape [out_channels, height, width].
            const_shape = [out_channels, resolution[0], resolution[1]]
            self.const = torch.nn.Parameter(torch.randn(const_shape))
        else:
            self.conv0 = SynthesisLayer(
                in_channels, out_channels, w_dim=w_dim, resolution=resolution, up=2,
                resample_filter=resample_filter, conv_clamp=conv_clamp,
                channels_last=self.channels_last, **layer_kwargs)
            self.num_conv += 1

        self.conv1 = SynthesisLayer(
            out_channels, out_channels, w_dim=w_dim, resolution=resolution,
            conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs)
        self.num_conv += 1

        if is_last or architecture == 'skip':
            self.torgb = ToRGBLayer(
                out_channels, img_channels, w_dim=w_dim,
                conv_clamp=conv_clamp, channels_last=self.channels_last)
            self.num_torgb += 1

        if not first_block and architecture == 'resnet':
            self.skip = Conv2dLayer(
                in_channels, out_channels, kernel_size=1, bias=False, up=2,
                resample_filter=resample_filter, channels_last=self.channels_last)

    def forward(self, x, img, ws, force_fp32=False, fused_modconv=None, update_emas=False, **layer_kwargs):
        """Run the block and return ``(x, img)``.

        Args:
            x: Features from the previous block; ignored by the first block,
                otherwise asserted to be ``[N, in_channels, H // 2, W // 2]``.
            img: Image accumulated so far, or ``None``. When given it is
                asserted to be ``[N, img_channels, H // 2, W // 2]`` and is
                upsampled with ``resample_filter``.
            ws: Latents, asserted to be ``[N, num_conv + num_torgb, w_dim]``.
            force_fp32: Disable FP16/channels-last for this call. Forced on
                when ``ws`` is not on a CUDA device.
            fused_modconv: ``None`` selects ``fused_modconv_default``;
                ``'inference_only'`` resolves to ``not self.training``.
            update_emas: Accepted but unused.
            **layer_kwargs: Forwarded to the ``conv0``/``conv1`` layers.
        """
        _ = update_emas  # unused
        misc.assert_shape(ws, [None, self.num_conv + self.num_torgb, self.w_dim])
        style_iter = iter(ws.unbind(dim=1))  # One latent per conv/ToRGB layer, consumed in order.

        if ws.device.type != 'cuda':
            force_fp32 = True
        if self.use_fp16 and not force_fp32:
            dtype = torch.float16
        else:
            dtype = torch.float32
        if self.channels_last and not force_fp32:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format

        if fused_modconv is None:
            fused_modconv = self.fused_modconv_default
        if fused_modconv == 'inference_only':
            fused_modconv = (not self.training)

        # Input and main layers.
        if self.in_channels == 0:
            # First block: replicate the learned constant across the batch.
            x = self.const.to(dtype=dtype, memory_format=memory_format)
            x = x.unsqueeze(0).repeat([ws.shape[0], 1, 1, 1])
            x = self.conv1(x, next(style_iter), fused_modconv=fused_modconv, **layer_kwargs)
        else:
            half_h = self.resolution[0] // 2
            half_w = self.resolution[1] // 2
            misc.assert_shape(x, [None, self.in_channels, half_h, half_w])
            x = x.to(dtype=dtype, memory_format=memory_format)
            if self.architecture == 'resnet':
                # Residual branch and main branch are each scaled by sqrt(0.5).
                residual = self.skip(x, gain=np.sqrt(0.5))
                x = self.conv0(x, next(style_iter), fused_modconv=fused_modconv, **layer_kwargs)
                x = self.conv1(x, next(style_iter), fused_modconv=fused_modconv, gain=np.sqrt(0.5),
                               **layer_kwargs)
                x = residual.add_(x)
            else:
                x = self.conv0(x, next(style_iter), fused_modconv=fused_modconv, **layer_kwargs)
                x = self.conv1(x, next(style_iter), fused_modconv=fused_modconv, **layer_kwargs)

        # ToRGB.
        if img is not None:
            misc.assert_shape(img, [None, self.img_channels, self.resolution[0] // 2, self.resolution[1] // 2])
            img = upfirdn2d.upsample2d(img, self.resample_filter)
        if self.is_last or self.architecture == 'skip':
            rgb = self.torgb(x, next(style_iter), fused_modconv=fused_modconv)
            # The image is always kept in contiguous FP32.
            rgb = rgb.to(dtype=torch.float32, memory_format=torch.contiguous_format)
            if img is None:
                img = rgb
            else:
                img = img.add_(rgb)

        assert x.dtype == dtype
        assert img is None or img.dtype == torch.float32
        return x, img

    def extra_repr(self):
        """Return the block resolution and architecture for ``repr()``."""
        height, width = self.resolution[0], self.resolution[1]
        return f'resolution={height:d}x{width:d}, architecture={self.architecture:s}'
482
+
483
+
484
+ # ----------------------------------------------------------------------------
485
+
486
@persistence.persistent_class
class SynthesisNetwork(torch.nn.Module):
    """Stack of ``SynthesisBlock`` modules that turns per-layer latents into an image.

    Blocks are created for every power-of-two base resolution from 4 up to
    ``min(img_resolution)`` and stored as attributes ``b4``, ``b8``, ...
    Each block's actual resolution is the base resolution scaled by
    ``min_h`` / ``min_w``, the ratio of each image side to the shorter side.

    ``num_ws`` is the number of latents ``forward`` expects: the conv layers
    of every block plus the ToRGB layer of the last block.
    """

    def __init__(self,
                 w_dim,  # Intermediate latent (W) dimensionality.
                 img_resolution,  # Output image resolution, indexed as (height, width).
                 img_channels,  # Number of color channels.
                 channel_base=32768,  # Overall multiplier for the number of channels.
                 channel_max=512,  # Maximum number of channels in any layer.
                 num_fp16_res=4,  # Use FP16 for the N highest resolutions.
                 **block_kwargs,  # Arguments for SynthesisBlock.
                 ):
        # Both sides must be powers of two and at least 4.
        assert img_resolution[0] >= 4 and img_resolution[0] & (img_resolution[0] - 1) == 0
        assert img_resolution[1] >= 4 and img_resolution[1] & (img_resolution[1] - 1) == 0
        super().__init__()
        self.w_dim = w_dim
        self.img_resolution = img_resolution
        self.img_resolution_log2 = int(np.log2(min(img_resolution)))
        self.min_h = img_resolution[0] // min(img_resolution)
        self.min_w = img_resolution[1] // min(img_resolution)
        self.img_channels = img_channels
        self.num_fp16_res = num_fp16_res
        self.block_resolutions = [2 ** i for i in range(2, self.img_resolution_log2 + 1)]
        channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions}
        # FP16 is never enabled below base resolution 8.
        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)

        self.num_ws = 0
        for res in self.block_resolutions:
            in_channels = channels_dict[res // 2] if res > 4 else 0
            out_channels = channels_dict[res]
            use_fp16 = (res >= fp16_resolution)
            is_last = (res == min(self.img_resolution))
            block = SynthesisBlock(in_channels, out_channels, w_dim=w_dim,
                                   resolution=(res * self.min_h, res * self.min_w),
                                   img_channels=img_channels, is_last=is_last, use_fp16=use_fp16, **block_kwargs)
            self.num_ws += block.num_conv
            if is_last:
                self.num_ws += block.num_torgb
            setattr(self, f'b{res}', block)

    def forward(self, ws, **block_kwargs):
        """Synthesize an image from ``ws`` (asserted to be ``[N, num_ws, w_dim]``).

        Extra keyword arguments are forwarded to every block. Returns the
        image produced by the final block.
        """
        block_ws = []
        with torch.autograd.profiler.record_function('split_ws'):
            misc.assert_shape(ws, [None, self.num_ws, self.w_dim])
            ws = ws.to(torch.float32)
            w_idx = 0
            for res in self.block_resolutions:
                block = getattr(self, f'b{res}')
                # Each slice covers the block's conv and ToRGB latents, but the
                # cursor advances by num_conv only, so the latent used by a
                # block's ToRGB layer is also the first latent of the next block.
                block_ws.append(ws.narrow(1, w_idx, block.num_conv + block.num_torgb))
                w_idx += block.num_conv

        x = img = None
        for res, cur_ws in zip(self.block_resolutions, block_ws):
            block = getattr(self, f'b{res}')
            x, img = block(x, img, cur_ws, **block_kwargs)
        return img

    def extra_repr(self):
        """Return the main hyper-parameters for ``repr()``."""
        # Each entry is its own list item: a missing comma would fuse two
        # adjacent f-strings and drop the separating space in the output.
        return ' '.join([
            f'w_dim={self.w_dim:d}, num_ws={self.num_ws:d},',
            f'img_resolution={self.img_resolution[0]:d}x{self.img_resolution[1]:d},',
            f'img_channels={self.img_channels:d},',
            f'num_fp16_res={self.num_fp16_res:d}'])
548
+
549
+
550
+ # ----------------------------------------------------------------------------
551
+
552
@persistence.persistent_class
class Generator(torch.nn.Module):
    """Generator made of a ``MappingNetwork`` followed by a ``SynthesisNetwork``.

    The synthesis network is built first because the mapping network needs
    its ``num_ws`` to know how many latents to produce.
    """

    def __init__(self,
                 z_dim,  # Input latent (Z) dimensionality.
                 c_dim,  # Conditioning label (C) dimensionality.
                 w_dim,  # Intermediate latent (W) dimensionality.
                 img_resolution,  # Output resolution.
                 img_channels,  # Number of output color channels.
                 mapping_kwargs={},  # Arguments for MappingNetwork.
                 **synthesis_kwargs,  # Arguments for SynthesisNetwork.
                 ):
        super().__init__()
        self.z_dim = z_dim
        self.c_dim = c_dim
        self.w_dim = w_dim
        self.img_resolution = img_resolution
        self.img_channels = img_channels

        synthesis = SynthesisNetwork(
            w_dim=w_dim,
            img_resolution=img_resolution,
            img_channels=img_channels,
            **synthesis_kwargs,
        )
        self.synthesis = synthesis
        self.num_ws = self.synthesis.num_ws
        self.mapping = MappingNetwork(
            z_dim=z_dim,
            c_dim=c_dim,
            w_dim=w_dim,
            num_ws=self.num_ws,
            **mapping_kwargs,
        )

    def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, update_emas=False, **synthesis_kwargs):
        """Map ``z`` and ``c`` to latents, then synthesize and return the image.

        ``truncation_psi``, ``truncation_cutoff`` and ``update_emas`` go to the
        mapping network; ``update_emas`` and ``**synthesis_kwargs`` go to the
        synthesis network.
        """
        latents = self.mapping(
            z,
            c,
            truncation_psi=truncation_psi,
            truncation_cutoff=truncation_cutoff,
            update_emas=update_emas,
        )
        return self.synthesis(latents, update_emas=update_emas, **synthesis_kwargs)
579
+
580
+
581
+ # ----------------------------------------------------------------------------
582
+
583
@persistence.persistent_class
class DiscriminatorBlock(torch.nn.Module):
    """One resolution level of the discriminator.

    The block optionally converts an image to features (``fromrgb``, present
    for the first block or the ``'skip'`` architecture), then applies
    ``conv0`` and ``conv1`` (the latter built with ``down=2``). The
    ``'resnet'`` architecture adds a 1x1 ``skip`` convolution, also built
    with ``down=2``.

    ``num_layers`` counts the layers created by this block. A layer is
    created as trainable when its global index (``first_layer_idx`` plus its
    position in the block) is at least ``freeze_layers``.
    """

    def __init__(self,
                 in_channels,  # Number of input channels, 0 = first block.
                 tmp_channels,  # Number of intermediate channels.
                 out_channels,  # Number of output channels.
                 resolution,  # Resolution of this block, indexed as (height, width).
                 img_channels,  # Number of input color channels.
                 first_layer_idx,  # Index of the first layer.
                 architecture='resnet',  # Architecture: 'orig', 'skip', 'resnet'.
                 activation='lrelu',  # Activation function: 'relu', 'lrelu', etc.
                 resample_filter=[1, 3, 3, 1],  # Low-pass filter to apply when resampling activations.
                 conv_clamp=None,  # Clamp the output of convolution layers to +-X, None = disable clamping.
                 use_fp16=False,  # Use FP16 for this block?
                 fp16_channels_last=False,  # Use channels-last memory format with FP16?
                 freeze_layers=0,  # Freeze-D: Number of layers to freeze.
                 ):
        assert in_channels in [0, tmp_channels]
        assert architecture in ['orig', 'skip', 'resnet']
        super().__init__()
        self.in_channels = in_channels
        self.resolution = resolution
        self.img_channels = img_channels
        self.first_layer_idx = first_layer_idx
        self.architecture = architecture
        self.use_fp16 = use_fp16
        # Channels-last layout is only used together with FP16.
        self.channels_last = (use_fp16 and fp16_channels_last)
        self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter))

        self.num_layers = 0

        def next_trainable():
            # Claim the next layer slot and report whether it lies past the frozen prefix.
            layer_idx = self.first_layer_idx + self.num_layers
            self.num_layers += 1
            return layer_idx >= freeze_layers

        if in_channels == 0 or architecture == 'skip':
            self.fromrgb = Conv2dLayer(
                img_channels, tmp_channels, kernel_size=1, activation=activation,
                trainable=next_trainable(), conv_clamp=conv_clamp,
                channels_last=self.channels_last)

        self.conv0 = Conv2dLayer(
            tmp_channels, tmp_channels, kernel_size=3, activation=activation,
            trainable=next_trainable(), conv_clamp=conv_clamp,
            channels_last=self.channels_last)

        self.conv1 = Conv2dLayer(
            tmp_channels, out_channels, kernel_size=3, activation=activation, down=2,
            trainable=next_trainable(), resample_filter=resample_filter, conv_clamp=conv_clamp,
            channels_last=self.channels_last)

        if architecture == 'resnet':
            self.skip = Conv2dLayer(
                tmp_channels, out_channels, kernel_size=1, bias=False, down=2,
                trainable=next_trainable(), resample_filter=resample_filter,
                channels_last=self.channels_last)

    def forward(self, x, img, force_fp32=False):
        """Run the block and return ``(x, img)``.

        Args:
            x: Features from the previous block or ``None``; when given,
                asserted to be ``[N, in_channels, H, W]``.
            img: Input image; required whenever the block has ``fromrgb``, and
                then asserted to be ``[N, img_channels, H, W]``.
            force_fp32: Disable FP16/channels-last for this call. Forced on
                when the input is not on a CUDA device.

        The returned ``img`` is the downsampled image for the ``'skip'``
        architecture, ``None`` when ``fromrgb`` ran under another
        architecture, and the unchanged input otherwise.
        """
        reference = x if x is not None else img
        if reference.device.type != 'cuda':
            force_fp32 = True
        if self.use_fp16 and not force_fp32:
            dtype = torch.float16
        else:
            dtype = torch.float32
        if self.channels_last and not force_fp32:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format

        height, width = self.resolution[0], self.resolution[1]

        # Input.
        if x is not None:
            misc.assert_shape(x, [None, self.in_channels, height, width])
            x = x.to(dtype=dtype, memory_format=memory_format)

        # FromRGB.
        if self.in_channels == 0 or self.architecture == 'skip':
            misc.assert_shape(img, [None, self.img_channels, height, width])
            img = img.to(dtype=dtype, memory_format=memory_format)
            rgb_features = self.fromrgb(img)
            if x is None:
                x = rgb_features
            else:
                x = x + rgb_features
            if self.architecture == 'skip':
                img = upfirdn2d.downsample2d(img, self.resample_filter)
            else:
                img = None

        # Main layers.
        if self.architecture == 'resnet':
            # Residual branch and main branch are each scaled by sqrt(0.5).
            residual = self.skip(x, gain=np.sqrt(0.5))
            x = self.conv0(x)
            x = self.conv1(x, gain=np.sqrt(0.5))
            x = residual.add_(x)
        else:
            x = self.conv0(x)
            x = self.conv1(x)

        assert x.dtype == dtype
        return x, img

    def extra_repr(self):
        """Return the block resolution and architecture for ``repr()``."""
        height, width = self.resolution[0], self.resolution[1]
        return f'resolution={height:d}x{width:d}, architecture={self.architecture:s}'
675
+
676
+
677
+ # ----------------------------------------------------------------------------
678
+
679
@persistence.persistent_class
class MinibatchStdLayer(torch.nn.Module):
    """Append minibatch standard-deviation statistics to the input as extra channels.

    The batch is split into groups of up to ``group_size`` samples (the whole
    batch when ``group_size`` is ``None``). The per-group standard deviation
    is averaged over channels and pixels into ``num_channels`` values, which
    are broadcast back and concatenated to the input.
    """

    def __init__(self, group_size, num_channels=1):
        super().__init__()
        self.group_size = group_size
        self.num_channels = num_channels

    def forward(self, x):
        """Return ``x`` with ``num_channels`` statistic channels appended along dim 1."""
        batch, channels, height, width = x.shape
        with misc.suppress_tracer_warnings():  # as_tensor results are registered as constants
            if self.group_size is not None:
                group = torch.min(torch.as_tensor(self.group_size), torch.as_tensor(batch))
            else:
                group = batch
        num_stats = self.num_channels
        per_stat = channels // num_stats

        # [GnFcHW] Split the batch into n groups of size G and the channels into F groups of size c.
        stats = x.reshape(group, -1, num_stats, per_stat, height, width)
        stats = stats - stats.mean(dim=0)  # [GnFcHW] Subtract mean over group.
        stats = stats.square().mean(dim=0)  # [nFcHW] Calc variance over group.
        stats = (stats + 1e-8).sqrt()  # [nFcHW] Calc stddev over group.
        stats = stats.mean(dim=[2, 3, 4])  # [nF] Take average over channels and pixels.
        stats = stats.reshape(-1, num_stats, 1, 1)  # [nF11] Add missing dimensions.
        stats = stats.repeat(group, 1, height, width)  # [NFHW] Replicate over group and pixels.
        return torch.cat([x, stats], dim=1)  # [NCHW] Append to input as new channels.

    def extra_repr(self):
        """Return the group size and number of statistic channels for ``repr()``."""
        return f'group_size={self.group_size}, num_channels={self.num_channels:d}'
706
+
707
+
708
+ # ----------------------------------------------------------------------------
709
+
710
@persistence.persistent_class
class DiscriminatorEpilogue(torch.nn.Module):
    """Final discriminator stage that reduces the feature map to a score.

    Pipeline: optional ``fromrgb`` (``'skip'`` architecture only), optional
    minibatch-stddev layer, a 3x3 convolution, a fully connected layer over
    the flattened features, and an output layer. With ``cmap_dim > 0`` the
    output is projected onto the conditioning vector ``cmap``.
    """

    def __init__(self,
                 in_channels,  # Number of input channels.
                 cmap_dim,  # Dimensionality of mapped conditioning label, 0 = no label.
                 resolution,  # Resolution of this block, indexed as (height, width).
                 img_channels,  # Number of input color channels.
                 architecture='resnet',  # Architecture: 'orig', 'skip', 'resnet'.
                 mbstd_group_size=4,  # Group size for the minibatch standard deviation layer, None = entire minibatch.
                 mbstd_num_channels=1,  # Number of features for the minibatch standard deviation layer, 0 = disable.
                 activation='lrelu',  # Activation function: 'relu', 'lrelu', etc.
                 conv_clamp=None,  # Clamp the output of convolution layers to +-X, None = disable clamping.
                 ):
        assert architecture in ['orig', 'skip', 'resnet']
        super().__init__()
        self.in_channels = in_channels
        self.cmap_dim = cmap_dim
        self.resolution = resolution
        self.img_channels = img_channels
        self.architecture = architecture

        if architecture == 'skip':
            self.fromrgb = Conv2dLayer(img_channels, in_channels, kernel_size=1, activation=activation)

        if mbstd_num_channels > 0:
            self.mbstd = MinibatchStdLayer(group_size=mbstd_group_size, num_channels=mbstd_num_channels)
        else:
            self.mbstd = None

        # The conv input is widened by the channels the stddev layer appends.
        conv_in_channels = in_channels + mbstd_num_channels
        self.conv = Conv2dLayer(conv_in_channels, in_channels, kernel_size=3, activation=activation,
                                conv_clamp=conv_clamp)

        flat_features = in_channels * resolution[0] * resolution[1]
        self.fc = FullyConnectedLayer(flat_features, in_channels, activation=activation)

        out_features = 1 if cmap_dim == 0 else cmap_dim
        self.out = FullyConnectedLayer(in_channels, out_features)

    def forward(self, x, img, cmap, force_fp32=False):
        """Return the discriminator output for features ``x``.

        Args:
            x: Features, asserted to be ``[N, in_channels, H, W]``.
            img: Image, used only by the ``'skip'`` architecture and then
                asserted to be ``[N, img_channels, H, W]``.
            cmap: Conditioning vector, used only when ``cmap_dim > 0`` and
                then asserted to be ``[N, cmap_dim]``.
            force_fp32: Accepted but unused; this stage always runs in FP32.
        """
        height, width = self.resolution[0], self.resolution[1]
        misc.assert_shape(x, [None, self.in_channels, height, width])  # [NCHW]
        _ = force_fp32  # unused
        dtype = torch.float32
        memory_format = torch.contiguous_format

        # FromRGB.
        x = x.to(dtype=dtype, memory_format=memory_format)
        if self.architecture == 'skip':
            misc.assert_shape(img, [None, self.img_channels, height, width])
            img = img.to(dtype=dtype, memory_format=memory_format)
            x = x + self.fromrgb(img)

        # Main layers.
        if self.mbstd is not None:
            x = self.mbstd(x)
        features = self.conv(x)
        features = self.fc(features.flatten(1))
        x = self.out(features)

        # Conditioning.
        if self.cmap_dim > 0:
            misc.assert_shape(cmap, [None, self.cmap_dim])
            # Dot product with the conditioning vector, scaled by 1 / sqrt(cmap_dim).
            projected = (x * cmap).sum(dim=1, keepdim=True)
            x = projected * (1 / np.sqrt(self.cmap_dim))

        assert x.dtype == dtype
        return x

    def extra_repr(self):
        """Return the block resolution and architecture for ``repr()``."""
        height, width = self.resolution[0], self.resolution[1]
        return f'resolution={height:d}x{width:d}, architecture={self.architecture:s}'
770
+
771
+
772
+ # ----------------------------------------------------------------------------
773
+
774
@persistence.persistent_class
class Discriminator(torch.nn.Module):
    """Discriminator built from ``DiscriminatorBlock`` stages and a final epilogue.

    Blocks are created for every power-of-two base resolution from
    ``min(img_resolution)`` down to 8 and stored as attributes ``b<res>``;
    the epilogue is stored as ``b4``. Each block's actual resolution is the
    base resolution scaled by ``min_h`` / ``min_w``, the ratio of each image
    side to the shorter side. A ``MappingNetwork`` for the conditioning label
    is created only when ``c_dim > 0``.
    """

    def __init__(self,
                 c_dim,  # Conditioning label (C) dimensionality.
                 img_resolution,  # Input resolution, indexed as (height, width).
                 img_channels,  # Number of input color channels.
                 architecture='resnet',  # Architecture: 'orig', 'skip', 'resnet'.
                 channel_base=32768,  # Overall multiplier for the number of channels.
                 channel_max=512,  # Maximum number of channels in any layer.
                 num_fp16_res=4,  # Use FP16 for the N highest resolutions.
                 conv_clamp=256,  # Clamp the output of convolution layers to +-X, None = disable clamping.
                 cmap_dim=None,  # Dimensionality of mapped conditioning label, None = default.
                 block_kwargs={},  # Arguments for DiscriminatorBlock.
                 mapping_kwargs={},  # Arguments for MappingNetwork.
                 epilogue_kwargs={},  # Arguments for DiscriminatorEpilogue.
                 ):
        super().__init__()
        shorter_side = min(img_resolution)
        self.c_dim = c_dim
        self.img_resolution = img_resolution
        self.img_resolution_log2 = int(np.log2(shorter_side))
        self.min_h = img_resolution[0] // shorter_side
        self.min_w = img_resolution[1] // shorter_side
        self.img_channels = img_channels
        self.block_resolutions = [2 ** exp for exp in range(self.img_resolution_log2, 2, -1)]
        channels_dict = {
            res: min(channel_base // res, channel_max)
            for res in self.block_resolutions + [4]
        }
        # FP16 is never enabled below base resolution 8.
        fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8)

        if cmap_dim is None:
            cmap_dim = channels_dict[4]
        if c_dim == 0:
            # Without labels there is nothing to project onto.
            cmap_dim = 0

        common_kwargs = {
            'img_channels': img_channels,
            'architecture': architecture,
            'conv_clamp': conv_clamp,
        }
        cur_layer_idx = 0
        for res in self.block_resolutions:
            # The highest-resolution block takes only the image as input.
            in_channels = channels_dict[res] if res < shorter_side else 0
            tmp_channels = channels_dict[res]
            out_channels = channels_dict[res // 2]
            use_fp16 = (res >= fp16_resolution)
            block = DiscriminatorBlock(
                in_channels, tmp_channels, out_channels,
                resolution=(res * self.min_h, res * self.min_w),
                first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs,
                **common_kwargs)
            setattr(self, f'b{res}', block)
            cur_layer_idx += block.num_layers

        if c_dim > 0:
            self.mapping = MappingNetwork(
                z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None,
                **mapping_kwargs)
        self.b4 = DiscriminatorEpilogue(
            channels_dict[4], cmap_dim=cmap_dim,
            resolution=(4 * self.min_h, 4 * self.min_w), **epilogue_kwargs,
            **common_kwargs)

    def forward(self, img, c, update_emas=False, **block_kwargs):
        """Return the discriminator output for ``img`` with conditioning label ``c``.

        ``update_emas`` is accepted but unused. ``**block_kwargs`` are
        forwarded to every ``DiscriminatorBlock`` but not to the epilogue.
        """
        _ = update_emas  # unused
        features = None
        for res in self.block_resolutions:
            stage = getattr(self, f'b{res}')
            features, img = stage(features, img, **block_kwargs)

        if self.c_dim > 0:
            cmap = self.mapping(None, c)
        else:
            cmap = None
        return self.b4(features, img, cmap)

    def extra_repr(self):
        """Return the label size, resolution and channel count for ``repr()``."""
        return f'c_dim={self.c_dim:d}, img_resolution={self.img_resolution[0]:d}x{self.img_resolution[1]:d}, img_channels={self.img_channels:d}'
841
+
842
+ # ----------------------------------------------------------------------------