add face_upsample.
- README.md +5 -4
- basicsr/utils/realesrgan_utils.py +13 -10
- facelib/utils/face_restoration_helper.py +20 -11
- inference_codeformer.py +10 -1
- inputs/whole_imgs/stable_diffusion_00.jpg +0 -0
README.md
CHANGED

@@ -16,10 +16,11 @@ S-Lab, Nanyang Technological University
 <img src="assets/network.jpg" width="800px"/>
 
 
-:star: If CodeFormer is helpful to your projects, please help star this repo. Thanks! :hugs:
+:star: If CodeFormer is helpful to your images or projects, please help star this repo. Thanks! :hugs:
 
 ### Updates
 
+- **2022.09.04**: Add face upsampling '--face_upsample' for high-resolution AI-created face enhancement.
 - **2022.08.23**: Some modifications on face detection and fusion for better AI-created face enhancement.
 - **2022.08.07**: Integrate Real-ESRGAN to support background image enhancement.
 - **2022.07.29**: Integrate new face detectors of `['RetinaFace'(default), 'YOLOv5']`.

@@ -59,7 +60,7 @@ cd CodeFormer
 
 # create new anaconda env
 conda create -n codeformer python=3.8 -y
-
+conda activate codeformer
 
 # install python dependencies
 pip3 install -r requirements.txt

@@ -90,8 +91,8 @@ You can put the testing images in the `inputs/TestWhole` folder. If you would li
 python inference_codeformer.py --w 0.5 --has_aligned --test_path [input folder]
 
 # For the whole images
-#
-#
+# Add '--bg_upsampler realesrgan' to enhance the background regions with Real-ESRGAN
+# Add '--face_upsample' to further upsample the restored faces with Real-ESRGAN
 python inference_codeformer.py --w 0.7 --test_path [input folder]
 ```
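Taken together, the updated README makes both enhancements opt-in flags on the whole-image command, so a run using everything would look like `python inference_codeformer.py --w 0.7 --bg_upsampler realesrgan --face_upsample --test_path [input folder]` (keeping the README's `[input folder]` placeholder).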
basicsr/utils/realesrgan_utils.py
CHANGED

@@ -196,16 +196,19 @@ class RealESRGANer():
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 
         # ------------------- process image (without the alpha channel) ------------------- #
-        self.pre_process(img)
-        if self.tile_size > 0:
-            self.tile_process()
-        else:
-            self.process()
-        output_img = self.post_process()
-        output_img = output_img.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-        output_img = np.transpose(output_img[[2, 1, 0], :, :], (1, 2, 0))
-        if img_mode == 'L':
-            output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+        with torch.no_grad():
+            self.pre_process(img)
+            if self.tile_size > 0:
+                self.tile_process()
+            else:
+                self.process()
+            output_img_t = self.post_process()
+            output_img = output_img_t.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+            output_img = np.transpose(output_img[[2, 1, 0], :, :], (1, 2, 0))
+            if img_mode == 'L':
+                output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+        del output_img_t
+        torch.cuda.empty_cache()
 
         # ------------------- process the alpha channel if necessary ------------------- #
         if img_mode == 'RGBA':
facelib/utils/face_restoration_helper.py
CHANGED

@@ -294,10 +294,12 @@ class FaceRestoreHelper(object):
             save_path = f'{path}_{idx:02d}.pth'
             torch.save(inverse_affine, save_path)
 
+
     def add_restored_face(self, face):
         self.restored_faces.append(face)
 
-    def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False):
+
+    def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
         h, w, _ = self.input_img.shape
         h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)
 

@@ -313,16 +315,23 @@ class FaceRestoreHelper(object):
 
         inv_mask_borders = []
         for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
-            # Add an offset to inverse affine matrix, for more precise back alignment
-            if self.upscale_factor > 1:
-                extra_offset = 0.5 * self.upscale_factor
+            if face_upsampler is not None:
+                restored_face = face_upsampler.enhance(restored_face, outscale=self.upscale_factor)[0]
+                inverse_affine /= self.upscale_factor
+                inverse_affine[:, 2] *= self.upscale_factor
+                face_size = (self.face_size[0]*self.upscale_factor, self.face_size[1]*self.upscale_factor)
             else:
-                extra_offset = 0
-            inverse_affine[:, 2] += extra_offset
+                # Add an offset to inverse affine matrix, for more precise back alignment
+                if self.upscale_factor > 1:
+                    extra_offset = 0.5 * self.upscale_factor
+                else:
+                    extra_offset = 0
+                inverse_affine[:, 2] += extra_offset
+                face_size = self.face_size
             inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
 
             # if draw_box or not self.use_parse:  # use square parse maps
-            # mask = np.ones(self.face_size, dtype=np.float32)
+            # mask = np.ones(face_size, dtype=np.float32)
             # inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
             # # remove the black borders
             # inv_mask_erosion = cv2.erode(

@@ -331,7 +340,7 @@ class FaceRestoreHelper(object):
             # total_face_area = np.sum(inv_mask_erosion)  # // 3
             # # add border
             # if draw_box:
-            # h, w = self.face_size
+            # h, w = face_size
             # mask_border = np.ones((h, w, 3), dtype=np.float32)
             # border = int(1400/np.sqrt(total_face_area))
             # mask_border[border:h-border, border:w-border,:] = 0

@@ -349,7 +358,7 @@ class FaceRestoreHelper(object):
             # inv_soft_mask = inv_soft_mask[:, :, None]
 
             # always use square mask
-            mask = np.ones(self.face_size, dtype=np.float32)
+            mask = np.ones(face_size, dtype=np.float32)
             inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
             # remove the black borders
             inv_mask_erosion = cv2.erode(

@@ -358,7 +367,7 @@ class FaceRestoreHelper(object):
             total_face_area = np.sum(inv_mask_erosion)  # // 3
             # add border
             if draw_box:
-                h, w = self.face_size
+                h, w = face_size
                 mask_border = np.ones((h, w, 3), dtype=np.float32)
                 border = int(1400/np.sqrt(total_face_area))
                 mask_border[border:h-border, border:w-border,:] = 0

@@ -400,7 +409,7 @@ class FaceRestoreHelper(object):
                 parse_mask[:, -thres:] = 0
                 parse_mask = parse_mask / 255.
 
-                parse_mask = cv2.resize(parse_mask, self.face_size)
+                parse_mask = cv2.resize(parse_mask, face_size)
                 parse_mask = cv2.warpAffine(parse_mask, inverse_affine, (w_up, h_up), flags=3)
                 inv_soft_parse_mask = parse_mask[:, :, None]
                 # pasted_face = inv_restored
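The subtle part of the new `face_upsampler` branch above is the matrix arithmetic. `inverse_affine` maps the aligned face crop back into the upscaled input image; once the restored crop itself is enlarged by `upscale_factor`, its source coordinates are denser by that factor, so the 2x2 linear part of the matrix must be divided by it while the translation column stays put. Dividing the whole matrix and then multiplying column 2 back does exactly that. A standalone sanity check with a toy matrix and a random crop (nothing here comes from the repo):

```python
import cv2
import numpy as np

s = 2                                                   # stand-in upscale factor
face = np.random.rand(512, 512, 3).astype(np.float32)   # "restored" 512x512 crop
face_up = cv2.resize(face, (512 * s, 512 * s))          # crop after face upsampling

# Toy inverse affine: rotate and scale the crop into a 1024x1024 canvas.
inverse_affine = cv2.getRotationMatrix2D((256.0, 256.0), 15.0, 2.0)

# Same rescaling as the diff: linear part divided by s, translation preserved.
inverse_affine_up = inverse_affine / s
inverse_affine_up[:, 2] *= s

a = cv2.warpAffine(face, inverse_affine, (1024, 1024))
b = cv2.warpAffine(face_up, inverse_affine_up, (1024, 1024))
print(float(np.abs(a - b).mean()))  # near 0: both warps place the face identically
```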
inference_codeformer.py
CHANGED

@@ -30,6 +30,7 @@ if __name__ == '__main__':
     parser.add_argument('--detection_model', type=str, default='retinaface_resnet50')
     parser.add_argument('--draw_box', action='store_true')
     parser.add_argument('--bg_upsampler', type=str, default='None', help='background upsampler. Optional: realesrgan')
+    parser.add_argument('--face_upsample', action='store_true', help='face upsampler after enhancement.')
     parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
 
     args = parser.parse_args()

@@ -80,6 +81,11 @@ if __name__ == '__main__':
     # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
     if not args.has_aligned:
         print(f'Using [{args.detection_model}] for face detection network.')
+        if args.bg_upsampler != 'None':
+            print(f'Background upsampling: True, Face upsampling: {args.face_upsample}')
+        else:
+            print('Background upsampling: False, Face upsampling: False')
+
     face_helper = FaceRestoreHelper(
         args.upscale,
         face_size=512,

@@ -143,7 +149,10 @@ if __name__ == '__main__':
             bg_img = None
         face_helper.get_inverse_affine(None)
         # paste each restored face to the input image
-        restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)
+        if args.face_upsample and bg_upsampler is not None:
+            restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box, face_upsampler=bg_upsampler)
+        else:
+            restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)
 
         # save faces
         for idx, (cropped_face, restored_face) in enumerate(zip(face_helper.cropped_faces, face_helper.restored_faces)):
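Note the design choice in the last hunk: `--face_upsample` does not load a second model. When the flag is set and a background upsampler exists, the same Real-ESRGAN instance is handed to `paste_faces_to_input_image` as `face_upsampler`, which is why face upsampling only takes effect together with `--bg_upsampler realesrgan`.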
inputs/whole_imgs/stable_diffusion_00.jpg
DELETED
Binary file (127 kB)