add face_upsample.
- README.md +5 -4
- basicsr/utils/realesrgan_utils.py +13 -10
- facelib/utils/face_restoration_helper.py +20 -11
- inference_codeformer.py +10 -1
- inputs/whole_imgs/stable_diffusion_00.jpg +0 -0
README.md
CHANGED

@@ -16,10 +16,11 @@ S-Lab, Nanyang Technological University
 <img src="assets/network.jpg" width="800px"/>
 
 
-:star: If CodeFormer is helpful to your projects, please help star this repo. Thanks! :hugs:
+:star: If CodeFormer is helpful to your images or projects, please help star this repo. Thanks! :hugs:
 
 ### Updates
 
+- **2022.09.04**: Add face upsampling '--face_upsample' for high-resolution AI-created face enhancement.
 - **2022.08.23**: Some modifications on face detection and fusion for better AI-created face enhancement.
 - **2022.08.07**: Integrate Real-ESRGAN to support background image enhancement.
 - **2022.07.29**: Integrate new face detectors of `['RetinaFace'(default), 'YOLOv5']`.

@@ -59,7 +60,7 @@ cd CodeFormer
 
 # create new anaconda env
 conda create -n codeformer python=3.8 -y
-
+conda activate codeformer
 
 # install python dependencies
 pip3 install -r requirements.txt

@@ -90,8 +91,8 @@ You can put the testing images in the `inputs/TestWhole` folder. If you would li
 python inference_codeformer.py --w 0.5 --has_aligned --test_path [input folder]
 
 # For the whole images
-#
-#
+# Add '--bg_upsampler realesrgan' to enhance the background regions with Real-ESRGAN
+# Add '--face_upsample' to further upsample the restored faces with Real-ESRGAN
 python inference_codeformer.py --w 0.7 --test_path [input folder]
 ```
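Taken together, the updated README makes both enhancements opt-in flags on the whole-image command, so a run using everything would look like `python inference_codeformer.py --w 0.7 --bg_upsampler realesrgan --face_upsample --test_path [input folder]` (keeping the README's `[input folder]` placeholder).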
basicsr/utils/realesrgan_utils.py
CHANGED

@@ -196,16 +196,19 @@ class RealESRGANer():
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 
         # ------------------- process image (without the alpha channel) ------------------- #
-        self.pre_process(img)
-        if self.tile_size > 0:
-            self.tile_process()
-        else:
-            self.process()
-        output_img = self.post_process()
-        output_img = output_img.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-        output_img = np.transpose(output_img[[2, 1, 0], :, :], (1, 2, 0))
-        if img_mode == 'L':
-            output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+        with torch.no_grad():
+            self.pre_process(img)
+            if self.tile_size > 0:
+                self.tile_process()
+            else:
+                self.process()
+            output_img_t = self.post_process()
+            output_img = output_img_t.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+            output_img = np.transpose(output_img[[2, 1, 0], :, :], (1, 2, 0))
+            if img_mode == 'L':
+                output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+        del output_img_t
+        torch.cuda.empty_cache()
 
         # ------------------- process the alpha channel if necessary ------------------- #
         if img_mode == 'RGBA':
facelib/utils/face_restoration_helper.py
CHANGED

@@ -294,10 +294,12 @@ class FaceRestoreHelper(object):
             save_path = f'{path}_{idx:02d}.pth'
             torch.save(inverse_affine, save_path)
 
+
     def add_restored_face(self, face):
         self.restored_faces.append(face)
 
-    def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False):
+
+    def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
         h, w, _ = self.input_img.shape
         h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)
 

@@ -313,16 +315,23 @@ class FaceRestoreHelper(object):
 
         inv_mask_borders = []
         for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
-            # Add an offset to inverse affine matrix, for more precise back alignment
-            if self.upscale_factor > 1:
-                extra_offset = 0.5 * self.upscale_factor
+            if face_upsampler is not None:
+                restored_face = face_upsampler.enhance(restored_face, outscale=self.upscale_factor)[0]
+                inverse_affine /= self.upscale_factor
+                inverse_affine[:, 2] *= self.upscale_factor
+                face_size = (self.face_size[0]*self.upscale_factor, self.face_size[1]*self.upscale_factor)
             else:
-                extra_offset = 0
-            inverse_affine[:, 2] += extra_offset
+                # Add an offset to inverse affine matrix, for more precise back alignment
+                if self.upscale_factor > 1:
+                    extra_offset = 0.5 * self.upscale_factor
+                else:
+                    extra_offset = 0
+                inverse_affine[:, 2] += extra_offset
+                face_size = self.face_size
             inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
 
             # if draw_box or not self.use_parse:  # use square parse maps
-            # mask = np.ones(self.face_size, dtype=np.float32)
+            # mask = np.ones(face_size, dtype=np.float32)
             # inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
             # # remove the black borders
             # inv_mask_erosion = cv2.erode(

@@ -331,7 +340,7 @@ class FaceRestoreHelper(object):
             # total_face_area = np.sum(inv_mask_erosion)  # // 3
             # # add border
             # if draw_box:
-            # h, w = self.face_size
+            # h, w = face_size
             # mask_border = np.ones((h, w, 3), dtype=np.float32)
             # border = int(1400/np.sqrt(total_face_area))
             # mask_border[border:h-border, border:w-border,:] = 0

@@ -349,7 +358,7 @@ class FaceRestoreHelper(object):
             # inv_soft_mask = inv_soft_mask[:, :, None]
 
             # always use square mask
-            mask = np.ones(self.face_size, dtype=np.float32)
+            mask = np.ones(face_size, dtype=np.float32)
             inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
             # remove the black borders
             inv_mask_erosion = cv2.erode(

@@ -358,7 +367,7 @@ class FaceRestoreHelper(object):
             total_face_area = np.sum(inv_mask_erosion)  # // 3
             # add border
             if draw_box:
-                h, w = self.face_size
+                h, w = face_size
                 mask_border = np.ones((h, w, 3), dtype=np.float32)
                 border = int(1400/np.sqrt(total_face_area))
                 mask_border[border:h-border, border:w-border,:] = 0

@@ -400,7 +409,7 @@ class FaceRestoreHelper(object):
                 parse_mask[:, -thres:] = 0
                 parse_mask = parse_mask / 255.
 
-                parse_mask = cv2.resize(parse_mask, self.face_size)
+                parse_mask = cv2.resize(parse_mask, face_size)
                 parse_mask = cv2.warpAffine(parse_mask, inverse_affine, (w_up, h_up), flags=3)
                 inv_soft_parse_mask = parse_mask[:, :, None]
                 # pasted_face = inv_restored
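The subtle part of the new `face_upsampler` branch above is the matrix arithmetic. `inverse_affine` maps the aligned face crop back into the upscaled input image; once the restored crop itself is enlarged by `upscale_factor`, its source coordinates are denser by that factor, so the 2x2 linear part of the matrix must be divided by it while the translation column stays put. Dividing the whole matrix and then multiplying column 2 back does exactly that. A standalone sanity check with a toy matrix and a random crop (nothing here comes from the repo):

```python
import cv2
import numpy as np

s = 2                                                   # stand-in upscale factor
face = np.random.rand(512, 512, 3).astype(np.float32)   # "restored" 512x512 crop
face_up = cv2.resize(face, (512 * s, 512 * s))          # crop after face upsampling

# Toy inverse affine: rotate and scale the crop into a 1024x1024 canvas.
inverse_affine = cv2.getRotationMatrix2D((256.0, 256.0), 15.0, 2.0)

# Same rescaling as the diff: linear part divided by s, translation preserved.
inverse_affine_up = inverse_affine / s
inverse_affine_up[:, 2] *= s

a = cv2.warpAffine(face, inverse_affine, (1024, 1024))
b = cv2.warpAffine(face_up, inverse_affine_up, (1024, 1024))
print(float(np.abs(a - b).mean()))  # near 0: both warps place the face identically
```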
inference_codeformer.py
CHANGED

@@ -30,6 +30,7 @@ if __name__ == '__main__':
     parser.add_argument('--detection_model', type=str, default='retinaface_resnet50')
     parser.add_argument('--draw_box', action='store_true')
     parser.add_argument('--bg_upsampler', type=str, default='None', help='background upsampler. Optional: realesrgan')
+    parser.add_argument('--face_upsample', action='store_true', help='face upsampler after enhancement.')
     parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
 
     args = parser.parse_args()

@@ -80,6 +81,11 @@ if __name__ == '__main__':
     # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
     if not args.has_aligned:
         print(f'Using [{args.detection_model}] for face detection network.')
+        if args.bg_upsampler != 'None':
+            print(f'Background upsampling: True, Face upsampling: {args.face_upsample}')
+        else:
+            print('Background upsampling: False, Face upsampling: False')
+
     face_helper = FaceRestoreHelper(
         args.upscale,
         face_size=512,

@@ -143,7 +149,10 @@ if __name__ == '__main__':
             bg_img = None
         face_helper.get_inverse_affine(None)
         # paste each restored face to the input image
-        restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)
+        if args.face_upsample and bg_upsampler is not None:
+            restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box, face_upsampler=bg_upsampler)
+        else:
+            restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)
 
         # save faces
         for idx, (cropped_face, restored_face) in enumerate(zip(face_helper.cropped_faces, face_helper.restored_faces)):
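Note the design choice in the last hunk: `--face_upsample` does not load a second model. When the flag is set and a background upsampler exists, the same Real-ESRGAN instance is handed to `paste_faces_to_input_image` as `face_upsampler`, which is why face upsampling only takes effect together with `--bg_upsampler realesrgan`.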
inputs/whole_imgs/stable_diffusion_00.jpg
DELETED
Binary file (127 kB)