Spaces:

skytnt
/

full-body-anime-gan

Running

App Files Files Community

skytnt committed on Aug 20, 2022

Commit

5f2c171

•

1 Parent(s): 17e3440

add remove background before encode img

Browse files

Files changed (1) hide show

app.py +27 -6

app.py CHANGED Viewed

@@ -82,14 +82,16 @@ class Model:
         self.detector_stride = None
         self.detector_imgsz = None
         self.detector_class_names = None
         self.w_avg = None
-        self.load_models("skytnt/fbanime-gan")
-    def load_models(self, repo):
-        g_mapping_path = huggingface_hub.hf_hub_download(repo, "g_mapping.onnx")
-        g_synthesis_path = huggingface_hub.hf_hub_download(repo, "g_synthesis.onnx")
-        encoder_path = huggingface_hub.hf_hub_download(repo, "encoder.onnx")
-        detector_path = huggingface_hub.hf_hub_download(repo, "waifu_dect.onnx")
         providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
         g_mapping = onnx.load(g_mapping_path)
@@ -105,6 +107,7 @@ class Model:
         self.detector_stride = int(detector_meta['stride'])
         self.detector_imgsz = 1088
         self.detector_class_names = eval(detector_meta['names'])
     def get_img(self, w, noise=0):
         img = self.g_synthesis.run(None, {'w': w, "noise": np.asarray([noise], dtype=np.float32)})[0]
@@ -113,6 +116,23 @@ class Model:
     def get_w(self, z, psi1, psi2):
         return self.g_mapping.run(None, {'z': z, 'psi': np.asarray([psi1, psi2], dtype=np.float32)})[0]
     def encode_img(self, img):
         img = transform.resize(((img / 255 - 0.5) / 0.5), (256, 256)).transpose(2, 0, 1)[np.newaxis, :].astype(
             np.float32)
@@ -236,6 +256,7 @@ def gen_fn(method, seed, psi1, psi2, noise):
 def encode_img_fn(img, noise):
     if img is None:
         return "please upload a image", None, None, None, None
     imgs = model.detect(img, 0.2, 0.03)
     if len(imgs) == 0:
         return "failed to detect waifu", None, None, None, None

         self.detector_stride = None
         self.detector_imgsz = None
         self.detector_class_names = None
+        self.anime_seg = None
         self.w_avg = None
+        self.load_models()
+    def load_models(self):
+        g_mapping_path = huggingface_hub.hf_hub_download("skytnt/fbanime-gan", "g_mapping.onnx")
+        g_synthesis_path = huggingface_hub.hf_hub_download("skytnt/fbanime-gan", "g_synthesis.onnx")
+        encoder_path = huggingface_hub.hf_hub_download("skytnt/fbanime-gan", "encoder.onnx")
+        detector_path = huggingface_hub.hf_hub_download("skytnt/fbanime-gan", "waifu_dect.onnx")
+        anime_seg_path = huggingface_hub.hf_hub_download("skytnt/anime-seg", "isnetis.onnx")
         providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
         g_mapping = onnx.load(g_mapping_path)
         self.detector_stride = int(detector_meta['stride'])
         self.detector_imgsz = 1088
         self.detector_class_names = eval(detector_meta['names'])
+        self.anime_seg = rt.InferenceSession(anime_seg_path, providers=providers)
     def get_img(self, w, noise=0):
         img = self.g_synthesis.run(None, {'w': w, "noise": np.asarray([noise], dtype=np.float32)})[0]
     def get_w(self, z, psi1, psi2):
         return self.g_mapping.run(None, {'z': z, 'psi': np.asarray([psi1, psi2], dtype=np.float32)})[0]
+    def remove_bg(self, img, s=1024):
+        """Blend the background of `img` toward 255 using the `self.anime_seg` mask.
+
+        The image is scaled so that its longer side equals `s`, centred on an
+        `s` x `s` zero-filled canvas and passed to the `self.anime_seg`
+        session. The returned mask is cropped back to the scaled image area,
+        resized to the input size and used as a per-pixel blend weight.
+
+        Args:
+            img: image array whose first two axes are height and width.
+                NOTE(review): presumably H x W x 3 with values in 0-255 (the
+                canvas below is allocated with 3 channels) - confirm against
+                callers.
+            s: side length of the square canvas given to the model.
+
+        Returns:
+            A uint8 array computed as `img * mask + 255 * (1 - mask)`, with
+            the mask resized to the input's height and width.
+        """
+        # Keep the untouched input for the final blend.
+        img0 = img
+        img = (img / 255).astype(np.float32)
+        # h0, w0 hold the original size; h, w become the aspect-preserving
+        # size whose longer side is exactly s.
+        h, w = h0, w0 = img.shape[:-1]
+        h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s)
+        # Total padding per axis; half of it goes before the image (centred).
+        ph, pw = s - h, s - w
+        img_input = np.zeros([s, s, 3], dtype=np.float32)
+        img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = transform.resize(img, (h, w))
+        # Move the channel axis first and add a leading batch axis.
+        img_input = np.transpose(img_input, (2, 0, 1))
+        img_input = img_input[np.newaxis, :]
+        # The session input is named 'img'; take the first output, first batch item.
+        mask = self.anime_seg.run(None, {'img': img_input})[0][0]
+        # Move the leading axis back to the end, then cut off the padded border.
+        mask = np.transpose(mask, (1, 2, 0))
+        mask = mask[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w]
+        mask = transform.resize(mask, (h0, w0))
+        # NOTE(review): assumes mask values lie in [0, 1]; where the mask is
+        # near 0 the result approaches 255 - confirm against the model output.
+        img0 = (img0*mask + 255*(1-mask)).astype(np.uint8)
+        return img0
     def encode_img(self, img):
         img = transform.resize(((img / 255 - 0.5) / 0.5), (256, 256)).transpose(2, 0, 1)[np.newaxis, :].astype(
             np.float32)
 def encode_img_fn(img, noise):
     if img is None:
         return "please upload a image", None, None, None, None
+    img = model.remove_bg(img)
     imgs = model.detect(img, 0.2, 0.03)
     if len(imgs) == 0:
         return "failed to detect waifu", None, None, None, None