jjeamin committed on
Commit
dffcfb5
1 Parent(s): 9b8bbee

Update face alignment

Browse files
Files changed (3) hide show
  1. app.py +9 -67
  2. requirements.txt +1 -0
  3. utils.py +114 -0
app.py CHANGED
@@ -4,17 +4,22 @@ os.system("pip -qq install facenet_pytorch")
4
  from facenet_pytorch import MTCNN
5
  from torchvision import transforms
6
  import torch, PIL
7
- from tqdm.notebook import tqdm
8
  import gradio as gr
9
  import torch
 
10
 
11
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
 
13
  image_size = 512
 
14
 
15
  means = [0.5, 0.5, 0.5]
16
  stds = [0.5, 0.5, 0.5]
17
 
 
 
 
 
18
  model_path = hf_hub_download(repo_id="jjeamin/ArcaneStyleTransfer", filename="pytorch_model.bin")
19
 
20
  if 'cuda' in device:
@@ -25,76 +30,14 @@ else:
25
  style_transfer = torch.jit.load(model_path).eval().cpu()
26
  t_stds = torch.tensor(stds).cpu()[:,None,None]
27
  t_means = torch.tensor(means).cpu()[:,None,None]
28
-
29
- mtcnn = MTCNN(image_size=image_size, margin=80)
30
-
31
- def detect(img):
32
-
33
- # Detect faces
34
- batch_boxes, batch_probs, batch_points = mtcnn.detect(img, landmarks=True)
35
- # Select faces
36
- if not mtcnn.keep_all:
37
- batch_boxes, batch_probs, batch_points = mtcnn.select_boxes(
38
- batch_boxes, batch_probs, batch_points, img, method=mtcnn.selection_method
39
- )
40
-
41
- return batch_boxes, batch_points
42
-
43
- def makeEven(_x):
44
- return _x if (_x % 2 == 0) else _x+1
45
-
46
- def scale(boxes, _img, max_res=1_500_000, target_face=256, fixed_ratio=0, max_upscale=2, VERBOSE=False):
47
-
48
- x, y = _img.size
49
-
50
- ratio = 2 #initial ratio
51
-
52
- #scale to desired face size
53
- if (boxes is not None):
54
- if len(boxes)>0:
55
- ratio = target_face/max(boxes[0][2:]-boxes[0][:2]);
56
- ratio = min(ratio, max_upscale)
57
- if VERBOSE: print('up by', ratio)
58
 
59
- if fixed_ratio>0:
60
- if VERBOSE: print('fixed ratio')
61
- ratio = fixed_ratio
62
-
63
- x*=ratio
64
- y*=ratio
65
-
66
- #downscale to fit into max res
67
- res = x*y
68
- if res > max_res:
69
- ratio = pow(res/max_res,1/2);
70
- if VERBOSE: print(ratio)
71
- x=int(x/ratio)
72
- y=int(y/ratio)
73
-
74
- #make dimensions even, because usually NNs fail on uneven dimensions due skip connection size mismatch
75
- x = makeEven(int(x))
76
- y = makeEven(int(y))
77
-
78
- size = (x, y)
79
-
80
- return _img.resize(size)
81
-
82
- def scale_by_face_size(_img, max_res=1_500_000, target_face=256, fix_ratio=0, max_upscale=2, VERBOSE=False):
83
- boxes = None
84
- boxes, _ = detect(_img)
85
- if VERBOSE: print('boxes',boxes)
86
- img_resized = scale(boxes, _img, max_res, target_face, fix_ratio, max_upscale, VERBOSE)
87
- return img_resized
88
-
89
-
90
- img_transforms = transforms.Compose([
91
- transforms.ToTensor(),
92
- transforms.Normalize(means, stds)])
93
-
94
  def tensor2im(var):
95
  return var.mul(t_stds).add(t_means).mul(255.).clamp(0,255).permute(1,2,0)
96
 
97
  def proc_pil_img(input_image):
 
 
 
98
  if 'cuda' in device:
99
  transformed_image = img_transforms(input_image)[None,...].cuda().half()
100
  else:
@@ -108,7 +51,6 @@ def proc_pil_img(input_image):
108
  return output_image
109
 
110
  def process(im):
111
- im = scale_by_face_size(im, target_face=image_size, max_res=1_500_000, max_upscale=1)
112
  res = proc_pil_img(im)
113
  return res
114
 
 
4
  from facenet_pytorch import MTCNN
5
  from torchvision import transforms
6
  import torch, PIL
 
7
  import gradio as gr
8
  import torch
9
+ from utils import align_face
10
 
11
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
 
13
  image_size = 512
14
+ transform_size = 1024
15
 
16
  means = [0.5, 0.5, 0.5]
17
  stds = [0.5, 0.5, 0.5]
18
 
19
+ img_transforms = transforms.Compose([
20
+ transforms.ToTensor(),
21
+ transforms.Normalize(means, stds)])
22
+
23
  model_path = hf_hub_download(repo_id="jjeamin/ArcaneStyleTransfer", filename="pytorch_model.bin")
24
 
25
  if 'cuda' in device:
 
30
  style_transfer = torch.jit.load(model_path).eval().cpu()
31
  t_stds = torch.tensor(stds).cpu()[:,None,None]
32
  t_means = torch.tensor(means).cpu()[:,None,None]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def tensor2im(var):
35
  return var.mul(t_stds).add(t_means).mul(255.).clamp(0,255).permute(1,2,0)
36
 
37
  def proc_pil_img(input_image):
38
+ # input_image = PIL
39
+ input_image = align_face(input_image, output_size=image_size, transform_size=transform_size)
40
+
41
  if 'cuda' in device:
42
  transformed_image = img_transforms(input_image)[None,...].cuda().half()
43
  else:
 
51
  return output_image
52
 
53
  def process(im):
 
54
  res = proc_pil_img(im)
55
  return res
56
 
requirements.txt CHANGED
@@ -7,3 +7,4 @@ scipy
7
  cmake
8
  onnxruntime-gpu
9
  opencv-python-headless
 
 
7
  cmake
8
  onnxruntime-gpu
9
  opencv-python-headless
10
+ dlib
utils.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dlib
2
+ import numpy as np
3
+ import scipy
4
+ from PIL import Image
5
+ from huggingface_hub import hf_hub_download
6
+
7
+ shape_predictor_path = hf_hub_download(repo_id="jjeamin/ArcaneStyleTransfer", filename="shape_predictor_68_face_landmarks.dat")
8
+
9
def get_landmark(img, predictor, detector=None):
    """Locate facial landmarks in ``img`` with dlib.

    :param img: image array, passed unchanged to the detector and predictor
    :param predictor: callable ``predictor(img, detection)`` returning an
        object whose ``parts()`` yields points with ``x`` / ``y`` attributes
        (e.g. a ``dlib.shape_predictor``)
    :param detector: optional callable ``detector(img, upsample)`` returning
        a sized, indexable collection of detections; when omitted, dlib's
        frontal face detector is created and used
    :return: np.array with one ``[x, y]`` row per landmark, i.e. shape
        (68, 2) for the 68-point predictor
    :raises AssertionError: if no face is detected
    """
    if detector is None:
        detector = dlib.get_frontal_face_detector()

    # Second argument: number of times dlib upsamples the image before detecting.
    dets = detector(img, 1)
    if len(dets) == 0:
        # Raised explicitly rather than through ``assert`` so the check is
        # not stripped when Python runs with -O; the exception type and
        # message are the same as before.
        raise AssertionError("Face not detected, try another face image")

    # When several faces are detected, the last detection is the one whose
    # landmarks are returned. Only that detection is passed to the predictor
    # instead of running the predictor on every face and discarding the rest.
    shape = predictor(img, dets[len(dets) - 1])

    return np.array([[point.x, point.y] for point in shape.parts()])
27
+
28
+
29
def align_face(img, output_size=512, transform_size=1024, enable_padding=True):
    """Crop, rotate and rescale ``img`` so the detected face is upright and centred.

    The crop is an oriented square derived from the eye and mouth landmarks.
    The image is optionally shrunk, cropped and border-padded before the
    square is warped to ``transform_size`` and downscaled to ``output_size``.

    :param img: PIL Image containing a face
    :param output_size: side length in pixels of the square result
    :param transform_size: side length of the intermediate square the face
        quad is warped into; if it is not larger than ``output_size`` the
        warped image is returned without the final downscale
    :param enable_padding: when True, reflect-pad and blur the border where
        the crop quad reaches past the image edge
    :return: PIL Image
    :raises AssertionError: propagated from ``get_landmark`` when no face
        is detected
    """
    # Imported by full path: a bare ``import scipy`` does not guarantee that
    # the ``ndimage`` submodule has been loaded.
    from scipy.ndimage import gaussian_filter

    # The padding step uses a three-axis pad spec and rebuilds the image as
    # 'RGB', so any other mode (e.g. RGBA, L) is normalised up front.
    if img.mode != "RGB":
        img = img.convert("RGB")

    predictor = dlib.shape_predictor(shape_predictor_path)
    lm = get_landmark(np.array(img), predictor)

    # Slices of the 68-point landmark array that drive the alignment.
    lm_eye_left = lm[36:42]
    lm_eye_right = lm[42:48]
    lm_mouth_outer = lm[48:60]

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    mouth_left = lm_mouth_outer[0]
    mouth_right = lm_mouth_outer[6]
    mouth_avg = (mouth_left + mouth_right) * 0.5
    eye_to_mouth = mouth_avg - eye_avg

    # Choose oriented crop rectangle: ``x`` is the half-width vector along
    # the face's horizontal axis, ``y`` is ``x`` rotated by 90 degrees.
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    y = np.flipud(x) * [-1, 1]
    c = eye_avg + eye_to_mouth * 0.1
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    qsize = np.hypot(*x) * 2

    # NOTE: ``transform_size`` and ``enable_padding`` used to be overwritten
    # here (``transform_size = output_size``; ``enable_padding = True``),
    # which silently ignored the caller's arguments. They are now honoured.

    # Shrink: downscale very large inputs before the more expensive steps.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)),
                 int(np.rint(float(img.size[1]) / shrink)))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize(rsize, Image.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop to the quad's bounding box plus a border, clamped to the image.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
            int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0),
            min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]

    # Pad: how far the quad (plus border) sticks out past each image edge.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
           int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0),
           max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        yy, xx, _ = np.ogrid[:h, :w, :1]
        # ``mask`` grows towards 1 near the padded edges and drives the blends below.
        mask = np.maximum(
            1.0 - np.minimum(np.float32(xx) / pad[0], np.float32(w - 1 - xx) / pad[2]),
            1.0 - np.minimum(np.float32(yy) / pad[1], np.float32(h - 1 - yy) / pad[3]))
        blur = qsize * 0.02
        # Blend towards a blurred copy, then towards the median colour, weighted by the mask.
        img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
        quad += pad[:2]

    # Transform: warp the oriented quad onto an axis-aligned square.
    img = img.transform((transform_size, transform_size), Image.QUAD,
                        (quad + 0.5).flatten(), Image.BILINEAR)
    if output_size < transform_size:
        img = img.resize((output_size, output_size), Image.LANCZOS)

    # Return aligned image.
    return img