diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..326bf5fd44d1cda2eb4cdd013a38d6fd1435f411 --- /dev/null +++ b/app.py @@ -0,0 +1,131 @@ +import random +import subprocess +import os +os.environ['MPLCONFIGDIR'] = os.getcwd() + "/configs/" +import gradio +import gradio as gr +import shutil + +current_dir = os.path.dirname(os.path.abspath(__file__)) + +# make sure the working directories exist before any file is moved or written into them +os.makedirs('temp/video', exist_ok=True) +os.makedirs('temp/audio', exist_ok=True) +os.makedirs('results', exist_ok=True) + + +def convert(segment_length, video, audio, progress=gradio.Progress()): + if segment_length is None: + segment_length = 0 + print(video, audio) + + if segment_length != 0: + video_segments = cut_video_segments(video, segment_length) + audio_segments = cut_audio_segments(audio, segment_length) + else: + video_path = os.path.join('temp/video', os.path.basename(video)) + shutil.move(video, video_path) + video_segments = [video_path] + audio_path = os.path.join('temp/audio', os.path.basename(audio)) + shutil.move(audio, audio_path) + audio_segments = [audio_path] + + processed_segments = [] + for i, (video_seg, audio_seg) in progress.tqdm(enumerate(zip(video_segments, audio_segments))): + processed_output = process_segment(video_seg, audio_seg, i) + processed_segments.append(processed_output) + + output_file = f"results/output_{random.randint(0,1000)}.mp4" + concatenate_videos(processed_segments, output_file) + + # Remove temporary files + cleanup_temp_files(video_segments + audio_segments) + + # Return the concatenated video file + return output_file + + +def cleanup_temp_files(file_list): + for file_path in file_list: + if os.path.isfile(file_path): + os.remove(file_path) + + +def cut_video_segments(video_file, segment_length): + temp_directory = 'temp/video' + shutil.rmtree(temp_directory, ignore_errors=True) + os.makedirs(temp_directory, exist_ok=True) + segment_template = f"{temp_directory}/{random.randint(0,1000)}_%03d.mp4" + command = ["ffmpeg", "-i", video_file, "-c", "copy", "-f", + "segment", "-segment_time", str(segment_length), segment_template] + subprocess.run(command, check=True) + + video_segments = [segment_template % + i for i in range(len(os.listdir(temp_directory)))] + return video_segments + + +def cut_audio_segments(audio_file, segment_length): + temp_directory = 'temp/audio' + shutil.rmtree(temp_directory, ignore_errors=True) + os.makedirs(temp_directory, exist_ok=True) + segment_template = f"{temp_directory}/{random.randint(0,1000)}_%03d.mp3" + command = ["ffmpeg", "-i", audio_file, "-f", "segment", + "-segment_time", str(segment_length), segment_template] + subprocess.run(command, check=True) + + audio_segments = [segment_template % + i for i in range(len(os.listdir(temp_directory)))] + return audio_segments + + +def process_segment(video_seg, audio_seg, i): + output_file = f"results/{random.randint(10,100000)}_{i}.mp4" + command = ["python", "inference.py", "--face", video_seg, + "--audio", audio_seg, "--outfile", output_file] + subprocess.run(command, check=True) + + return output_file + + +def concatenate_videos(video_segments, output_file): + with open("segments.txt", "w") as file: + for segment in video_segments: + file.write(f"file '{segment}'\n") + command = ["ffmpeg", "-f", "concat", "-i", + "segments.txt", "-c", "copy", output_file] + subprocess.run(command, check=True) + + +with gradio.Blocks( + title="Audio-based Lip Synchronization", + theme=gr.themes.Base( + primary_hue=gr.themes.colors.green, + font=["Source Sans Pro", "Arial", "sans-serif"], + font_mono=['JetBrains mono', "Consolas", 'Courier New'] + ), +) as demo: + with gradio.Row(): + 
gradio.Markdown("# Audio-based Lip Synchronization") + with gradio.Row(): + with gradio.Column(): + with gradio.Row(): + seg = gradio.Number( + label="Segment length (seconds), 0 for no segmentation") + with gradio.Row(): + with gradio.Column(): + v = gradio.Video(label='Source Face') + + with gradio.Column(): + a = gradio.Audio( + type='filepath', label='Target Audio') + + with gradio.Row(): + btn = gradio.Button(value="Synthesize", variant="primary") + + with gradio.Column(): + o = gradio.Video(label="Output Video") + + btn.click(fn=convert, inputs=[seg, v, a], outputs=[o]) + +demo.queue().launch() diff --git a/checkpoints/30_net_gen.pth b/checkpoints/30_net_gen.pth new file mode 100644 index 0000000000000000000000000000000000000000..a08303a88d8cfe1288d97c4af9256075a724ca3e --- /dev/null +++ b/checkpoints/30_net_gen.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db83e1727128e2c5de27bc80d2929586535e04a709af45016a63e7cf7c46b0c +size 33877439 diff --git a/checkpoints/BFM.zip b/checkpoints/BFM.zip new file mode 100644 index 0000000000000000000000000000000000000000..fabbf35826c63205affc72124f4b10f851beac45 --- /dev/null +++ b/checkpoints/BFM.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369eb3177ca5491fe04c2a9aba2d33a39642681f57796fbc611dab36f9a10656 +size 404749663 diff --git a/checkpoints/BFM/.gitkeep b/checkpoints/BFM/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/checkpoints/BFM/01_MorphableModel.mat b/checkpoints/BFM/01_MorphableModel.mat new file mode 100644 index 0000000000000000000000000000000000000000..f251485b55d35adac0ad4f1622a47d7a39a1502c --- /dev/null +++ b/checkpoints/BFM/01_MorphableModel.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2 +size 240875364 diff --git a/checkpoints/BFM/BFM_exp_idx.mat b/checkpoints/BFM/BFM_exp_idx.mat new file mode 100644 index 0000000000000000000000000000000000000000..5b214a5f8afbc038e6959f7f72141e448e89fb3b --- /dev/null +++ b/checkpoints/BFM/BFM_exp_idx.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62752a2cab3eea148569fb07e367e03535b4ee04aa71ea1a9aed36486d26c612 +size 91931 diff --git a/checkpoints/BFM/BFM_front_idx.mat b/checkpoints/BFM/BFM_front_idx.mat new file mode 100644 index 0000000000000000000000000000000000000000..29d82e79f8b2558a5bf1956ab9e1261d49c2c8dd --- /dev/null +++ b/checkpoints/BFM/BFM_front_idx.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d285dd018563113496127df9c364800183172adb4d3e802f726085dab66b087 +size 44880 diff --git a/checkpoints/BFM/BFM_model_front.mat b/checkpoints/BFM/BFM_model_front.mat new file mode 100644 index 0000000000000000000000000000000000000000..2926e5f317244023be2421b17dbb0e97d97ce9e6 --- /dev/null +++ b/checkpoints/BFM/BFM_model_front.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3ff544aba3246c5f2c117f2be76fa44a7b76145326aae0bbfbfb564d4f82af +size 127170280 diff --git a/checkpoints/BFM/Exp_Pca.bin b/checkpoints/BFM/Exp_Pca.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c1785e6abc52b13e54a573f9f3ebc099915b1e0 --- /dev/null +++ b/checkpoints/BFM/Exp_Pca.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726 +size 51086404 diff --git a/checkpoints/BFM/facemodel_info.mat 
b/checkpoints/BFM/facemodel_info.mat new file mode 100644 index 0000000000000000000000000000000000000000..c2e0a3521fc040e59e07fc09384fc140234f006f --- /dev/null +++ b/checkpoints/BFM/facemodel_info.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529398f76619ae7e22f43c25dd60a2473bcc2bcc8c894fd9c613c68624ce1c04 +size 738861 diff --git a/checkpoints/BFM/select_vertex_id.mat b/checkpoints/BFM/select_vertex_id.mat new file mode 100644 index 0000000000000000000000000000000000000000..feadeff96a0b8e0619461f64a9bdc9e761b14c80 --- /dev/null +++ b/checkpoints/BFM/select_vertex_id.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6877a7d634330f25bf1e81bc062b6507ee53ea183838e471fa21b613048fa36b +size 62299 diff --git a/checkpoints/BFM/similarity_Lm3D_all.mat b/checkpoints/BFM/similarity_Lm3D_all.mat new file mode 100644 index 0000000000000000000000000000000000000000..9f5b0bd4ecffb926128a29cb1bbf9d9081c3d4e7 --- /dev/null +++ b/checkpoints/BFM/similarity_Lm3D_all.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b83ce6e35c50ddc3e97603650cef4970320c157e75c241c844f29c1dcba65a +size 994 diff --git a/checkpoints/BFM/std_exp.txt b/checkpoints/BFM/std_exp.txt new file mode 100644 index 0000000000000000000000000000000000000000..767b8de4ea1ca78b6f22b98ff2dee4fa345500bb --- /dev/null +++ b/checkpoints/BFM/std_exp.txt @@ -0,0 +1 @@ +453980 257264 263068 211890 135873 184721 47055.6 72732 62787.4 106226 56708.5 51439.8 34887.1 44378.7 51813.4 31030.7 23354.9 23128.1 19400 21827.6 22767.7 22057.4 19894.3 16172.8 17142.7 10035.3 14727.5 12972.5 10763.8 8953.93 8682.62 8941.81 6342.3 5205.3 7065.65 6083.35 6678.88 4666.63 5082.89 5134.76 4908.16 3964.93 3739.95 3180.09 2470.45 1866.62 1624.71 2423.74 1668.53 1471.65 1194.52 782.102 815.044 835.782 834.937 744.496 575.146 633.76 705.685 753.409 620.306 673.326 766.189 619.866 559.93 357.264 396.472 556.849 455.048 460.592 400.735 326.702 279.428 291.535 326.584 305.664 287.816 283.642 276.19 \ No newline at end of file diff --git a/checkpoints/DNet.pt b/checkpoints/DNet.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5258b8314f176fb9d5646d9c2a955e08180610a --- /dev/null +++ b/checkpoints/DNet.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41220d2973c0ba2eab6e8f17ed00711aef5a0d76d19808f885dc0e3251df2e80 +size 180424655 diff --git a/checkpoints/ENet.pth b/checkpoints/ENet.pth new file mode 100644 index 0000000000000000000000000000000000000000..783f421cd2ebc35ca938493c12744018b83f4033 --- /dev/null +++ b/checkpoints/ENet.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967ee3ed857619cedd92b6407dc8a124cbfe763cc11cad58316fe21271a8928f +size 573261168 diff --git a/checkpoints/GFPGANv1.3.pth b/checkpoints/GFPGANv1.3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1da748a3ef84ff85dd2c77c836f222aae22b007e --- /dev/null +++ b/checkpoints/GFPGANv1.3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c953a88f2727c85c3d9ae72e2bd4846bbaf59fe6972ad94130e23e7017524a70 +size 348632874 diff --git a/checkpoints/GPEN-BFR-512.pth b/checkpoints/GPEN-BFR-512.pth new file mode 100644 index 0000000000000000000000000000000000000000..2287dbb4a09d881a933fcda63ef61f42da9eb5ba --- /dev/null +++ b/checkpoints/GPEN-BFR-512.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1002c41add95b0decad69604d80455576f7187dd99ca16bd611bcfd44c10b51 +size 284085738 diff --git 
a/checkpoints/LNet.pth b/checkpoints/LNet.pth new file mode 100644 index 0000000000000000000000000000000000000000..63d1c81336b6c997e59ce2cf18a40140e92910d1 --- /dev/null +++ b/checkpoints/LNet.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae06fef0454c421b828cc53e8d4b9c92d990867a858ea7bb9661ab6cf6ab774 +size 1534697728 diff --git a/checkpoints/ParseNet-latest.pth b/checkpoints/ParseNet-latest.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ac2efc50360a79c9905dbac57d9d99cbfbe863c --- /dev/null +++ b/checkpoints/ParseNet-latest.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2 +size 85331193 diff --git a/checkpoints/RetinaFace-R50.pth b/checkpoints/RetinaFace-R50.pth new file mode 100644 index 0000000000000000000000000000000000000000..16546738ce0a00a9fd47585e0fc52744d31cc117 --- /dev/null +++ b/checkpoints/RetinaFace-R50.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d +size 109497761 diff --git a/checkpoints/expression.mat b/checkpoints/expression.mat new file mode 100644 index 0000000000000000000000000000000000000000..bf4d3c687be74adda57b4096cf05e279b9bf72ec --- /dev/null +++ b/checkpoints/expression.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e9d69eb46e866ed5cbb569ed2bdb3813254720fb0cb745d5b56181faf9aec5 +size 1456 diff --git a/checkpoints/face3d_pretrain_epoch_20.pth b/checkpoints/face3d_pretrain_epoch_20.pth new file mode 100644 index 0000000000000000000000000000000000000000..97ebd6753f7ca4bcd39d3b82e7109b66a2dbc1fb --- /dev/null +++ b/checkpoints/face3d_pretrain_epoch_20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b +size 288860037 diff --git a/checkpoints/shape_predictor_68_face_landmarks.dat b/checkpoints/shape_predictor_68_face_landmarks.dat new file mode 100644 index 0000000000000000000000000000000000000000..1e5da4f9a556bec8582e6c55b89b3e6bfdd60021 --- /dev/null +++ b/checkpoints/shape_predictor_68_face_landmarks.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f +size 99693937 diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..df4ede8165fb8c52029209e165585df14dc3fa45 --- /dev/null +++ b/inference.py @@ -0,0 +1,345 @@ +import numpy as np +import cv2, os, sys, subprocess, platform, torch +from tqdm import tqdm +from PIL import Image +from scipy.io import loadmat + +sys.path.insert(0, 'third_part') +sys.path.insert(0, 'third_part/GPEN') +sys.path.insert(0, 'third_part/GFPGAN') + +# 3dmm extraction +from third_part.face3d.util.preprocess import align_img +from third_part.face3d.util.load_mats import load_lm3d +from third_part.face3d.extract_kp_videos import KeypointExtractor +# face enhancement +from third_part.GPEN.gpen_face_enhancer import FaceEnhancement +from third_part.GFPGAN.gfpgan import GFPGANer +# expression control +from third_part.ganimation_replicate.model.ganimation import GANimationModel + +from utils import audio +from utils.ffhq_preprocess import Croper +from utils.alignment_stit import crop_faces, calc_alignment_coefficients, paste_image +from utils.inference_utils import Laplacian_Pyramid_Blending_with_mask, face_detect, load_model, options, split_coeff, \ + 
trans_image, transform_semantic, find_crop_norm_ratio, load_face3d_net, exp_aus_dict +import warnings +warnings.filterwarnings("ignore") + +args = options() + +def main(): + device = 'cuda' if torch.cuda.is_available() else 'cpu' + print('[Info] Using {} for inference.'.format(device)) + os.makedirs(os.path.join('temp', args.tmp_dir), exist_ok=True) + + enhancer = FaceEnhancement(base_dir='checkpoints', size=512, model='GPEN-BFR-512', use_sr=False, \ + sr_model='rrdb_realesrnet_psnr', channel_multiplier=2, narrow=1, device=device) + restorer = GFPGANer(model_path='checkpoints/GFPGANv1.3.pth', upscale=1, arch='clean', \ + channel_multiplier=2, bg_upsampler=None) + + base_name = args.face.split('/')[-1] + if os.path.isfile(args.face) and args.face.split('.')[1] in ['jpg', 'png', 'jpeg']: + args.static = True + if not os.path.isfile(args.face): + raise ValueError('--face argument must be a valid path to video/image file') + elif args.face.split('.')[1] in ['jpg', 'png', 'jpeg']: + full_frames = [cv2.imread(args.face)] + fps = args.fps + else: + video_stream = cv2.VideoCapture(args.face) + fps = video_stream.get(cv2.CAP_PROP_FPS) + + full_frames = [] + while True: + still_reading, frame = video_stream.read() + if not still_reading: + video_stream.release() + break + y1, y2, x1, x2 = args.crop + if x2 == -1: x2 = frame.shape[1] + if y2 == -1: y2 = frame.shape[0] + frame = frame[y1:y2, x1:x2] + full_frames.append(frame) + + print ("[Step 0] Number of frames available for inference: "+str(len(full_frames))) + # face detection & cropping, cropping the first frame as the style of FFHQ + croper = Croper('checkpoints/shape_predictor_68_face_landmarks.dat') + full_frames_RGB = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in full_frames] + full_frames_RGB, crop, quad = croper.crop(full_frames_RGB, xsize=512) + + clx, cly, crx, cry = crop + lx, ly, rx, ry = quad + lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) + oy1, oy2, ox1, ox2 = cly+ly, min(cly+ry, full_frames[0].shape[0]), clx+lx, min(clx+rx, full_frames[0].shape[1]) + # original_size = (ox2 - ox1, oy2 - oy1) + frames_pil = [Image.fromarray(cv2.resize(frame,(256,256))) for frame in full_frames_RGB] + + # get the landmark according to the detected face. + if not os.path.isfile('temp/'+base_name+'_landmarks.txt') or args.re_preprocess: + print('[Step 1] Landmarks Extraction in Video.') + kp_extractor = KeypointExtractor() + lm = kp_extractor.extract_keypoint(frames_pil, './temp/'+base_name+'_landmarks.txt') + else: + print('[Step 1] Using saved landmarks.') + lm = np.loadtxt('temp/'+base_name+'_landmarks.txt').astype(np.float32) + lm = lm.reshape([len(full_frames), -1, 2]) + + if not os.path.isfile('temp/'+base_name+'_coeffs.npy') or args.exp_img is not None or args.re_preprocess: + net_recon = load_face3d_net(args.face3d_net_path, device) + lm3d_std = load_lm3d('checkpoints/BFM') + + video_coeffs = [] + for idx in tqdm(range(len(frames_pil)), desc="[Step 2] 3DMM Extraction In Video:"): + frame = frames_pil[idx] + W, H = frame.size + lm_idx = lm[idx].reshape([-1, 2]) + if np.mean(lm_idx) == -1: + lm_idx = (lm3d_std[:, :2]+1) / 2. 
+ lm_idx = np.concatenate([lm_idx[:, :1] * W, lm_idx[:, 1:2] * H], 1) + else: + lm_idx[:, -1] = H - 1 - lm_idx[:, -1] + + trans_params, im_idx, lm_idx, _ = align_img(frame, lm_idx, lm3d_std) + trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32) + im_idx_tensor = torch.tensor(np.array(im_idx)/255., dtype=torch.float32).permute(2, 0, 1).to(device).unsqueeze(0) + with torch.no_grad(): + coeffs = split_coeff(net_recon(im_idx_tensor)) + + pred_coeff = {key:coeffs[key].cpu().numpy() for key in coeffs} + pred_coeff = np.concatenate([pred_coeff['id'], pred_coeff['exp'], pred_coeff['tex'], pred_coeff['angle'],\ + pred_coeff['gamma'], pred_coeff['trans'], trans_params[None]], 1) + video_coeffs.append(pred_coeff) + semantic_npy = np.array(video_coeffs)[:,0] + np.save('temp/'+base_name+'_coeffs.npy', semantic_npy) + else: + print('[Step 2] Using saved coeffs.') + semantic_npy = np.load('temp/'+base_name+'_coeffs.npy').astype(np.float32) + + # generate the 3dmm coeff from a single image + if args.exp_img is not None and ('.png' in args.exp_img or '.jpg' in args.exp_img): + print('extract the exp from',args.exp_img) + exp_pil = Image.open(args.exp_img).convert('RGB') + lm3d_std = load_lm3d('third_part/face3d/BFM') + + W, H = exp_pil.size + kp_extractor = KeypointExtractor() + lm_exp = kp_extractor.extract_keypoint([exp_pil], 'temp/'+base_name+'_temp.txt')[0] + if np.mean(lm_exp) == -1: + lm_exp = (lm3d_std[:, :2] + 1) / 2. + lm_exp = np.concatenate( + [lm_exp[:, :1] * W, lm_exp[:, 1:2] * H], 1) + else: + lm_exp[:, -1] = H - 1 - lm_exp[:, -1] + + trans_params, im_exp, lm_exp, _ = align_img(exp_pil, lm_exp, lm3d_std) + trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32) + im_exp_tensor = torch.tensor(np.array(im_exp)/255., dtype=torch.float32).permute(2, 0, 1).to(device).unsqueeze(0) + with torch.no_grad(): + expression = split_coeff(net_recon(im_exp_tensor))['exp'][0] + del net_recon + elif args.exp_img == 'smile': + expression = torch.tensor(loadmat('checkpoints/expression.mat')['expression_mouth'])[0] + else: + print('using expression center') + expression = torch.tensor(loadmat('checkpoints/expression.mat')['expression_center'])[0] + + # load DNet, model(LNet and ENet) + D_Net, model = load_model(args, device) + + if not os.path.isfile('temp/'+base_name+'_stablized.npy') or args.re_preprocess: + imgs = [] + for idx in tqdm(range(len(frames_pil)), desc="[Step 3] Stablize the expression In Video:"): + if args.one_shot: + source_img = trans_image(frames_pil[0]).unsqueeze(0).to(device) + semantic_source_numpy = semantic_npy[0:1] + else: + source_img = trans_image(frames_pil[idx]).unsqueeze(0).to(device) + semantic_source_numpy = semantic_npy[idx:idx+1] + ratio = find_crop_norm_ratio(semantic_source_numpy, semantic_npy) + coeff = transform_semantic(semantic_npy, idx, ratio).unsqueeze(0).to(device) + + # hacking the new expression + coeff[:, :64, :] = expression[None, :64, None].to(device) + with torch.no_grad(): + output = D_Net(source_img, coeff) + img_stablized = np.uint8((output['fake_image'].squeeze(0).permute(1,2,0).cpu().clamp_(-1, 1).numpy() + 1 )/2. 
* 255) + imgs.append(cv2.cvtColor(img_stablized,cv2.COLOR_RGB2BGR)) + np.save('temp/'+base_name+'_stablized.npy',imgs) + del D_Net + else: + print('[Step 3] Using saved stablized video.') + imgs = np.load('temp/'+base_name+'_stablized.npy') + torch.cuda.empty_cache() + + if not args.audio.endswith('.wav'): + command = 'ffmpeg -loglevel error -y -i {} -strict -2 {}'.format(args.audio, 'temp/{}/temp.wav'.format(args.tmp_dir)) + subprocess.call(command, shell=True) + args.audio = 'temp/{}/temp.wav'.format(args.tmp_dir) + wav = audio.load_wav(args.audio, 16000) + mel = audio.melspectrogram(wav) + if np.isnan(mel.reshape(-1)).sum() > 0: + raise ValueError('Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again') + + mel_step_size, mel_idx_multiplier, i, mel_chunks = 16, 80./fps, 0, [] + while True: + start_idx = int(i * mel_idx_multiplier) + if start_idx + mel_step_size > len(mel[0]): + mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:]) + break + mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size]) + i += 1 + + print("[Step 4] Load audio; Length of mel chunks: {}".format(len(mel_chunks))) + imgs = imgs[:len(mel_chunks)] + full_frames = full_frames[:len(mel_chunks)] + lm = lm[:len(mel_chunks)] + + imgs_enhanced = [] + for idx in tqdm(range(len(imgs)), desc='[Step 5] Reference Enhancement'): + img = imgs[idx] + pred, _, _ = enhancer.process(img, img, face_enhance=True, possion_blending=False) + imgs_enhanced.append(pred) + gen = datagen(imgs_enhanced.copy(), mel_chunks, full_frames, None, (oy1,oy2,ox1,ox2)) + + frame_h, frame_w = full_frames[0].shape[:-1] + out = cv2.VideoWriter('temp/{}/result.mp4'.format(args.tmp_dir), cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_w, frame_h)) + + if args.up_face != 'original': + instance = GANimationModel() + instance.initialize() + instance.setup() + + kp_extractor = KeypointExtractor() + for i, (img_batch, mel_batch, frames, coords, img_original, f_frames) in enumerate(tqdm(gen, desc='[Step 6] Lip Synthesis:', total=int(np.ceil(float(len(mel_chunks)) / args.LNet_batch_size)))): + img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device) + mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device) + img_original = torch.FloatTensor(np.transpose(img_original, (0, 3, 1, 2))).to(device)/255. # BGR -> RGB + + with torch.no_grad(): + incomplete, reference = torch.split(img_batch, 3, dim=1) + pred, low_res = model(mel_batch, img_batch, reference) + pred = torch.clamp(pred, 0, 1) + + if args.up_face in ['sad', 'angry', 'surprise']: + tar_aus = exp_aus_dict[args.up_face] + else: + pass + + if args.up_face == 'original': + cur_gen_faces = img_original + else: + test_batch = {'src_img': torch.nn.functional.interpolate((img_original * 2 - 1), size=(128, 128), mode='bilinear'), + 'tar_aus': tar_aus.repeat(len(incomplete), 1)} + instance.feed_batch(test_batch) + instance.forward() + cur_gen_faces = torch.nn.functional.interpolate(instance.fake_img / 2. + 0.5, size=(384, 384), mode='bilinear') + + if args.without_rl1 is not False: + incomplete, reference = torch.split(img_batch, 3, dim=1) + mask = torch.where(incomplete==0, torch.ones_like(incomplete), torch.zeros_like(incomplete)) + pred = pred * mask + cur_gen_faces * (1 - mask) + + pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255. 
+ + torch.cuda.empty_cache() + for p, f, xf, c in zip(pred, frames, f_frames, coords): + y1, y2, x1, x2 = c + p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1)) + + ff = xf.copy() + ff[y1:y2, x1:x2] = p + + # month region enhancement by GFPGAN + cropped_faces, restored_faces, restored_img = restorer.enhance( + ff, has_aligned=False, only_center_face=True, paste_back=True) + # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + mm = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0] + mouse_mask = np.zeros_like(restored_img) + tmp_mask = enhancer.faceparser.process(restored_img[y1:y2, x1:x2], mm)[0] + mouse_mask[y1:y2, x1:x2]= cv2.resize(tmp_mask, (x2 - x1, y2 - y1))[:, :, np.newaxis] / 255. + + height, width = ff.shape[:2] + restored_img, ff, full_mask = [cv2.resize(x, (512, 512)) for x in (restored_img, ff, np.float32(mouse_mask))] + img = Laplacian_Pyramid_Blending_with_mask(restored_img, ff, full_mask[:, :, 0], 10) + pp = np.uint8(cv2.resize(np.clip(img, 0 ,255), (width, height))) + + pp, orig_faces, enhanced_faces = enhancer.process(pp, xf, bbox=c, face_enhance=False, possion_blending=True) + out.write(pp) + out.release() + + if not os.path.isdir(os.path.dirname(args.outfile)): + os.makedirs(os.path.dirname(args.outfile), exist_ok=True) + command = 'ffmpeg -loglevel error -y -i {} -i {} -strict -2 -q:v 1 {}'.format(args.audio, 'temp/{}/result.mp4'.format(args.tmp_dir), args.outfile) + subprocess.call(command, shell=platform.system() != 'Windows') + print('outfile:', args.outfile) + + +# frames:256x256, full_frames: original size +def datagen(frames, mels, full_frames, frames_pil, cox): + img_batch, mel_batch, frame_batch, coords_batch, ref_batch, full_frame_batch = [], [], [], [], [], [] + base_name = args.face.split('/')[-1] + refs = [] + image_size = 256 + + # original frames + kp_extractor = KeypointExtractor() + fr_pil = [Image.fromarray(frame) for frame in frames] + lms = kp_extractor.extract_keypoint(fr_pil, 'temp/'+base_name+'x12_landmarks.txt') + frames_pil = [ (lm, frame) for frame,lm in zip(fr_pil, lms)] # frames is the croped version of modified face + crops, orig_images, quads = crop_faces(image_size, frames_pil, scale=1.0, use_fa=True) + inverse_transforms = [calc_alignment_coefficients(quad + 0.5, [[0, 0], [0, image_size], [image_size, image_size], [image_size, 0]]) for quad in quads] + del kp_extractor.detector + + oy1,oy2,ox1,ox2 = cox + face_det_results = face_detect(full_frames, args, jaw_correction=True) + + for inverse_transform, crop, full_frame, face_det in zip(inverse_transforms, crops, full_frames, face_det_results): + imc_pil = paste_image(inverse_transform, crop, Image.fromarray( + cv2.resize(full_frame[int(oy1):int(oy2), int(ox1):int(ox2)], (256, 256)))) + + ff = full_frame.copy() + ff[int(oy1):int(oy2), int(ox1):int(ox2)] = cv2.resize(np.array(imc_pil.convert('RGB')), (ox2 - ox1, oy2 - oy1)) + oface, coords = face_det + y1, y2, x1, x2 = coords + refs.append(ff[y1: y2, x1:x2]) + + for i, m in enumerate(mels): + idx = 0 if args.static else i % len(frames) + frame_to_save = frames[idx].copy() + face = refs[idx] + oface, coords = face_det_results[idx].copy() + + face = cv2.resize(face, (args.img_size, args.img_size)) + oface = cv2.resize(oface, (args.img_size, args.img_size)) + + img_batch.append(oface) + ref_batch.append(face) + mel_batch.append(m) + coords_batch.append(coords) + frame_batch.append(frame_to_save) + full_frame_batch.append(full_frames[idx].copy()) + + if len(img_batch) >= args.LNet_batch_size: + img_batch, mel_batch, ref_batch = 
np.asarray(img_batch), np.asarray(mel_batch), np.asarray(ref_batch) + img_masked = img_batch.copy() + img_original = img_batch.copy() + img_masked[:, args.img_size//2:] = 0 + img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255. + mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]) + + yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch + img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch, ref_batch = [], [], [], [], [], [], [] + + if len(img_batch) > 0: + img_batch, mel_batch, ref_batch = np.asarray(img_batch), np.asarray(mel_batch), np.asarray(ref_batch) + img_masked = img_batch.copy() + img_original = img_batch.copy() + img_masked[:, args.img_size//2:] = 0 + img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255. + mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]) + yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch + + +if __name__ == '__main__': + main() diff --git a/models/DNet.py b/models/DNet.py new file mode 100644 index 0000000000000000000000000000000000000000..085b2dcd59deb699af198b180d77ff80a81746d6 --- /dev/null +++ b/models/DNet.py @@ -0,0 +1,118 @@ +# TODO +import functools +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils import flow_util +from models.base_blocks import LayerNorm2d, ADAINHourglass, FineEncoder, FineDecoder + +# DNet +class DNet(nn.Module): + def __init__(self): + super(DNet, self).__init__() + self.mapping_net = MappingNet() + self.warpping_net = WarpingNet() + self.editing_net = EditingNet() + + def forward(self, input_image, driving_source, stage=None): + if stage == 'warp': + descriptor = self.mapping_net(driving_source) + output = self.warpping_net(input_image, descriptor) + else: + descriptor = self.mapping_net(driving_source) + output = self.warpping_net(input_image, descriptor) + output['fake_image'] = self.editing_net(input_image, output['warp_image'], descriptor) + return output + +class MappingNet(nn.Module): + def __init__(self, coeff_nc=73, descriptor_nc=256, layer=3): + super( MappingNet, self).__init__() + + self.layer = layer + nonlinearity = nn.LeakyReLU(0.1) + + self.first = nn.Sequential( + torch.nn.Conv1d(coeff_nc, descriptor_nc, kernel_size=7, padding=0, bias=True)) + + for i in range(layer): + net = nn.Sequential(nonlinearity, + torch.nn.Conv1d(descriptor_nc, descriptor_nc, kernel_size=3, padding=0, dilation=3)) + setattr(self, 'encoder' + str(i), net) + + self.pooling = nn.AdaptiveAvgPool1d(1) + self.output_nc = descriptor_nc + + def forward(self, input_3dmm): + out = self.first(input_3dmm) + for i in range(self.layer): + model = getattr(self, 'encoder' + str(i)) + out = model(out) + out[:,:,3:-3] + out = self.pooling(out) + return out + +class WarpingNet(nn.Module): + def __init__( + self, + image_nc=3, + descriptor_nc=256, + base_nc=32, + max_nc=256, + encoder_layer=5, + decoder_layer=3, + use_spect=False + ): + super( WarpingNet, self).__init__() + + nonlinearity = nn.LeakyReLU(0.1) + norm_layer = functools.partial(LayerNorm2d, affine=True) + kwargs = {'nonlinearity':nonlinearity, 'use_spect':use_spect} + + self.descriptor_nc = descriptor_nc + self.hourglass = ADAINHourglass(image_nc, self.descriptor_nc, base_nc, + max_nc, encoder_layer, decoder_layer, **kwargs) + + self.flow_out = nn.Sequential(norm_layer(self.hourglass.output_nc), + nonlinearity, + 
nn.Conv2d(self.hourglass.output_nc, 2, kernel_size=7, stride=1, padding=3)) + + self.pool = nn.AdaptiveAvgPool2d(1) + + def forward(self, input_image, descriptor): + final_output={} + output = self.hourglass(input_image, descriptor) + final_output['flow_field'] = self.flow_out(output) + + deformation = flow_util.convert_flow_to_deformation(final_output['flow_field']) + final_output['warp_image'] = flow_util.warp_image(input_image, deformation) + return final_output + + +class EditingNet(nn.Module): + def __init__( + self, + image_nc=3, + descriptor_nc=256, + layer=3, + base_nc=64, + max_nc=256, + num_res_blocks=2, + use_spect=False): + super(EditingNet, self).__init__() + + nonlinearity = nn.LeakyReLU(0.1) + norm_layer = functools.partial(LayerNorm2d, affine=True) + kwargs = {'norm_layer':norm_layer, 'nonlinearity':nonlinearity, 'use_spect':use_spect} + self.descriptor_nc = descriptor_nc + + # encoder part + self.encoder = FineEncoder(image_nc*2, base_nc, max_nc, layer, **kwargs) + self.decoder = FineDecoder(image_nc, self.descriptor_nc, base_nc, max_nc, layer, num_res_blocks, **kwargs) + + def forward(self, input_image, warp_image, descriptor): + x = torch.cat([input_image, warp_image], 1) + x = self.encoder(x) + gen_image = self.decoder(x, descriptor) + return gen_image diff --git a/models/ENet.py b/models/ENet.py new file mode 100644 index 0000000000000000000000000000000000000000..4df10d662122f6acb20ecfabe3b0d069144b8d18 --- /dev/null +++ b/models/ENet.py @@ -0,0 +1,139 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.base_blocks import ResBlock, StyleConv, ToRGB + + +class ENet(nn.Module): + def __init__( + self, + num_style_feat=512, + lnet=None, + concat=False + ): + super(ENet, self).__init__() + + self.low_res = lnet + for param in self.low_res.parameters(): + param.requires_grad = False + + channel_multiplier, narrow = 2, 1 + channels = { + '4': int(512 * narrow), + '8': int(512 * narrow), + '16': int(512 * narrow), + '32': int(512 * narrow), + '64': int(256 * channel_multiplier * narrow), + '128': int(128 * channel_multiplier * narrow), + '256': int(64 * channel_multiplier * narrow), + '512': int(32 * channel_multiplier * narrow), + '1024': int(16 * channel_multiplier * narrow) + } + + self.log_size = 8 + first_out_size = 128 + self.conv_body_first = nn.Conv2d(3, channels[f'{first_out_size}'], 1) # 256 -> 128 + + # downsample + in_channels = channels[f'{first_out_size}'] + self.conv_body_down = nn.ModuleList() + for i in range(8, 2, -1): + out_channels = channels[f'{2**(i - 1)}'] + self.conv_body_down.append(ResBlock(in_channels, out_channels, mode='down')) + in_channels = out_channels + + self.num_style_feat = num_style_feat + linear_out_channel = num_style_feat + self.final_linear = nn.Linear(channels['4'] * 4 * 4, linear_out_channel) + self.final_conv = nn.Conv2d(in_channels, channels['4'], 3, 1, 1) + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + self.concat = concat + if concat: + in_channels = 3 + 32 # channels['64'] + else: + in_channels = 3 + + for i in range(7, 9): # 128, 256 + out_channels = channels[f'{2**i}'] # + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode='upsample')) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None)) + self.to_rgbs.append(ToRGB(out_channels, 
num_style_feat, upsample=True)) + in_channels = out_channels + + def forward(self, audio_sequences, face_sequences, gt_sequences): + B = audio_sequences.size(0) + input_dim_size = len(face_sequences.size()) + inp, ref = torch.split(face_sequences,3,dim=1) + + if input_dim_size > 4: + audio_sequences = torch.cat([audio_sequences[:, i] for i in range(audio_sequences.size(1))], dim=0) + inp = torch.cat([inp[:, :, i] for i in range(inp.size(2))], dim=0) + ref = torch.cat([ref[:, :, i] for i in range(ref.size(2))], dim=0) + gt_sequences = torch.cat([gt_sequences[:, :, i] for i in range(gt_sequences.size(2))], dim=0) + + # get the global style + feat = F.leaky_relu_(self.conv_body_first(F.interpolate(ref, size=(256,256), mode='bilinear')), negative_slope=0.2) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2) + + # style code + style_code = self.final_linear(feat.reshape(feat.size(0), -1)) + style_code = style_code.reshape(style_code.size(0), -1, self.num_style_feat) + + LNet_input = torch.cat([inp, gt_sequences], dim=1) + LNet_input = F.interpolate(LNet_input, size=(96,96), mode='bilinear') + + if self.concat: + low_res_img, low_res_feat = self.low_res(audio_sequences, LNet_input) + low_res_img.detach() + low_res_feat.detach() + out = torch.cat([low_res_img, low_res_feat], dim=1) + + else: + low_res_img = self.low_res(audio_sequences, LNet_input) + low_res_img.detach() + # 96 x 96 + out = low_res_img + + p2d = (2,2,2,2) + out = F.pad(out, p2d, "reflect", 0) + skip = out + + for conv1, conv2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], self.to_rgbs): + out = conv1(out, style_code) # 96, 192, 384 + out = conv2(out, style_code) + skip = to_rgb(out, style_code, skip) + _outputs = skip + + # remove padding + _outputs = _outputs[:,:,8:-8,8:-8] + + if input_dim_size > 4: + _outputs = torch.split(_outputs, B, dim=0) + outputs = torch.stack(_outputs, dim=2) + low_res_img = F.interpolate(low_res_img, outputs.size()[3:]) + low_res_img = torch.split(low_res_img, B, dim=0) + low_res_img = torch.stack(low_res_img, dim=2) + else: + outputs = _outputs + return outputs, low_res_img \ No newline at end of file diff --git a/models/LNet.py b/models/LNet.py new file mode 100644 index 0000000000000000000000000000000000000000..f3b36d764d9f5a10c6834868dec6f11fc5bb1d3c --- /dev/null +++ b/models/LNet.py @@ -0,0 +1,139 @@ +import functools +import torch +import torch.nn as nn + +from models.transformer import RETURNX, Transformer +from models.base_blocks import Conv2d, LayerNorm2d, FirstBlock2d, DownBlock2d, UpBlock2d, \ + FFCADAINResBlocks, Jump, FinalBlock2d + + +class Visual_Encoder(nn.Module): + def __init__(self, image_nc, ngf, img_f, layers, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(Visual_Encoder, self).__init__() + self.layers = layers + self.first_inp = FirstBlock2d(image_nc, ngf, norm_layer, nonlinearity, use_spect) + self.first_ref = FirstBlock2d(image_nc, ngf, norm_layer, nonlinearity, use_spect) + for i in range(layers): + in_channels = min(ngf*(2**i), img_f) + out_channels = min(ngf*(2**(i+1)), img_f) + model_ref = DownBlock2d(in_channels, out_channels, norm_layer, nonlinearity, use_spect) + model_inp = DownBlock2d(in_channels, out_channels, norm_layer, nonlinearity, use_spect) + if i < 2: + ca_layer = RETURNX() + else: + ca_layer = Transformer(2**(i+1) * ngf,2,4,ngf,ngf*4) + setattr(self, 'ca' + str(i), ca_layer) + setattr(self, 'ref_down' + str(i), model_ref) 
+ setattr(self, 'inp_down' + str(i), model_inp) + self.output_nc = out_channels * 2 + + def forward(self, maskGT, ref): + x_maskGT, x_ref = self.first_inp(maskGT), self.first_ref(ref) + out=[x_maskGT] + for i in range(self.layers): + model_ref = getattr(self, 'ref_down'+str(i)) + model_inp = getattr(self, 'inp_down'+str(i)) + ca_layer = getattr(self, 'ca'+str(i)) + x_maskGT, x_ref = model_inp(x_maskGT), model_ref(x_ref) + x_maskGT = ca_layer(x_maskGT, x_ref) + if i < self.layers - 1: + out.append(x_maskGT) + else: + out.append(torch.cat([x_maskGT, x_ref], dim=1)) # concat ref features ! + return out + + +class Decoder(nn.Module): + def __init__(self, image_nc, feature_nc, ngf, img_f, layers, num_block, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(Decoder, self).__init__() + self.layers = layers + for i in range(layers)[::-1]: + if i == layers-1: + in_channels = ngf*(2**(i+1)) * 2 + else: + in_channels = min(ngf*(2**(i+1)), img_f) + out_channels = min(ngf*(2**i), img_f) + up = UpBlock2d(in_channels, out_channels, norm_layer, nonlinearity, use_spect) + res = FFCADAINResBlocks(num_block, in_channels, feature_nc, norm_layer, nonlinearity, use_spect) + jump = Jump(out_channels, norm_layer, nonlinearity, use_spect) + + setattr(self, 'up' + str(i), up) + setattr(self, 'res' + str(i), res) + setattr(self, 'jump' + str(i), jump) + + self.final = FinalBlock2d(out_channels, image_nc, use_spect, 'sigmoid') + self.output_nc = out_channels + + def forward(self, x, z): + out = x.pop() + for i in range(self.layers)[::-1]: + res_model = getattr(self, 'res' + str(i)) + up_model = getattr(self, 'up' + str(i)) + jump_model = getattr(self, 'jump' + str(i)) + out = res_model(out, z) + out = up_model(out) + out = jump_model(x.pop()) + out + out_image = self.final(out) + return out_image + + +class LNet(nn.Module): + def __init__( + self, + image_nc=3, + descriptor_nc=512, + layer=3, + base_nc=64, + max_nc=512, + num_res_blocks=9, + use_spect=True, + encoder=Visual_Encoder, + decoder=Decoder + ): + super(LNet, self).__init__() + + nonlinearity = nn.LeakyReLU(0.1) + norm_layer = functools.partial(LayerNorm2d, affine=True) + kwargs = {'norm_layer':norm_layer, 'nonlinearity':nonlinearity, 'use_spect':use_spect} + self.descriptor_nc = descriptor_nc + + self.encoder = encoder(image_nc, base_nc, max_nc, layer, **kwargs) + self.decoder = decoder(image_nc, self.descriptor_nc, base_nc, max_nc, layer, num_res_blocks, **kwargs) + self.audio_encoder = nn.Sequential( + Conv2d(1, 32, kernel_size=3, stride=1, padding=1), + Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True), + + Conv2d(32, 64, kernel_size=3, stride=(3, 1), padding=1), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True), + + Conv2d(64, 128, kernel_size=3, stride=3, padding=1), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True), + + Conv2d(128, 256, kernel_size=3, stride=(3, 2), padding=1), + Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True), + + Conv2d(256, 512, kernel_size=3, stride=1, padding=0), + Conv2d(512, descriptor_nc, kernel_size=1, stride=1, padding=0), + ) + + def forward(self, audio_sequences, face_sequences): + B = audio_sequences.size(0) + input_dim_size = len(face_sequences.size()) + if input_dim_size > 4: + audio_sequences = 
torch.cat([audio_sequences[:, i] for i in range(audio_sequences.size(1))], dim=0) + face_sequences = torch.cat([face_sequences[:, :, i] for i in range(face_sequences.size(2))], dim=0) + cropped, ref = torch.split(face_sequences, 3, dim=1) + + vis_feat = self.encoder(cropped, ref) + audio_feat = self.audio_encoder(audio_sequences) + _outputs = self.decoder(vis_feat, audio_feat) + + if input_dim_size > 4: + _outputs = torch.split(_outputs, B, dim=0) + outputs = torch.stack(_outputs, dim=2) + else: + outputs = _outputs + return outputs \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b859f14e6975631adacf1ad8d8316c70e80994c6 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,36 @@ +import torch +from models.DNet import DNet +from models.LNet import LNet +from models.ENet import ENet + + +def _load(checkpoint_path): + checkpoint = torch.load(checkpoint_path) + return checkpoint + +def load_checkpoint(path, model): + print("Load checkpoint from: {}".format(path)) + checkpoint = _load(path) + s = checkpoint["state_dict"] if 'arcface' not in path else checkpoint + new_s = {} + for k, v in s.items(): + if 'low_res' in k: + continue + else: + new_s[k.replace('module.', '')] = v + model.load_state_dict(new_s, strict=False) + return model + +def load_network(args): + L_net = LNet() + L_net = load_checkpoint(args.LNet_path, L_net) + E_net = ENet(lnet=L_net) + model = load_checkpoint(args.ENet_path, E_net) + return model.eval() + +def load_DNet(args): + D_Net = DNet() + print("Load checkpoint from: {}".format(args.DNet_path)) + checkpoint = torch.load(args.DNet_path, map_location=lambda storage, loc: storage) + D_Net.load_state_dict(checkpoint['net_G_ema'], strict=False) + return D_Net.eval() \ No newline at end of file diff --git a/models/base_blocks.py b/models/base_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..2d453540b5b9701b23574aea175890de74f06b51 --- /dev/null +++ b/models/base_blocks.py @@ -0,0 +1,554 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.batchnorm import BatchNorm2d +from torch.nn.utils.spectral_norm import spectral_norm as SpectralNorm + +from models.ffc import FFC +from basicsr.archs.arch_util import default_init_weights + + +class Conv2d(nn.Module): + def __init__(self, cin, cout, kernel_size, stride, padding, residual=False, *args, **kwargs): + super().__init__(*args, **kwargs) + self.conv_block = nn.Sequential( + nn.Conv2d(cin, cout, kernel_size, stride, padding), + nn.BatchNorm2d(cout) + ) + self.act = nn.ReLU() + self.residual = residual + + def forward(self, x): + out = self.conv_block(x) + if self.residual: + out += x + return self.act(out) + + +class ResBlock(nn.Module): + def __init__(self, in_channels, out_channels, mode='down'): + super(ResBlock, self).__init__() + self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1) + self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1) + self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False) + if mode == 'down': + self.scale_factor = 0.5 + elif mode == 'up': + self.scale_factor = 2 + + def forward(self, x): + out = F.leaky_relu_(self.conv1(x), negative_slope=0.2) + # upsample/downsample + out = F.interpolate(out, scale_factor=self.scale_factor, mode='bilinear', align_corners=False) + out = F.leaky_relu_(self.conv2(out), negative_slope=0.2) + # skip + x = F.interpolate(x, scale_factor=self.scale_factor, mode='bilinear', 
align_corners=False) + skip = self.skip(x) + out = out + skip + return out + + +class LayerNorm2d(nn.Module): + def __init__(self, n_out, affine=True): + super(LayerNorm2d, self).__init__() + self.n_out = n_out + self.affine = affine + + if self.affine: + self.weight = nn.Parameter(torch.ones(n_out, 1, 1)) + self.bias = nn.Parameter(torch.zeros(n_out, 1, 1)) + + def forward(self, x): + normalized_shape = x.size()[1:] + if self.affine: + return F.layer_norm(x, normalized_shape, \ + self.weight.expand(normalized_shape), + self.bias.expand(normalized_shape)) + else: + return F.layer_norm(x, normalized_shape) + + +def spectral_norm(module, use_spect=True): + if use_spect: + return SpectralNorm(module) + else: + return module + + +class FirstBlock2d(nn.Module): + def __init__(self, input_nc, output_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FirstBlock2d, self).__init__() + kwargs = {'kernel_size': 7, 'stride': 1, 'padding': 3} + conv = spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs), use_spect) + + if type(norm_layer) == type(None): + self.model = nn.Sequential(conv, nonlinearity) + else: + self.model = nn.Sequential(conv, norm_layer(output_nc), nonlinearity) + + def forward(self, x): + out = self.model(x) + return out + + +class DownBlock2d(nn.Module): + def __init__(self, input_nc, output_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(DownBlock2d, self).__init__() + kwargs = {'kernel_size': 3, 'stride': 1, 'padding': 1} + conv = spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs), use_spect) + pool = nn.AvgPool2d(kernel_size=(2, 2)) + + if type(norm_layer) == type(None): + self.model = nn.Sequential(conv, nonlinearity, pool) + else: + self.model = nn.Sequential(conv, norm_layer(output_nc), nonlinearity, pool) + + def forward(self, x): + out = self.model(x) + return out + + +class UpBlock2d(nn.Module): + def __init__(self, input_nc, output_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(UpBlock2d, self).__init__() + kwargs = {'kernel_size': 3, 'stride': 1, 'padding': 1} + conv = spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs), use_spect) + if type(norm_layer) == type(None): + self.model = nn.Sequential(conv, nonlinearity) + else: + self.model = nn.Sequential(conv, norm_layer(output_nc), nonlinearity) + + def forward(self, x): + out = self.model(F.interpolate(x, scale_factor=2)) + return out + + +class ADAIN(nn.Module): + def __init__(self, norm_nc, feature_nc): + super().__init__() + + self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False) + + nhidden = 128 + use_bias=True + + self.mlp_shared = nn.Sequential( + nn.Linear(feature_nc, nhidden, bias=use_bias), + nn.ReLU() + ) + self.mlp_gamma = nn.Linear(nhidden, norm_nc, bias=use_bias) + self.mlp_beta = nn.Linear(nhidden, norm_nc, bias=use_bias) + + def forward(self, x, feature): + + # Part 1. generate parameter-free normalized activations + normalized = self.param_free_norm(x) + # Part 2. 
produce scaling and bias conditioned on feature + feature = feature.view(feature.size(0), -1) + actv = self.mlp_shared(feature) + gamma = self.mlp_gamma(actv) + beta = self.mlp_beta(actv) + + # apply scale and bias + gamma = gamma.view(*gamma.size()[:2], 1,1) + beta = beta.view(*beta.size()[:2], 1,1) + out = normalized * (1 + gamma) + beta + return out + + +class FineADAINResBlock2d(nn.Module): + """ + Define an Residual block for different types + """ + def __init__(self, input_nc, feature_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FineADAINResBlock2d, self).__init__() + kwargs = {'kernel_size': 3, 'stride': 1, 'padding': 1} + self.conv1 = spectral_norm(nn.Conv2d(input_nc, input_nc, **kwargs), use_spect) + self.conv2 = spectral_norm(nn.Conv2d(input_nc, input_nc, **kwargs), use_spect) + self.norm1 = ADAIN(input_nc, feature_nc) + self.norm2 = ADAIN(input_nc, feature_nc) + self.actvn = nonlinearity + + def forward(self, x, z): + dx = self.actvn(self.norm1(self.conv1(x), z)) + dx = self.norm2(self.conv2(x), z) + out = dx + x + return out + + +class FineADAINResBlocks(nn.Module): + def __init__(self, num_block, input_nc, feature_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FineADAINResBlocks, self).__init__() + self.num_block = num_block + for i in range(num_block): + model = FineADAINResBlock2d(input_nc, feature_nc, norm_layer, nonlinearity, use_spect) + setattr(self, 'res'+str(i), model) + + def forward(self, x, z): + for i in range(self.num_block): + model = getattr(self, 'res'+str(i)) + x = model(x, z) + return x + + +class ADAINEncoderBlock(nn.Module): + def __init__(self, input_nc, output_nc, feature_nc, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(ADAINEncoderBlock, self).__init__() + kwargs_down = {'kernel_size': 4, 'stride': 2, 'padding': 1} + kwargs_fine = {'kernel_size': 3, 'stride': 1, 'padding': 1} + + self.conv_0 = spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs_down), use_spect) + self.conv_1 = spectral_norm(nn.Conv2d(output_nc, output_nc, **kwargs_fine), use_spect) + + + self.norm_0 = ADAIN(input_nc, feature_nc) + self.norm_1 = ADAIN(output_nc, feature_nc) + self.actvn = nonlinearity + + def forward(self, x, z): + x = self.conv_0(self.actvn(self.norm_0(x, z))) + x = self.conv_1(self.actvn(self.norm_1(x, z))) + return x + + +class ADAINDecoderBlock(nn.Module): + def __init__(self, input_nc, output_nc, hidden_nc, feature_nc, use_transpose=True, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(ADAINDecoderBlock, self).__init__() + # Attributes + self.actvn = nonlinearity + hidden_nc = min(input_nc, output_nc) if hidden_nc is None else hidden_nc + + kwargs_fine = {'kernel_size':3, 'stride':1, 'padding':1} + if use_transpose: + kwargs_up = {'kernel_size':3, 'stride':2, 'padding':1, 'output_padding':1} + else: + kwargs_up = {'kernel_size':3, 'stride':1, 'padding':1} + + # create conv layers + self.conv_0 = spectral_norm(nn.Conv2d(input_nc, hidden_nc, **kwargs_fine), use_spect) + if use_transpose: + self.conv_1 = spectral_norm(nn.ConvTranspose2d(hidden_nc, output_nc, **kwargs_up), use_spect) + self.conv_s = spectral_norm(nn.ConvTranspose2d(input_nc, output_nc, **kwargs_up), use_spect) + else: + self.conv_1 = nn.Sequential(spectral_norm(nn.Conv2d(hidden_nc, output_nc, **kwargs_up), use_spect), + nn.Upsample(scale_factor=2)) + self.conv_s = nn.Sequential(spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs_up), use_spect), + nn.Upsample(scale_factor=2)) + # define normalization 
layers + self.norm_0 = ADAIN(input_nc, feature_nc) + self.norm_1 = ADAIN(hidden_nc, feature_nc) + self.norm_s = ADAIN(input_nc, feature_nc) + + def forward(self, x, z): + x_s = self.shortcut(x, z) + dx = self.conv_0(self.actvn(self.norm_0(x, z))) + dx = self.conv_1(self.actvn(self.norm_1(dx, z))) + out = x_s + dx + return out + + def shortcut(self, x, z): + x_s = self.conv_s(self.actvn(self.norm_s(x, z))) + return x_s + + +class FineEncoder(nn.Module): + """docstring for Encoder""" + def __init__(self, image_nc, ngf, img_f, layers, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FineEncoder, self).__init__() + self.layers = layers + self.first = FirstBlock2d(image_nc, ngf, norm_layer, nonlinearity, use_spect) + for i in range(layers): + in_channels = min(ngf*(2**i), img_f) + out_channels = min(ngf*(2**(i+1)), img_f) + model = DownBlock2d(in_channels, out_channels, norm_layer, nonlinearity, use_spect) + setattr(self, 'down' + str(i), model) + self.output_nc = out_channels + + def forward(self, x): + x = self.first(x) + out=[x] + for i in range(self.layers): + model = getattr(self, 'down'+str(i)) + x = model(x) + out.append(x) + return out + + +class FineDecoder(nn.Module): + """docstring for FineDecoder""" + def __init__(self, image_nc, feature_nc, ngf, img_f, layers, num_block, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FineDecoder, self).__init__() + self.layers = layers + for i in range(layers)[::-1]: + in_channels = min(ngf*(2**(i+1)), img_f) + out_channels = min(ngf*(2**i), img_f) + up = UpBlock2d(in_channels, out_channels, norm_layer, nonlinearity, use_spect) + res = FineADAINResBlocks(num_block, in_channels, feature_nc, norm_layer, nonlinearity, use_spect) + jump = Jump(out_channels, norm_layer, nonlinearity, use_spect) + setattr(self, 'up' + str(i), up) + setattr(self, 'res' + str(i), res) + setattr(self, 'jump' + str(i), jump) + self.final = FinalBlock2d(out_channels, image_nc, use_spect, 'tanh') + self.output_nc = out_channels + + def forward(self, x, z): + out = x.pop() + for i in range(self.layers)[::-1]: + res_model = getattr(self, 'res' + str(i)) + up_model = getattr(self, 'up' + str(i)) + jump_model = getattr(self, 'jump' + str(i)) + out = res_model(out, z) + out = up_model(out) + out = jump_model(x.pop()) + out + out_image = self.final(out) + return out_image + + +class ADAINEncoder(nn.Module): + def __init__(self, image_nc, pose_nc, ngf, img_f, layers, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(ADAINEncoder, self).__init__() + self.layers = layers + self.input_layer = nn.Conv2d(image_nc, ngf, kernel_size=7, stride=1, padding=3) + for i in range(layers): + in_channels = min(ngf * (2**i), img_f) + out_channels = min(ngf *(2**(i+1)), img_f) + model = ADAINEncoderBlock(in_channels, out_channels, pose_nc, nonlinearity, use_spect) + setattr(self, 'encoder' + str(i), model) + self.output_nc = out_channels + + def forward(self, x, z): + out = self.input_layer(x) + out_list = [out] + for i in range(self.layers): + model = getattr(self, 'encoder' + str(i)) + out = model(out, z) + out_list.append(out) + return out_list + + +class ADAINDecoder(nn.Module): + """docstring for ADAINDecoder""" + def __init__(self, pose_nc, ngf, img_f, encoder_layers, decoder_layers, skip_connect=True, + nonlinearity=nn.LeakyReLU(), use_spect=False): + + super(ADAINDecoder, self).__init__() + self.encoder_layers = encoder_layers + self.decoder_layers = decoder_layers + self.skip_connect = skip_connect + use_transpose = True 
+ for i in range(encoder_layers-decoder_layers, encoder_layers)[::-1]: + in_channels = min(ngf * (2**(i+1)), img_f) + in_channels = in_channels*2 if i != (encoder_layers-1) and self.skip_connect else in_channels + out_channels = min(ngf * (2**i), img_f) + model = ADAINDecoderBlock(in_channels, out_channels, out_channels, pose_nc, use_transpose, nonlinearity, use_spect) + setattr(self, 'decoder' + str(i), model) + self.output_nc = out_channels*2 if self.skip_connect else out_channels + + def forward(self, x, z): + out = x.pop() if self.skip_connect else x + for i in range(self.encoder_layers-self.decoder_layers, self.encoder_layers)[::-1]: + model = getattr(self, 'decoder' + str(i)) + out = model(out, z) + out = torch.cat([out, x.pop()], 1) if self.skip_connect else out + return out + + +class ADAINHourglass(nn.Module): + def __init__(self, image_nc, pose_nc, ngf, img_f, encoder_layers, decoder_layers, nonlinearity, use_spect): + super(ADAINHourglass, self).__init__() + self.encoder = ADAINEncoder(image_nc, pose_nc, ngf, img_f, encoder_layers, nonlinearity, use_spect) + self.decoder = ADAINDecoder(pose_nc, ngf, img_f, encoder_layers, decoder_layers, True, nonlinearity, use_spect) + self.output_nc = self.decoder.output_nc + + def forward(self, x, z): + return self.decoder(self.encoder(x, z), z) + + +class FineADAINLama(nn.Module): + def __init__(self, input_nc, feature_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FineADAINLama, self).__init__() + kwargs = {'kernel_size': 3, 'stride': 1, 'padding': 1} + self.actvn = nonlinearity + ratio_gin = 0.75 + ratio_gout = 0.75 + self.ffc = FFC(input_nc, input_nc, 3, + ratio_gin, ratio_gout, 1, 1, 1, + 1, False, False, padding_type='reflect') + global_channels = int(input_nc * ratio_gout) + self.bn_l = ADAIN(input_nc - global_channels, feature_nc) + self.bn_g = ADAIN(global_channels, feature_nc) + + def forward(self, x, z): + x_l, x_g = self.ffc(x) + x_l = self.actvn(self.bn_l(x_l,z)) + x_g = self.actvn(self.bn_g(x_g,z)) + return x_l, x_g + + +class FFCResnetBlock(nn.Module): + def __init__(self, dim, feature_dim, padding_type='reflect', norm_layer=BatchNorm2d, activation_layer=nn.ReLU, dilation=1, + spatial_transform_kwargs=None, inline=False, **conv_kwargs): + super().__init__() + self.conv1 = FineADAINLama(dim, feature_dim, **conv_kwargs) + self.conv2 = FineADAINLama(dim, feature_dim, **conv_kwargs) + self.inline = True + + def forward(self, x, z): + if self.inline: + x_l, x_g = x[:, :-self.conv1.ffc.global_in_num], x[:, -self.conv1.ffc.global_in_num:] + else: + x_l, x_g = x if type(x) is tuple else (x, 0) + + id_l, id_g = x_l, x_g + x_l, x_g = self.conv1((x_l, x_g), z) + x_l, x_g = self.conv2((x_l, x_g), z) + + x_l, x_g = id_l + x_l, id_g + x_g + out = x_l, x_g + if self.inline: + out = torch.cat(out, dim=1) + return out + + +class FFCADAINResBlocks(nn.Module): + def __init__(self, num_block, input_nc, feature_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(FFCADAINResBlocks, self).__init__() + self.num_block = num_block + for i in range(num_block): + model = FFCResnetBlock(input_nc, feature_nc, norm_layer, nonlinearity, use_spect) + setattr(self, 'res'+str(i), model) + + def forward(self, x, z): + for i in range(self.num_block): + model = getattr(self, 'res'+str(i)) + x = model(x, z) + return x + + +class Jump(nn.Module): + def __init__(self, input_nc, norm_layer=nn.BatchNorm2d, nonlinearity=nn.LeakyReLU(), use_spect=False): + super(Jump, self).__init__() + kwargs = 
{'kernel_size': 3, 'stride': 1, 'padding': 1} + conv = spectral_norm(nn.Conv2d(input_nc, input_nc, **kwargs), use_spect) + if type(norm_layer) == type(None): + self.model = nn.Sequential(conv, nonlinearity) + else: + self.model = nn.Sequential(conv, norm_layer(input_nc), nonlinearity) + + def forward(self, x): + out = self.model(x) + return out + + +class FinalBlock2d(nn.Module): + def __init__(self, input_nc, output_nc, use_spect=False, tanh_or_sigmoid='tanh'): + super(FinalBlock2d, self).__init__() + kwargs = {'kernel_size': 7, 'stride': 1, 'padding':3} + conv = spectral_norm(nn.Conv2d(input_nc, output_nc, **kwargs), use_spect) + if tanh_or_sigmoid == 'sigmoid': + out_nonlinearity = nn.Sigmoid() + else: + out_nonlinearity = nn.Tanh() + self.model = nn.Sequential(conv, out_nonlinearity) + + def forward(self, x): + out = self.model(x) + return out + + +class ModulatedConv2d(nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + eps=1e-8): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + + # modulation inside each modulated conv + self.modulation = nn.Linear(num_style_feat, in_channels, bias=True) + # initialization + default_init_weights(self.modulation, scale=1, bias_fill=1, a=0, mode='fan_in', nonlinearity='linear') + + self.weight = nn.Parameter( + torch.randn(1, out_channels, in_channels, kernel_size, kernel_size) / + math.sqrt(in_channels * kernel_size**2)) + self.padding = kernel_size // 2 + + def forward(self, x, style): + b, c, h, w = x.shape + style = self.modulation(style).view(b, 1, c, 1, 1) + weight = self.weight * style + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view(b * self.out_channels, c, self.kernel_size, self.kernel_size) + + # upsample or downsample if necessary + if self.sample_mode == 'upsample': + x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) + elif self.sample_mode == 'downsample': + x = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=False) + + b, c, h, w = x.shape + x = x.view(1, b * c, h, w) + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + return out + + def __repr__(self): + return (f'{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, ' + f'kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})') + + +class StyleConv(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, num_style_feat, demodulate=True, sample_mode=None): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, out_channels, kernel_size, num_style_feat, demodulate=demodulate, sample_mode=sample_mode) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1)) + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) * 2**0.5 # for conversion + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # add bias + out 
= out + self.bias + # activation + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + def __init__(self, in_channels, num_style_feat, upsample=True): + super(ToRGB, self).__init__() + self.upsample = upsample + self.modulated_conv = ModulatedConv2d( + in_channels, 3, kernel_size=1, num_style_feat=num_style_feat, demodulate=False, sample_mode=None) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = F.interpolate(skip, scale_factor=2, mode='bilinear', align_corners=False) + out = out + skip + return out \ No newline at end of file diff --git a/models/ffc.py b/models/ffc.py new file mode 100644 index 0000000000000000000000000000000000000000..89a5c4c09dc5f3e739a3ee9446225a738e0de97a --- /dev/null +++ b/models/ffc.py @@ -0,0 +1,233 @@ +# Fast Fourier Convolution NeurIPS 2020 +# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py +# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf + +import torch +import torch.nn as nn +import torch.nn.functional as F +# from models.modules.squeeze_excitation import SELayer +import torch.fft + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + res = x * y.expand_as(x) + return res + + +class FFCSE_block(nn.Module): + def __init__(self, channels, ratio_g): + super(FFCSE_block, self).__init__() + in_cg = int(channels * ratio_g) + in_cl = channels - in_cg + r = 16 + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.conv1 = nn.Conv2d(channels, channels // r, + kernel_size=1, bias=True) + self.relu1 = nn.ReLU(inplace=True) + self.conv_a2l = None if in_cl == 0 else nn.Conv2d( + channels // r, in_cl, kernel_size=1, bias=True) + self.conv_a2g = None if in_cg == 0 else nn.Conv2d( + channels // r, in_cg, kernel_size=1, bias=True) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = x if type(x) is tuple else (x, 0) + id_l, id_g = x + + x = id_l if type(id_g) is int else torch.cat([id_l, id_g], dim=1) + x = self.avgpool(x) + x = self.relu1(self.conv1(x)) + + x_l = 0 if self.conv_a2l is None else id_l * \ + self.sigmoid(self.conv_a2l(x)) + x_g = 0 if self.conv_a2g is None else id_g * \ + self.sigmoid(self.conv_a2g(x)) + return x_l, x_g + + +class FourierUnit(nn.Module): + + def __init__(self, in_channels, out_channels, groups=1, spatial_scale_factor=None, spatial_scale_mode='bilinear', + spectral_pos_encoding=False, use_se=False, se_kwargs=None, ffc3d=False, fft_norm='ortho'): + # bn_layer not used + super(FourierUnit, self).__init__() + self.groups = groups + + self.conv_layer = torch.nn.Conv2d(in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0), + out_channels=out_channels * 2, + kernel_size=1, stride=1, padding=0, groups=self.groups, bias=False) + self.bn = torch.nn.BatchNorm2d(out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + # squeeze and excitation block + self.use_se = use_se + if use_se: + if se_kwargs is None: + se_kwargs = {} + self.se = SELayer(self.conv_layer.in_channels, 
**se_kwargs) + + self.spatial_scale_factor = spatial_scale_factor + self.spatial_scale_mode = spatial_scale_mode + self.spectral_pos_encoding = spectral_pos_encoding + self.ffc3d = ffc3d + self.fft_norm = fft_norm + + def forward(self, x): + batch = x.shape[0] + + if self.spatial_scale_factor is not None: + orig_size = x.shape[-2:] + x = F.interpolate(x, scale_factor=self.spatial_scale_factor, mode=self.spatial_scale_mode, align_corners=False) + + r_size = x.size() + # (batch, c, h, w/2+1, 2) + fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1) + ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm) + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view((batch, -1,) + ffted.size()[3:]) + + if self.spectral_pos_encoding: + height, width = ffted.shape[-2:] + coords_vert = torch.linspace(0, 1, height)[None, None, :, None].expand(batch, 1, height, width).to(ffted) + coords_hor = torch.linspace(0, 1, width)[None, None, None, :].expand(batch, 1, height, width).to(ffted) + ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1) + + if self.use_se: + ffted = self.se(ffted) + + ffted = self.conv_layer(ffted) # (batch, c*2, h, w/2+1) + ffted = self.relu(self.bn(ffted)) + + ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute( + 0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2) + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:] + output = torch.fft.irfftn(ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm) + + if self.spatial_scale_factor is not None: + output = F.interpolate(output, size=orig_size, mode=self.spatial_scale_mode, align_corners=False) + + return output + + +class SpectralTransform(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, groups=1, enable_lfu=True, **fu_kwargs): + # bn_layer not used + super(SpectralTransform, self).__init__() + self.enable_lfu = enable_lfu + if stride == 2: + self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2) + else: + self.downsample = nn.Identity() + + self.stride = stride + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels // + 2, kernel_size=1, groups=groups, bias=False), + nn.BatchNorm2d(out_channels // 2), + nn.ReLU(inplace=True) + ) + self.fu = FourierUnit( + out_channels // 2, out_channels // 2, groups, **fu_kwargs) + if self.enable_lfu: + self.lfu = FourierUnit( + out_channels // 2, out_channels // 2, groups) + self.conv2 = torch.nn.Conv2d( + out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False) + + def forward(self, x): + x = self.downsample(x) + x = self.conv1(x) + output = self.fu(x) + + if self.enable_lfu: + n, c, h, w = x.shape + split_no = 2 + split_s = h // split_no + xs = torch.cat(torch.split( + x[:, :c // 4], split_s, dim=-2), dim=1).contiguous() + xs = torch.cat(torch.split(xs, split_s, dim=-1), + dim=1).contiguous() + xs = self.lfu(xs) + xs = xs.repeat(1, 1, split_no, split_no).contiguous() + else: + xs = 0 + + output = self.conv2(x + output + xs) + return output + + +class FFC(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + ratio_gin, ratio_gout, stride=1, padding=0, + dilation=1, groups=1, bias=False, enable_lfu=True, + padding_type='reflect', gated=False, **spectral_kwargs): + super(FFC, self).__init__() + + assert stride == 1 or stride == 2, "Stride should be 1 or 2." 
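+        # Channels are split into a "local" branch handled by ordinary convolutions
+        # and a "global" branch handled by the SpectralTransform; ratio_gin/ratio_gout
+        # set the fraction of input/output channels routed to the global branch.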
+ self.stride = stride + + in_cg = int(in_channels * ratio_gin) + in_cl = in_channels - in_cg + out_cg = int(out_channels * ratio_gout) + out_cl = out_channels - out_cg + + self.ratio_gin = ratio_gin + self.ratio_gout = ratio_gout + self.global_in_num = in_cg + + module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d + self.convl2l = module(in_cl, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d + self.convl2g = module(in_cl, out_cg, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d + self.convg2l = module(in_cg, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform + self.convg2g = module( + in_cg, out_cg, stride, 1 if groups == 1 else groups // 2, enable_lfu, **spectral_kwargs) + + self.gated = gated + module = nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d + self.gate = module(in_channels, 2, 1) + + def forward(self, x): + x_l, x_g = x if type(x) is tuple else (x, 0) + out_xl, out_xg = 0, 0 + + if self.gated: + total_input_parts = [x_l] + if torch.is_tensor(x_g): + total_input_parts.append(x_g) + total_input = torch.cat(total_input_parts, dim=1) + + gates = torch.sigmoid(self.gate(total_input)) + g2l_gate, l2g_gate = gates.chunk(2, dim=1) + else: + g2l_gate, l2g_gate = 1, 1 + + if self.ratio_gout != 1: + out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate + if self.ratio_gout != 0: + out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g) + + return out_xl, out_xg \ No newline at end of file diff --git a/models/transformer.py b/models/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..cf582c760432b6ad6cf8988e5155814b28b33107 --- /dev/null +++ b/models/transformer.py @@ -0,0 +1,119 @@ +import torch +from torch import nn + +from einops import rearrange + +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class GELU(nn.Module): + def __init__(self): + super(GELU, self).__init__() + def forward(self, x): + return 0.5*x*(1+F.tanh(np.sqrt(2/np.pi)*(x+0.044715*torch.pow(x,3)))) + +# helpers + +def pair(t): + return t if isinstance(t, tuple) else (t, t) + +# classes + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + +class DualPreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.normx = nn.LayerNorm(dim) + self.normy = nn.LayerNorm(dim) + self.fn = fn + def forward(self, x, y, **kwargs): + return self.fn(self.normx(x), self.normy(y), **kwargs) + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout = 0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + def forward(self, x): + return self.net(x) + +class Attention(nn.Module): + def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.): + super().__init__() + inner_dim = dim_head * heads + project_out = not (heads == 1 and dim_head == dim) + + self.heads = heads + self.scale = dim_head ** -0.5 + + self.attend = nn.Softmax(dim = -1) + + self.to_q = nn.Linear(dim, inner_dim, bias = False) + self.to_k = 
nn.Linear(dim, inner_dim, bias = False) + self.to_v = nn.Linear(dim, inner_dim, bias = False) + + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(dropout) + ) if project_out else nn.Identity() + + def forward(self, x, y): + # qk = self.to_qk(x).chunk(2, dim = -1) # + q = rearrange(self.to_q(x), 'b n (h d) -> b h n d', h = self.heads) # q,k from the zero feature + k = rearrange(self.to_k(x), 'b n (h d) -> b h n d', h = self.heads) # v from the reference features + v = rearrange(self.to_v(y), 'b n (h d) -> b h n d', h = self.heads) + + dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale + + attn = self.attend(dots) + + out = torch.matmul(attn, v) + out = rearrange(out, 'b h n d -> b n (h d)') + return self.to_out(out) + +class Transformer(nn.Module): + def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.): + super().__init__() + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + DualPreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)) + ])) + + + def forward(self, x, y): # x is the cropped, y is the foreign reference + bs,c,h,w = x.size() + + # img to embedding + x = x.view(bs,c,-1).permute(0,2,1) + y = y.view(bs,c,-1).permute(0,2,1) + + for attn, ff in self.layers: + x = attn(x, y) + x + x = ff(x) + x + + x = x.view(bs,h,w,c).permute(0,3,1,2) + return x + +class RETURNX(nn.Module): + def __init__(self,): + super().__init__() + + def forward(self, x, y): # x is the cropped, y is the foreign reference + return x \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ed33d64e58d8d0af5a90ff619da1bf7f3c1e5aa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +basicsr==1.4.2 +dlib==19.24.2 +docopt==0.6.2 +dominate==2.8.0 +easydict==1.10 +einops==0.7.0 +face_alignment==1.4.1 +facexlib==0.3.0 +gradio==3.46.1 +imageio==2.31.5 +insightface==0.7.3 +iou==0.1.0 +kornia==0.7.0 +librosa==0.8.0 +matplotlib==3.7.1 +menpo==0.11.0 +mxnet==1.9.1 +numpy==1.23.5 +onnx==1.14.1 +onnxruntime==1.16.0 +onnxsim==0.4.33 diff --git a/third_part/GFPGAN/LICENSE b/third_part/GFPGAN/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..24384c0728442ace20d180d784cb3ea714413923 --- /dev/null +++ b/third_part/GFPGAN/LICENSE @@ -0,0 +1,351 @@ +Tencent is pleased to support the open source community by making GFPGAN available. + +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + +GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below. + + +Terms of the Apache License Version 2.0: +--------------------------------------------- +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +1. Definitions. + +“License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +“Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + + + +Other dependencies and licenses: + + +Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. basicsr +Copyright 2018-2020 BasicSR Authors + + +This BasicSR project is released under the Apache 2.0 license. + +A copy of Apache 2.0 is included in this file. + +StyleGAN2 +The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch. +The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license. +DFDNet +The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. + +Terms of the Nvidia License: +--------------------------------------------- + +1. Definitions + +"Licensor" means any person or entity that distributes its Work. + +"Software" means the original work of authorship made available under +this License. + +"Work" means the Software and any additions to or derivative works of +the Software that are made available under this License. + +"Nvidia Processors" means any central processing unit (CPU), graphics +processing unit (GPU), field-programmable gate array (FPGA), +application-specific integrated circuit (ASIC) or any combination +thereof designed, made, sold, or provided by Nvidia or its affiliates. + +The terms "reproduce," "reproduction," "derivative works," and +"distribution" have the meaning as provided under U.S. copyright law; +provided, however, that for the purposes of this License, derivative +works shall not include works that remain separable from, or merely +link (or bind by name) to the interfaces of, the Work. 
+ +Works, including the Software, are "made available" under this License +by including in or with the Work either (a) a copyright notice +referencing the applicability of this License to the Work, or (b) a +copy of this License. + +2. License Grants + + 2.1 Copyright Grant. Subject to the terms and conditions of this + License, each Licensor grants to you a perpetual, worldwide, + non-exclusive, royalty-free, copyright license to reproduce, + prepare derivative works of, publicly display, publicly perform, + sublicense and distribute its Work and any resulting derivative + works in any form. + +3. Limitations + + 3.1 Redistribution. You may reproduce or distribute the Work only + if (a) you do so under this License, (b) you include a complete + copy of this License with your distribution, and (c) you retain + without modification any copyright, patent, trademark, or + attribution notices that are present in the Work. + + 3.2 Derivative Works. You may specify that additional or different + terms apply to the use, reproduction, and distribution of your + derivative works of the Work ("Your Terms") only if (a) Your Terms + provide that the use limitation in Section 3.3 applies to your + derivative works, and (b) you identify the specific derivative + works that are subject to Your Terms. Notwithstanding Your Terms, + this License (including the redistribution requirements in Section + 3.1) will continue to apply to the Work itself. + + 3.3 Use Limitation. The Work and any derivative works thereof only + may be used or intended for use non-commercially. The Work or + derivative works thereof may be used or intended for use by Nvidia + or its affiliates commercially or non-commercially. As used herein, + "non-commercially" means for research or evaluation purposes only. + + 3.4 Patent Claims. If you bring or threaten to bring a patent claim + against any Licensor (including any claim, cross-claim or + counterclaim in a lawsuit) to enforce any patents that you allege + are infringed by any Work, then your rights under this License from + such Licensor (including the grants in Sections 2.1 and 2.2) will + terminate immediately. + + 3.5 Trademarks. This License does not grant any rights to use any + Licensor's or its affiliates' names, logos, or trademarks, except + as necessary to reproduce the notices described in this License. + + 3.6 Termination. If you violate any term of this License, then your + rights under this License (including the grants in Sections 2.1 and + 2.2) will terminate immediately. + +4. Disclaimer of Warranty. + +THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +THIS LICENSE. + +5. Limitation of Liability. + +EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. 
+ +MIT License + +Copyright (c) 2019 Kim Seonghyeon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + + +Open Source Software licensed under the BSD 3-Clause license: +--------------------------------------------- +1. torchvision +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +2. torch +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + + +Terms of the BSD 3-Clause License: +--------------------------------------------- +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. numpy +Copyright (c) 2005-2020, NumPy Developers. +All rights reserved. + +A copy of BSD 3-Clause License is included in this file. + +The NumPy repository and source distributions bundle several libraries that are +compatibly licensed. We list these here. + +Name: Numpydoc +Files: doc/sphinxext/numpydoc/* +License: BSD-2-Clause + For details, see doc/sphinxext/LICENSE.txt + +Name: scipy-sphinx-theme +Files: doc/scipy-sphinx-theme/* +License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0 + For details, see doc/scipy-sphinx-theme/LICENSE.txt + +Name: lapack-lite +Files: numpy/linalg/lapack_lite/* +License: BSD-3-Clause + For details, see numpy/linalg/lapack_lite/LICENSE.txt + +Name: tempita +Files: tools/npy_tempita/* +License: MIT + For details, see tools/npy_tempita/license.txt + +Name: dragon4 +Files: numpy/core/src/multiarray/dragon4.c +License: MIT + For license text, see numpy/core/src/multiarray/dragon4.c + + + +Open Source Software licensed under the MIT license: +--------------------------------------------- +1. facexlib +Copyright (c) 2020 Xintao Wang + +2. opencv-python +Copyright (c) Olli-Pekka Heinisuo +Please note that only files in cv2 package are used. + + +Terms of the MIT License: +--------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + +Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. tqdm +Copyright (c) 2013 noamraph + +`tqdm` is a product of collaborative work. +Unless otherwise stated, all authors (see commit logs) retain copyright +for their respective work, and release the work under the MIT licence +(text below). + +Exceptions or notable authors are listed below +in reverse chronological order: + +* files: * + MPLv2.0 2015-2020 (c) Casper da Costa-Luis + [casperdcl](https://github.com/casperdcl). +* files: tqdm/_tqdm.py + MIT 2016 (c) [PR #96] on behalf of Google Inc. +* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore + MIT 2013 (c) Noam Yorav-Raphael, original author. + +[PR #96]: https://github.com/tqdm/tqdm/pull/96 + + +Mozilla Public Licence (MPL) v. 2.0 - Exhibit A +----------------------------------------------- + +This Source Code Form is subject to the terms of the +Mozilla Public License, v. 2.0. 
+If a copy of the MPL was not distributed with this file, +You can obtain one at https://mozilla.org/MPL/2.0/. + + +MIT License (MIT) +----------------- + +Copyright (c) 2013 noamraph + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/third_part/GFPGAN/gfpgan/__init__.py b/third_part/GFPGAN/gfpgan/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..76ce3c82e64c0896009f4a61bb42417571ede2a6 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/__init__.py @@ -0,0 +1,8 @@ +# flake8: noqa + +from .archs import * +from .data import * +from .models import * +from .utils import * + +# from .version import * diff --git a/third_part/GFPGAN/gfpgan/archs/__init__.py b/third_part/GFPGAN/gfpgan/archs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bec5f17bfa38729b55f57cae8e40c27310db2b7b --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/__init__.py @@ -0,0 +1,10 @@ +import importlib +from basicsr.utils import scandir +from os import path as osp + +# automatically scan and import arch modules for registry +# scan all the files that end with '_arch.py' under the archs folder +arch_folder = osp.dirname(osp.abspath(__file__)) +arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')] +# import all the arch modules +_arch_modules = [importlib.import_module(f'gfpgan.archs.{file_name}') for file_name in arch_filenames] diff --git a/third_part/GFPGAN/gfpgan/archs/arcface_arch.py b/third_part/GFPGAN/gfpgan/archs/arcface_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d3bd97f83334450bd78ad2c3b9871102a56b70 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/arcface_arch.py @@ -0,0 +1,245 @@ +import torch.nn as nn +from basicsr.utils.registry import ARCH_REGISTRY + + +def conv3x3(inplanes, outplanes, stride=1): + """A simple wrapper for 3x3 convolution with padding. + + Args: + inplanes (int): Channel number of inputs. + outplanes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + """ + return nn.Conv2d(inplanes, outplanes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class BasicBlock(nn.Module): + """Basic residual block used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. 
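+
+    Example (illustrative sketch only; the input shape is an assumption):
+        >>> import torch
+        >>> block = BasicBlock(inplanes=64, planes=64)
+        >>> out = block(torch.randn(1, 64, 56, 56))  # spatial size preserved when stride=1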
+ """ + expansion = 1 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class IRBlock(nn.Module): + """Improved residual block (IR Block) used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. + use_se (bool): Whether use the SEBlock (squeeze and excitation block). Default: True. + """ + expansion = 1 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True): + super(IRBlock, self).__init__() + self.bn0 = nn.BatchNorm2d(inplanes) + self.conv1 = conv3x3(inplanes, inplanes) + self.bn1 = nn.BatchNorm2d(inplanes) + self.prelu = nn.PReLU() + self.conv2 = conv3x3(inplanes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.use_se = use_se + if self.use_se: + self.se = SEBlock(planes) + + def forward(self, x): + residual = x + out = self.bn0(x) + out = self.conv1(out) + out = self.bn1(out) + out = self.prelu(out) + + out = self.conv2(out) + out = self.bn2(out) + if self.use_se: + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.prelu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. + """ + expansion = 4 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class SEBlock(nn.Module): + """The squeeze-and-excitation block (SEBlock) used in the IRBlock. + + Args: + channel (int): Channel number of inputs. + reduction (int): Channel reduction ration. Default: 16. 
+ """ + + def __init__(self, channel, reduction=16): + super(SEBlock, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) # pool to 1x1 without spatial information + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.PReLU(), nn.Linear(channel // reduction, channel), + nn.Sigmoid()) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +@ARCH_REGISTRY.register() +class ResNetArcFace(nn.Module): + """ArcFace with ResNet architectures. + + Ref: ArcFace: Additive Angular Margin Loss for Deep Face Recognition. + + Args: + block (str): Block used in the ArcFace architecture. + layers (tuple(int)): Block numbers in each layer. + use_se (bool): Whether use the SEBlock (squeeze and excitation block). Default: True. + """ + + def __init__(self, block, layers, use_se=True): + if block == 'IRBlock': + block = IRBlock + self.inplanes = 64 + self.use_se = use_se + super(ResNetArcFace, self).__init__() + + self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.prelu = nn.PReLU() + self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.bn4 = nn.BatchNorm2d(512) + self.dropout = nn.Dropout() + self.fc5 = nn.Linear(512 * 8 * 8, 512) + self.bn5 = nn.BatchNorm1d(512) + + # initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, use_se=self.use_se)) + self.inplanes = planes + for _ in range(1, num_blocks): + layers.append(block(self.inplanes, planes, use_se=self.use_se)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.bn4(x) + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.fc5(x) + x = self.bn5(x) + + return x diff --git a/third_part/GFPGAN/gfpgan/archs/gfpgan_bilinear_arch.py b/third_part/GFPGAN/gfpgan/archs/gfpgan_bilinear_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..52e0de88de8543cf4afdc3988c4cdfc7c7060687 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/gfpgan_bilinear_arch.py @@ -0,0 +1,312 @@ +import math +import random +import torch +from basicsr.utils.registry import ARCH_REGISTRY +from torch import nn + +from .gfpganv1_arch import ResUpBlock +from .stylegan2_bilinear_arch import (ConvLayer, EqualConv2d, EqualLinear, ResBlock, ScaledLeakyReLU, + StyleGAN2GeneratorBilinear) + + +class StyleGAN2GeneratorBilinearSFT(StyleGAN2GeneratorBilinear): + """StyleGAN2 Generator with SFT modulation 
(Spatial Feature Transform). + + It is the bilinear version. It does not use the complicated UpFirDnSmooth function that is not friendly for + deployment. It can be easily converted to the clean version: StyleGAN2GeneratorCSFT. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__(self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + lr_mlp=0.01, + narrow=1, + sft_half=False): + super(StyleGAN2GeneratorBilinearSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + lr_mlp=lr_mlp, + narrow=narrow) + self.sft_half = sft_half + + def forward(self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False): + """Forward function for StyleGAN2GeneratorBilinearSFT. + + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. 
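+
+        Returns:
+            tuple(Tensor, Tensor | None): The generated image and, when `return_latents` is
+                True, the style latents (otherwise None).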
+ """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [getattr(self.noises, f'noise{i}') for i in range(self.num_layers)] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append(truncation_latent + truncation * (style - truncation_latent)) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], noise[1::2], + noise[2::2], self.to_rgbs): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +@ARCH_REGISTRY.register() +class GFPGANBilinear(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + + It is the bilinear version and it does not use the complicated UpFirDnSmooth function that is not friendly for + deployment. It can be easily converted to the clean version: GFPGANv1Clean. + + + Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + + num_mlp (int): Layer number of MLP style layers. Default: 8. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + input_is_latent (bool): Whether input is latent style. Default: False. + different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. 
+ """ + + def __init__( + self, + out_size, + num_style_feat=512, + channel_multiplier=1, + decoder_load_path=None, + fix_decoder=True, + # for stylegan decoder + num_mlp=8, + lr_mlp=0.01, + input_is_latent=False, + different_w=False, + narrow=1, + sft_half=False): + + super(GFPGANBilinear, self).__init__() + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + '4': int(512 * unet_narrow), + '8': int(512 * unet_narrow), + '16': int(512 * unet_narrow), + '32': int(512 * unet_narrow), + '64': int(256 * channel_multiplier * unet_narrow), + '128': int(128 * channel_multiplier * unet_narrow), + '256': int(64 * channel_multiplier * unet_narrow), + '512': int(32 * channel_multiplier * unet_narrow), + '1024': int(16 * channel_multiplier * unet_narrow) + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2**(int(math.log(out_size, 2))) + + self.conv_body_first = ConvLayer(3, channels[f'{first_out_size}'], 1, bias=True, activate=True) + + # downsample + in_channels = channels[f'{first_out_size}'] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f'{2**(i - 1)}'] + self.conv_body_down.append(ResBlock(in_channels, out_channels)) + in_channels = out_channels + + self.final_conv = ConvLayer(in_channels, channels['4'], 3, bias=True, activate=True) + + # upsample + in_channels = channels['4'] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + self.conv_body_up.append(ResUpBlock(in_channels, out_channels)) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append(EqualConv2d(channels[f'{2**i}'], 3, 1, stride=1, padding=0, bias=True, bias_init_val=0)) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = EqualLinear( + channels['4'] * 4 * 4, linear_out_channel, bias=True, bias_init_val=0, lr_mul=1, activation=None) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorBilinearSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + lr_mlp=lr_mlp, + narrow=narrow, + sft_half=sft_half) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load(decoder_load_path, map_location=lambda storage, loc: storage)['params_ema']) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + if sft_half: + sft_out_channels = out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + EqualConv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=0), + ScaledLeakyReLU(0.2), + EqualConv2d(out_channels, sft_out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=1))) + self.condition_shift.append( + nn.Sequential( + EqualConv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=0), + 
ScaledLeakyReLU(0.2), + EqualConv2d(out_channels, sft_out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=0))) + + def forward(self, x, return_latents=False, return_rgb=True, randomize_noise=True): + """Forward function for GFPGANBilinear. + + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = self.conv_body_first(x) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + + feat = self.final_conv(feat) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder([style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise) + + return image, out_rgbs diff --git a/third_part/GFPGAN/gfpgan/archs/gfpganv1_arch.py b/third_part/GFPGAN/gfpgan/archs/gfpganv1_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..e092b4f7633dece505e5cd3bac4a482df3746654 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/gfpganv1_arch.py @@ -0,0 +1,439 @@ +import math +import random +import torch +from basicsr.archs.stylegan2_arch import (ConvLayer, EqualConv2d, EqualLinear, ResBlock, ScaledLeakyReLU, + StyleGAN2Generator) +from basicsr.ops.fused_act import FusedLeakyReLU +from basicsr.utils.registry import ARCH_REGISTRY +from torch import nn +from torch.nn import functional as F + + +class StyleGAN2GeneratorSFT(StyleGAN2Generator): + """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform). + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross production will be + applied to extent 1D resample kernel to 2D resample kernel. Default: (1, 3, 3, 1). + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. 
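+
+    Example:
+        A minimal, illustrative sketch (not part of the original code). It needs
+        the compiled basicsr StyleGAN2 ops, and it assumes the SFT ``conditions``
+        list comes from the GFPGAN U-Net encoder; an empty list simply skips the
+        SFT modulation so the module behaves like a plain StyleGAN2 generator:
+
+        >>> decoder = StyleGAN2GeneratorSFT(out_size=512, sft_half=True)
+        >>> styles = [torch.randn(1, 512)]                     # one style code per sample
+        >>> image, _ = decoder(styles, conditions=[], randomize_noise=True)
+        >>> # image has shape (1, 3, 512, 512)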
+ """ + + def __init__(self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + resample_kernel=(1, 3, 3, 1), + lr_mlp=0.01, + narrow=1, + sft_half=False): + super(StyleGAN2GeneratorSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + resample_kernel=resample_kernel, + lr_mlp=lr_mlp, + narrow=narrow) + self.sft_half = sft_half + + def forward(self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False): + """Forward function for StyleGAN2GeneratorSFT. + + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [getattr(self.noises, f'noise{i}') for i in range(self.num_layers)] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append(truncation_latent + truncation * (style - truncation_latent)) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], noise[1::2], + noise[2::2], self.to_rgbs): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ConvUpLayer(nn.Module): + 
"""Convolutional upsampling layer. It uses bilinear upsampler + Conv. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + stride (int): Stride of the convolution. Default: 1 + padding (int): Zero-padding added to both sides of the input. Default: 0. + bias (bool): If ``True``, adds a learnable bias to the output. Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + activate (bool): Whether use activateion. Default: True. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + activate=True): + super(ConvUpLayer, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + # self.scale is used to scale the convolution weights, which is related to the common initializations. + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.weight = nn.Parameter(torch.randn(out_channels, in_channels, kernel_size, kernel_size)) + + if bias and not activate: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter('bias', None) + + # activation + if activate: + if bias: + self.activation = FusedLeakyReLU(out_channels) + else: + self.activation = ScaledLeakyReLU(0.2) + else: + self.activation = None + + def forward(self, x): + # bilinear upsample + out = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) + # conv + out = F.conv2d( + out, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + # activation + if self.activation is not None: + out = self.activation(out) + return out + + +class ResUpBlock(nn.Module): + """Residual block with upsampling. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + """ + + def __init__(self, in_channels, out_channels): + super(ResUpBlock, self).__init__() + + self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True) + self.conv2 = ConvUpLayer(in_channels, out_channels, 3, stride=1, padding=1, bias=True, activate=True) + self.skip = ConvUpLayer(in_channels, out_channels, 1, bias=False, activate=False) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2(out) + skip = self.skip(x) + out = (out + skip) / math.sqrt(2) + return out + + +@ARCH_REGISTRY.register() +class GFPGANv1(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + + Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross production will be + applied to extent 1D resample kernel to 2D resample kernel. Default: (1, 3, 3, 1). + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + + num_mlp (int): Layer number of MLP style layers. Default: 8. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + input_is_latent (bool): Whether input is latent style. Default: False. 
+ different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + out_size, + num_style_feat=512, + channel_multiplier=1, + resample_kernel=(1, 3, 3, 1), + decoder_load_path=None, + fix_decoder=True, + # for stylegan decoder + num_mlp=8, + lr_mlp=0.01, + input_is_latent=False, + different_w=False, + narrow=1, + sft_half=False): + + super(GFPGANv1, self).__init__() + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + '4': int(512 * unet_narrow), + '8': int(512 * unet_narrow), + '16': int(512 * unet_narrow), + '32': int(512 * unet_narrow), + '64': int(256 * channel_multiplier * unet_narrow), + '128': int(128 * channel_multiplier * unet_narrow), + '256': int(64 * channel_multiplier * unet_narrow), + '512': int(32 * channel_multiplier * unet_narrow), + '1024': int(16 * channel_multiplier * unet_narrow) + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2**(int(math.log(out_size, 2))) + + self.conv_body_first = ConvLayer(3, channels[f'{first_out_size}'], 1, bias=True, activate=True) + + # downsample + in_channels = channels[f'{first_out_size}'] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f'{2**(i - 1)}'] + self.conv_body_down.append(ResBlock(in_channels, out_channels, resample_kernel)) + in_channels = out_channels + + self.final_conv = ConvLayer(in_channels, channels['4'], 3, bias=True, activate=True) + + # upsample + in_channels = channels['4'] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + self.conv_body_up.append(ResUpBlock(in_channels, out_channels)) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append(EqualConv2d(channels[f'{2**i}'], 3, 1, stride=1, padding=0, bias=True, bias_init_val=0)) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = EqualLinear( + channels['4'] * 4 * 4, linear_out_channel, bias=True, bias_init_val=0, lr_mul=1, activation=None) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + resample_kernel=resample_kernel, + lr_mlp=lr_mlp, + narrow=narrow, + sft_half=sft_half) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load(decoder_load_path, map_location=lambda storage, loc: storage)['params_ema']) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + if sft_half: + sft_out_channels = out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + EqualConv2d(out_channels, out_channels, 3, 
stride=1, padding=1, bias=True, bias_init_val=0), + ScaledLeakyReLU(0.2), + EqualConv2d(out_channels, sft_out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=1))) + self.condition_shift.append( + nn.Sequential( + EqualConv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=0), + ScaledLeakyReLU(0.2), + EqualConv2d(out_channels, sft_out_channels, 3, stride=1, padding=1, bias=True, bias_init_val=0))) + + def forward(self, x, return_latents=False, return_rgb=True, randomize_noise=True): + """Forward function for GFPGANv1. + + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = self.conv_body_first(x) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + + feat = self.final_conv(feat) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder([style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise) + + return image, out_rgbs + + +@ARCH_REGISTRY.register() +class FacialComponentDiscriminator(nn.Module): + """Facial component (eyes, mouth, noise) discriminator used in GFPGAN. + """ + + def __init__(self): + super(FacialComponentDiscriminator, self).__init__() + # It now uses a VGG-style architectrue with fixed model size + self.conv1 = ConvLayer(3, 64, 3, downsample=False, resample_kernel=(1, 3, 3, 1), bias=True, activate=True) + self.conv2 = ConvLayer(64, 128, 3, downsample=True, resample_kernel=(1, 3, 3, 1), bias=True, activate=True) + self.conv3 = ConvLayer(128, 128, 3, downsample=False, resample_kernel=(1, 3, 3, 1), bias=True, activate=True) + self.conv4 = ConvLayer(128, 256, 3, downsample=True, resample_kernel=(1, 3, 3, 1), bias=True, activate=True) + self.conv5 = ConvLayer(256, 256, 3, downsample=False, resample_kernel=(1, 3, 3, 1), bias=True, activate=True) + self.final_conv = ConvLayer(256, 1, 3, bias=True, activate=False) + + def forward(self, x, return_feats=False): + """Forward function for FacialComponentDiscriminator. + + Args: + x (Tensor): Input images. + return_feats (bool): Whether to return intermediate features. Default: False. 
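+
+        Example:
+            Illustrative only (not from the original code); the 80x80 patch size
+            is an arbitrary stand-in for a cropped eye/mouth region, and the call
+            needs the basicsr StyleGAN2 conv ops:
+
+            >>> d = FacialComponentDiscriminator()
+            >>> patch = torch.randn(1, 3, 80, 80)
+            >>> score, feats = d(patch, return_feats=True)
+            >>> # score is (1, 1, 20, 20) after two stride-2 stages; feats holds 2 maps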
+ """ + feat = self.conv1(x) + feat = self.conv3(self.conv2(feat)) + rlt_feats = [] + if return_feats: + rlt_feats.append(feat.clone()) + feat = self.conv5(self.conv4(feat)) + if return_feats: + rlt_feats.append(feat.clone()) + out = self.final_conv(feat) + + if return_feats: + return out, rlt_feats + else: + return out, None diff --git a/third_part/GFPGAN/gfpgan/archs/gfpganv1_clean_arch.py b/third_part/GFPGAN/gfpgan/archs/gfpganv1_clean_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..eb2e15d288bf0ad641034ed58d5dab37b0baabb3 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/gfpganv1_clean_arch.py @@ -0,0 +1,324 @@ +import math +import random +import torch +from basicsr.utils.registry import ARCH_REGISTRY +from torch import nn +from torch.nn import functional as F + +from .stylegan2_clean_arch import StyleGAN2GeneratorClean + + +class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean): + """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform). + + It is the clean version without custom compiled CUDA extensions used in StyleGAN2. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__(self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1, sft_half=False): + super(StyleGAN2GeneratorCSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + narrow=narrow) + self.sft_half = sft_half + + def forward(self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False): + """Forward function for StyleGAN2GeneratorCSFT. + + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. 
+ """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [getattr(self.noises, f'noise{i}') for i in range(self.num_layers)] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append(truncation_latent + truncation * (style - truncation_latent)) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], noise[1::2], + noise[2::2], self.to_rgbs): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ResBlock(nn.Module): + """Residual block with bilinear upsampling/downsampling. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + mode (str): Upsampling/downsampling mode. Options: down | up. Default: down. + """ + + def __init__(self, in_channels, out_channels, mode='down'): + super(ResBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1) + self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1) + self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False) + if mode == 'down': + self.scale_factor = 0.5 + elif mode == 'up': + self.scale_factor = 2 + + def forward(self, x): + out = F.leaky_relu_(self.conv1(x), negative_slope=0.2) + # upsample/downsample + out = F.interpolate(out, scale_factor=self.scale_factor, mode='bilinear', align_corners=False) + out = F.leaky_relu_(self.conv2(out), negative_slope=0.2) + # skip + x = F.interpolate(x, scale_factor=self.scale_factor, mode='bilinear', align_corners=False) + skip = self.skip(x) + out = out + skip + return out + + +@ARCH_REGISTRY.register() +class GFPGANv1Clean(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + + It is the clean version without custom compiled CUDA extensions used in StyleGAN2. 
+ + Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + + num_mlp (int): Layer number of MLP style layers. Default: 8. + input_is_latent (bool): Whether input is latent style. Default: False. + different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + out_size, + num_style_feat=512, + channel_multiplier=1, + decoder_load_path=None, + fix_decoder=True, + # for stylegan decoder + num_mlp=8, + input_is_latent=False, + different_w=False, + narrow=1, + sft_half=False): + + super(GFPGANv1Clean, self).__init__() + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + '4': int(512 * unet_narrow), + '8': int(512 * unet_narrow), + '16': int(512 * unet_narrow), + '32': int(512 * unet_narrow), + '64': int(256 * channel_multiplier * unet_narrow), + '128': int(128 * channel_multiplier * unet_narrow), + '256': int(64 * channel_multiplier * unet_narrow), + '512': int(32 * channel_multiplier * unet_narrow), + '1024': int(16 * channel_multiplier * unet_narrow) + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2**(int(math.log(out_size, 2))) + + self.conv_body_first = nn.Conv2d(3, channels[f'{first_out_size}'], 1) + + # downsample + in_channels = channels[f'{first_out_size}'] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f'{2**(i - 1)}'] + self.conv_body_down.append(ResBlock(in_channels, out_channels, mode='down')) + in_channels = out_channels + + self.final_conv = nn.Conv2d(in_channels, channels['4'], 3, 1, 1) + + # upsample + in_channels = channels['4'] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + self.conv_body_up.append(ResBlock(in_channels, out_channels, mode='up')) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append(nn.Conv2d(channels[f'{2**i}'], 3, 1)) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = nn.Linear(channels['4'] * 4 * 4, linear_out_channel) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorCSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + narrow=narrow, + sft_half=sft_half) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load(decoder_load_path, map_location=lambda storage, loc: storage)['params_ema']) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT 
modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + if sft_half: + sft_out_channels = out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), nn.LeakyReLU(0.2, True), + nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1))) + self.condition_shift.append( + nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), nn.LeakyReLU(0.2, True), + nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1))) + + def forward(self, x, return_latents=False, return_rgb=True, randomize_noise=True): + """Forward function for GFPGANv1Clean. + + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder([style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise) + + return image, out_rgbs diff --git a/third_part/GFPGAN/gfpgan/archs/stylegan2_bilinear_arch.py b/third_part/GFPGAN/gfpgan/archs/stylegan2_bilinear_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..1342ee3c9a6b8f742fb76ce7d5b907cd39fbc350 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/stylegan2_bilinear_arch.py @@ -0,0 +1,613 @@ +import math +import random +import torch +from basicsr.ops.fused_act import FusedLeakyReLU, fused_leaky_relu +from basicsr.utils.registry import ARCH_REGISTRY +from torch import nn +from torch.nn import functional as F + + +class NormStyleCode(nn.Module): + + def forward(self, x): + """Normalize the style codes. + + Args: + x (Tensor): Style codes with shape (b, c). + + Returns: + Tensor: Normalized tensor. + """ + return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8) + + +class EqualLinear(nn.Module): + """Equalized Linear as StyleGAN2. + + Args: + in_channels (int): Size of each sample. + out_channels (int): Size of each output sample. + bias (bool): If set to ``False``, the layer will not learn an additive + bias. Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + lr_mul (float): Learning rate multiplier. Default: 1. + activation (None | str): The activation after ``linear`` operation. + Supported: 'fused_lrelu', None. Default: None. 
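+
+    Example:
+        Illustrative only. The weights are stored divided by ``lr_mul`` and
+        rescaled at runtime; the style MLP below uses this layer with
+        ``lr_mul=0.01`` and ``activation='fused_lrelu'``:
+
+        >>> fc = EqualLinear(512, 256, lr_mul=0.01)
+        >>> tuple(fc(torch.randn(4, 512)).shape)
+        (4, 256)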
+ """ + + def __init__(self, in_channels, out_channels, bias=True, bias_init_val=0, lr_mul=1, activation=None): + super(EqualLinear, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.lr_mul = lr_mul + self.activation = activation + if self.activation not in ['fused_lrelu', None]: + raise ValueError(f'Wrong activation value in EqualLinear: {activation}' + "Supported ones are: ['fused_lrelu', None].") + self.scale = (1 / math.sqrt(in_channels)) * lr_mul + + self.weight = nn.Parameter(torch.randn(out_channels, in_channels).div_(lr_mul)) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter('bias', None) + + def forward(self, x): + if self.bias is None: + bias = None + else: + bias = self.bias * self.lr_mul + if self.activation == 'fused_lrelu': + out = F.linear(x, self.weight * self.scale) + out = fused_leaky_relu(out, bias) + else: + out = F.linear(x, self.weight * self.scale, bias=bias) + return out + + def __repr__(self): + return (f'{self.__class__.__name__}(in_channels={self.in_channels}, ' + f'out_channels={self.out_channels}, bias={self.bias is not None})') + + +class ModulatedConv2d(nn.Module): + """Modulated Conv2d used in StyleGAN2. + + There is no bias in ModulatedConv2d. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether to demodulate in the conv layer. + Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. + eps (float): A value added to the denominator for numerical stability. + Default: 1e-8. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + eps=1e-8, + interpolation_mode='bilinear'): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + self.interpolation_mode = interpolation_mode + if self.interpolation_mode == 'nearest': + self.align_corners = None + else: + self.align_corners = False + + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + # modulation inside each modulated conv + self.modulation = EqualLinear( + num_style_feat, in_channels, bias=True, bias_init_val=1, lr_mul=1, activation=None) + + self.weight = nn.Parameter(torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)) + self.padding = kernel_size // 2 + + def forward(self, x, style): + """Forward function. + + Args: + x (Tensor): Tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + + Returns: + Tensor: Modulated tensor after convolution. 
+ """ + b, c, h, w = x.shape # c = c_in + # weight modulation + style = self.modulation(style).view(b, 1, c, 1, 1) + # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1) + weight = self.scale * self.weight * style # (b, c_out, c_in, k, k) + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view(b * self.out_channels, c, self.kernel_size, self.kernel_size) + + if self.sample_mode == 'upsample': + x = F.interpolate(x, scale_factor=2, mode=self.interpolation_mode, align_corners=self.align_corners) + elif self.sample_mode == 'downsample': + x = F.interpolate(x, scale_factor=0.5, mode=self.interpolation_mode, align_corners=self.align_corners) + + b, c, h, w = x.shape + x = x.view(1, b * c, h, w) + # weight: (b*c_out, c_in, k, k), groups=b + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + + return out + + def __repr__(self): + return (f'{self.__class__.__name__}(in_channels={self.in_channels}, ' + f'out_channels={self.out_channels}, ' + f'kernel_size={self.kernel_size}, ' + f'demodulate={self.demodulate}, sample_mode={self.sample_mode})') + + +class StyleConv(nn.Module): + """Style conv. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode='bilinear'): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=demodulate, + sample_mode=sample_mode, + interpolation_mode=interpolation_mode) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.activate = FusedLeakyReLU(out_channels) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # activation (with bias) + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + """To RGB from features. + + Args: + in_channels (int): Channel number of input. + num_style_feat (int): Channel number of style features. + upsample (bool): Whether to upsample. Default: True. + """ + + def __init__(self, in_channels, num_style_feat, upsample=True, interpolation_mode='bilinear'): + super(ToRGB, self).__init__() + self.upsample = upsample + self.interpolation_mode = interpolation_mode + if self.interpolation_mode == 'nearest': + self.align_corners = None + else: + self.align_corners = False + self.modulated_conv = ModulatedConv2d( + in_channels, + 3, + kernel_size=1, + num_style_feat=num_style_feat, + demodulate=False, + sample_mode=None, + interpolation_mode=interpolation_mode) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + """Forward function. + + Args: + x (Tensor): Feature tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + skip (Tensor): Base/skip tensor. Default: None. 
+ + Returns: + Tensor: RGB images. + """ + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = F.interpolate( + skip, scale_factor=2, mode=self.interpolation_mode, align_corners=self.align_corners) + out = out + skip + return out + + +class ConstantInput(nn.Module): + """Constant input. + + Args: + num_channel (int): Channel number of constant input. + size (int): Spatial size of constant input. + """ + + def __init__(self, num_channel, size): + super(ConstantInput, self).__init__() + self.weight = nn.Parameter(torch.randn(1, num_channel, size, size)) + + def forward(self, batch): + out = self.weight.repeat(batch, 1, 1, 1) + return out + + +@ARCH_REGISTRY.register() +class StyleGAN2GeneratorBilinear(nn.Module): + """StyleGAN2 Generator. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of + StyleGAN2. Default: 2. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): Narrow ratio for channels. Default: 1.0. + """ + + def __init__(self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + lr_mlp=0.01, + narrow=1, + interpolation_mode='bilinear'): + super(StyleGAN2GeneratorBilinear, self).__init__() + # Style MLP layers + self.num_style_feat = num_style_feat + style_mlp_layers = [NormStyleCode()] + for i in range(num_mlp): + style_mlp_layers.append( + EqualLinear( + num_style_feat, num_style_feat, bias=True, bias_init_val=0, lr_mul=lr_mlp, + activation='fused_lrelu')) + self.style_mlp = nn.Sequential(*style_mlp_layers) + + channels = { + '4': int(512 * narrow), + '8': int(512 * narrow), + '16': int(512 * narrow), + '32': int(512 * narrow), + '64': int(256 * channel_multiplier * narrow), + '128': int(128 * channel_multiplier * narrow), + '256': int(64 * channel_multiplier * narrow), + '512': int(32 * channel_multiplier * narrow), + '1024': int(16 * channel_multiplier * narrow) + } + self.channels = channels + + self.constant_input = ConstantInput(channels['4'], size=4) + self.style_conv1 = StyleConv( + channels['4'], + channels['4'], + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode=interpolation_mode) + self.to_rgb1 = ToRGB(channels['4'], num_style_feat, upsample=False, interpolation_mode=interpolation_mode) + + self.log_size = int(math.log(out_size, 2)) + self.num_layers = (self.log_size - 2) * 2 + 1 + self.num_latent = self.log_size * 2 - 2 + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + in_channels = channels['4'] + # noise + for layer_idx in range(self.num_layers): + resolution = 2**((layer_idx + 5) // 2) + shape = [1, 1, resolution, resolution] + self.noises.register_buffer(f'noise{layer_idx}', torch.randn(*shape)) + # style convs and to_rgbs + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode='upsample', + interpolation_mode=interpolation_mode)) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode=interpolation_mode)) + self.to_rgbs.append( + 
ToRGB(out_channels, num_style_feat, upsample=True, interpolation_mode=interpolation_mode)) + in_channels = out_channels + + def make_noise(self): + """Make noise for noise injection.""" + device = self.constant_input.weight.device + noises = [torch.randn(1, 1, 4, 4, device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2**i, 2**i, device=device)) + + return noises + + def get_latent(self, x): + return self.style_mlp(x) + + def mean_latent(self, num_latent): + latent_in = torch.randn(num_latent, self.num_style_feat, device=self.constant_input.weight.device) + latent = self.style_mlp(latent_in).mean(0, keepdim=True) + return latent + + def forward(self, + styles, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False): + """Forward function for StyleGAN2Generator. + + Args: + styles (list[Tensor]): Sample codes of styles. + input_is_latent (bool): Whether input is latent style. + Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is + False. Default: True. + truncation (float): TODO. Default: 1. + truncation_latent (Tensor | None): TODO. Default: None. + inject_index (int | None): The injection index for mixing noise. + Default: None. + return_latents (bool): Whether to return style latents. + Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [getattr(self.noises, f'noise{i}') for i in range(self.num_layers)] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append(truncation_latent + truncation * (style - truncation_latent)) + styles = style_truncation + # get style latent with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], noise[1::2], + noise[2::2], self.to_rgbs): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ScaledLeakyReLU(nn.Module): + """Scaled LeakyReLU. + + Args: + negative_slope (float): Negative slope. Default: 0.2. 
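+
+    Example:
+        Illustrative only; the sqrt(2) rescaling roughly preserves the signal
+        variance, mirroring FusedLeakyReLU but without the fused CUDA op:
+
+        >>> act = ScaledLeakyReLU(0.2)
+        >>> out = act(torch.tensor([-1.0, 1.0]))   # ~ tensor([-0.2828, 1.4142])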
+ """ + + def __init__(self, negative_slope=0.2): + super(ScaledLeakyReLU, self).__init__() + self.negative_slope = negative_slope + + def forward(self, x): + out = F.leaky_relu(x, negative_slope=self.negative_slope) + return out * math.sqrt(2) + + +class EqualConv2d(nn.Module): + """Equalized Linear as StyleGAN2. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + stride (int): Stride of the convolution. Default: 1 + padding (int): Zero-padding added to both sides of the input. + Default: 0. + bias (bool): If ``True``, adds a learnable bias to the output. + Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True, bias_init_val=0): + super(EqualConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.weight = nn.Parameter(torch.randn(out_channels, in_channels, kernel_size, kernel_size)) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter('bias', None) + + def forward(self, x): + out = F.conv2d( + x, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + + return out + + def __repr__(self): + return (f'{self.__class__.__name__}(in_channels={self.in_channels}, ' + f'out_channels={self.out_channels}, ' + f'kernel_size={self.kernel_size},' + f' stride={self.stride}, padding={self.padding}, ' + f'bias={self.bias is not None})') + + +class ConvLayer(nn.Sequential): + """Conv Layer used in StyleGAN2 Discriminator. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Kernel size. + downsample (bool): Whether downsample by a factor of 2. + Default: False. + bias (bool): Whether with bias. Default: True. + activate (bool): Whether use activateion. Default: True. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + downsample=False, + bias=True, + activate=True, + interpolation_mode='bilinear'): + layers = [] + self.interpolation_mode = interpolation_mode + # downsample + if downsample: + if self.interpolation_mode == 'nearest': + self.align_corners = None + else: + self.align_corners = False + + layers.append( + torch.nn.Upsample(scale_factor=0.5, mode=interpolation_mode, align_corners=self.align_corners)) + stride = 1 + self.padding = kernel_size // 2 + # conv + layers.append( + EqualConv2d( + in_channels, out_channels, kernel_size, stride=stride, padding=self.padding, bias=bias + and not activate)) + # activation + if activate: + if bias: + layers.append(FusedLeakyReLU(out_channels)) + else: + layers.append(ScaledLeakyReLU(0.2)) + + super(ConvLayer, self).__init__(*layers) + + +class ResBlock(nn.Module): + """Residual block used in StyleGAN2 Discriminator. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. 
+ """ + + def __init__(self, in_channels, out_channels, interpolation_mode='bilinear'): + super(ResBlock, self).__init__() + + self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True) + self.conv2 = ConvLayer( + in_channels, + out_channels, + 3, + downsample=True, + interpolation_mode=interpolation_mode, + bias=True, + activate=True) + self.skip = ConvLayer( + in_channels, + out_channels, + 1, + downsample=True, + interpolation_mode=interpolation_mode, + bias=False, + activate=False) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2(out) + skip = self.skip(x) + out = (out + skip) / math.sqrt(2) + return out diff --git a/third_part/GFPGAN/gfpgan/archs/stylegan2_clean_arch.py b/third_part/GFPGAN/gfpgan/archs/stylegan2_clean_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..9e2ee94e50401b95e4c9997adef5581d521d725f --- /dev/null +++ b/third_part/GFPGAN/gfpgan/archs/stylegan2_clean_arch.py @@ -0,0 +1,368 @@ +import math +import random +import torch +from basicsr.archs.arch_util import default_init_weights +from basicsr.utils.registry import ARCH_REGISTRY +from torch import nn +from torch.nn import functional as F + + +class NormStyleCode(nn.Module): + + def forward(self, x): + """Normalize the style codes. + + Args: + x (Tensor): Style codes with shape (b, c). + + Returns: + Tensor: Normalized tensor. + """ + return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8) + + +class ModulatedConv2d(nn.Module): + """Modulated Conv2d used in StyleGAN2. + + There is no bias in ModulatedConv2d. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether to demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None. + eps (float): A value added to the denominator for numerical stability. Default: 1e-8. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + eps=1e-8): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + + # modulation inside each modulated conv + self.modulation = nn.Linear(num_style_feat, in_channels, bias=True) + # initialization + default_init_weights(self.modulation, scale=1, bias_fill=1, a=0, mode='fan_in', nonlinearity='linear') + + self.weight = nn.Parameter( + torch.randn(1, out_channels, in_channels, kernel_size, kernel_size) / + math.sqrt(in_channels * kernel_size**2)) + self.padding = kernel_size // 2 + + def forward(self, x, style): + """Forward function. + + Args: + x (Tensor): Tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + + Returns: + Tensor: Modulated tensor after convolution. 
+ """ + b, c, h, w = x.shape # c = c_in + # weight modulation + style = self.modulation(style).view(b, 1, c, 1, 1) + # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1) + weight = self.weight * style # (b, c_out, c_in, k, k) + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view(b * self.out_channels, c, self.kernel_size, self.kernel_size) + + # upsample or downsample if necessary + if self.sample_mode == 'upsample': + x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) + elif self.sample_mode == 'downsample': + x = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=False) + + b, c, h, w = x.shape + x = x.view(1, b * c, h, w) + # weight: (b*c_out, c_in, k, k), groups=b + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + + return out + + def __repr__(self): + return (f'{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, ' + f'kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})') + + +class StyleConv(nn.Module): + """Style conv used in StyleGAN2. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None. + """ + + def __init__(self, in_channels, out_channels, kernel_size, num_style_feat, demodulate=True, sample_mode=None): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, out_channels, kernel_size, num_style_feat, demodulate=demodulate, sample_mode=sample_mode) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1)) + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) * 2**0.5 # for conversion + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # add bias + out = out + self.bias + # activation + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + """To RGB (image space) from features. + + Args: + in_channels (int): Channel number of input. + num_style_feat (int): Channel number of style features. + upsample (bool): Whether to upsample. Default: True. + """ + + def __init__(self, in_channels, num_style_feat, upsample=True): + super(ToRGB, self).__init__() + self.upsample = upsample + self.modulated_conv = ModulatedConv2d( + in_channels, 3, kernel_size=1, num_style_feat=num_style_feat, demodulate=False, sample_mode=None) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + """Forward function. + + Args: + x (Tensor): Feature tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + skip (Tensor): Base/skip tensor. Default: None. + + Returns: + Tensor: RGB images. 
+ """ + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = F.interpolate(skip, scale_factor=2, mode='bilinear', align_corners=False) + out = out + skip + return out + + +class ConstantInput(nn.Module): + """Constant input. + + Args: + num_channel (int): Channel number of constant input. + size (int): Spatial size of constant input. + """ + + def __init__(self, num_channel, size): + super(ConstantInput, self).__init__() + self.weight = nn.Parameter(torch.randn(1, num_channel, size, size)) + + def forward(self, batch): + out = self.weight.repeat(batch, 1, 1, 1) + return out + + +@ARCH_REGISTRY.register() +class StyleGAN2GeneratorClean(nn.Module): + """Clean version of StyleGAN2 Generator. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + narrow (float): Narrow ratio for channels. Default: 1.0. + """ + + def __init__(self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1): + super(StyleGAN2GeneratorClean, self).__init__() + # Style MLP layers + self.num_style_feat = num_style_feat + style_mlp_layers = [NormStyleCode()] + for i in range(num_mlp): + style_mlp_layers.extend( + [nn.Linear(num_style_feat, num_style_feat, bias=True), + nn.LeakyReLU(negative_slope=0.2, inplace=True)]) + self.style_mlp = nn.Sequential(*style_mlp_layers) + # initialization + default_init_weights(self.style_mlp, scale=1, bias_fill=0, a=0.2, mode='fan_in', nonlinearity='leaky_relu') + + # channel list + channels = { + '4': int(512 * narrow), + '8': int(512 * narrow), + '16': int(512 * narrow), + '32': int(512 * narrow), + '64': int(256 * channel_multiplier * narrow), + '128': int(128 * channel_multiplier * narrow), + '256': int(64 * channel_multiplier * narrow), + '512': int(32 * channel_multiplier * narrow), + '1024': int(16 * channel_multiplier * narrow) + } + self.channels = channels + + self.constant_input = ConstantInput(channels['4'], size=4) + self.style_conv1 = StyleConv( + channels['4'], + channels['4'], + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None) + self.to_rgb1 = ToRGB(channels['4'], num_style_feat, upsample=False) + + self.log_size = int(math.log(out_size, 2)) + self.num_layers = (self.log_size - 2) * 2 + 1 + self.num_latent = self.log_size * 2 - 2 + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + in_channels = channels['4'] + # noise + for layer_idx in range(self.num_layers): + resolution = 2**((layer_idx + 5) // 2) + shape = [1, 1, resolution, resolution] + self.noises.register_buffer(f'noise{layer_idx}', torch.randn(*shape)) + # style convs and to_rgbs + for i in range(3, self.log_size + 1): + out_channels = channels[f'{2**i}'] + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode='upsample')) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None)) + self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True)) + in_channels = out_channels + + def make_noise(self): + """Make noise for noise injection.""" + device = self.constant_input.weight.device + noises = [torch.randn(1, 1, 4, 4, 
device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2**i, 2**i, device=device)) + + return noises + + def get_latent(self, x): + return self.style_mlp(x) + + def mean_latent(self, num_latent): + latent_in = torch.randn(num_latent, self.num_style_feat, device=self.constant_input.weight.device) + latent = self.style_mlp(latent_in).mean(0, keepdim=True) + return latent + + def forward(self, + styles, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False): + """Forward function for StyleGAN2GeneratorClean. + + Args: + styles (list[Tensor]): Sample codes of styles. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [getattr(self.noises, f'noise{i}') for i in range(self.num_layers)] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append(truncation_latent + truncation * (style - truncation_latent)) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip(self.style_convs[::2], self.style_convs[1::2], noise[1::2], + noise[2::2], self.to_rgbs): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None diff --git a/third_part/GFPGAN/gfpgan/data/__init__.py b/third_part/GFPGAN/gfpgan/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..69fd9f9026407c4d185f86b122000485b06fd986 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/data/__init__.py @@ -0,0 +1,10 @@ +import importlib +from basicsr.utils import scandir +from os import path as osp + +# automatically scan and import dataset modules for registry +# scan all the files that end with '_dataset.py' under the data folder +data_folder = 
osp.dirname(osp.abspath(__file__)) +dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')] +# import all the dataset modules +_dataset_modules = [importlib.import_module(f'gfpgan.data.{file_name}') for file_name in dataset_filenames] diff --git a/third_part/GFPGAN/gfpgan/data/ffhq_degradation_dataset.py b/third_part/GFPGAN/gfpgan/data/ffhq_degradation_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..64e5755e1211f171cb2a883d47e8d253061f90aa --- /dev/null +++ b/third_part/GFPGAN/gfpgan/data/ffhq_degradation_dataset.py @@ -0,0 +1,230 @@ +import cv2 +import math +import numpy as np +import os.path as osp +import torch +import torch.utils.data as data +from basicsr.data import degradations as degradations +from basicsr.data.data_util import paths_from_folder +from basicsr.data.transforms import augment +from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor +from basicsr.utils.registry import DATASET_REGISTRY +from torchvision.transforms.functional import (adjust_brightness, adjust_contrast, adjust_hue, adjust_saturation, + normalize) + + +@DATASET_REGISTRY.register() +class FFHQDegradationDataset(data.Dataset): + """FFHQ dataset for GFPGAN. + + It reads high resolution images, and then generate low-quality (LQ) images on-the-fly. + + Args: + opt (dict): Config for train datasets. It contains the following keys: + dataroot_gt (str): Data root path for gt. + io_backend (dict): IO backend type and other kwarg. + mean (list | tuple): Image mean. + std (list | tuple): Image std. + use_hflip (bool): Whether to horizontally flip. + Please see more options in the codes. + """ + + def __init__(self, opt): + super(FFHQDegradationDataset, self).__init__() + self.opt = opt + # file client (io backend) + self.file_client = None + self.io_backend_opt = opt['io_backend'] + + self.gt_folder = opt['dataroot_gt'] + self.mean = opt['mean'] + self.std = opt['std'] + self.out_size = opt['out_size'] + + self.crop_components = opt.get('crop_components', False) # facial components + self.eye_enlarge_ratio = opt.get('eye_enlarge_ratio', 1) # whether enlarge eye regions + + if self.crop_components: + # load component list from a pre-process pth files + self.components_list = torch.load(opt.get('component_path')) + + # file client (lmdb io backend) + if self.io_backend_opt['type'] == 'lmdb': + self.io_backend_opt['db_paths'] = self.gt_folder + if not self.gt_folder.endswith('.lmdb'): + raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}") + with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin: + self.paths = [line.split('.')[0] for line in fin] + else: + # disk backend: scan file list from a folder + self.paths = paths_from_folder(self.gt_folder) + + # degradation configurations + self.blur_kernel_size = opt['blur_kernel_size'] + self.kernel_list = opt['kernel_list'] + self.kernel_prob = opt['kernel_prob'] + self.blur_sigma = opt['blur_sigma'] + self.downsample_range = opt['downsample_range'] + self.noise_range = opt['noise_range'] + self.jpeg_range = opt['jpeg_range'] + + # color jitter + self.color_jitter_prob = opt.get('color_jitter_prob') + self.color_jitter_pt_prob = opt.get('color_jitter_pt_prob') + self.color_jitter_shift = opt.get('color_jitter_shift', 20) + # to gray + self.gray_prob = opt.get('gray_prob') + + logger = get_root_logger() + logger.info(f'Blur: blur_kernel_size {self.blur_kernel_size}, sigma: [{", ".join(map(str, self.blur_sigma))}]') + 
logger.info(f'Downsample: downsample_range [{", ".join(map(str, self.downsample_range))}]') + logger.info(f'Noise: [{", ".join(map(str, self.noise_range))}]') + logger.info(f'JPEG compression: [{", ".join(map(str, self.jpeg_range))}]') + + if self.color_jitter_prob is not None: + logger.info(f'Use random color jitter. Prob: {self.color_jitter_prob}, shift: {self.color_jitter_shift}') + if self.gray_prob is not None: + logger.info(f'Use random gray. Prob: {self.gray_prob}') + self.color_jitter_shift /= 255. + + @staticmethod + def color_jitter(img, shift): + """jitter color: randomly jitter the RGB values, in numpy formats""" + jitter_val = np.random.uniform(-shift, shift, 3).astype(np.float32) + img = img + jitter_val + img = np.clip(img, 0, 1) + return img + + @staticmethod + def color_jitter_pt(img, brightness, contrast, saturation, hue): + """jitter color: randomly jitter the brightness, contrast, saturation, and hue, in torch Tensor formats""" + fn_idx = torch.randperm(4) + for fn_id in fn_idx: + if fn_id == 0 and brightness is not None: + brightness_factor = torch.tensor(1.0).uniform_(brightness[0], brightness[1]).item() + img = adjust_brightness(img, brightness_factor) + + if fn_id == 1 and contrast is not None: + contrast_factor = torch.tensor(1.0).uniform_(contrast[0], contrast[1]).item() + img = adjust_contrast(img, contrast_factor) + + if fn_id == 2 and saturation is not None: + saturation_factor = torch.tensor(1.0).uniform_(saturation[0], saturation[1]).item() + img = adjust_saturation(img, saturation_factor) + + if fn_id == 3 and hue is not None: + hue_factor = torch.tensor(1.0).uniform_(hue[0], hue[1]).item() + img = adjust_hue(img, hue_factor) + return img + + def get_component_coordinates(self, index, status): + """Get facial component (left_eye, right_eye, mouth) coordinates from a pre-loaded pth file""" + components_bbox = self.components_list[f'{index:08d}'] + if status[0]: # hflip + # exchange right and left eye + tmp = components_bbox['left_eye'] + components_bbox['left_eye'] = components_bbox['right_eye'] + components_bbox['right_eye'] = tmp + # modify the width coordinate + components_bbox['left_eye'][0] = self.out_size - components_bbox['left_eye'][0] + components_bbox['right_eye'][0] = self.out_size - components_bbox['right_eye'][0] + components_bbox['mouth'][0] = self.out_size - components_bbox['mouth'][0] + + # get coordinates + locations = [] + for part in ['left_eye', 'right_eye', 'mouth']: + mean = components_bbox[part][0:2] + half_len = components_bbox[part][2] + if 'eye' in part: + half_len *= self.eye_enlarge_ratio + loc = np.hstack((mean - half_len + 1, mean + half_len)) + loc = torch.from_numpy(loc).float() + locations.append(loc) + return locations + + def __getitem__(self, index): + if self.file_client is None: + self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt) + + # load gt image + # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32. 
+ gt_path = self.paths[index] + img_bytes = self.file_client.get(gt_path) + img_gt = imfrombytes(img_bytes, float32=True) + + # random horizontal flip + img_gt, status = augment(img_gt, hflip=self.opt['use_hflip'], rotation=False, return_status=True) + h, w, _ = img_gt.shape + + # get facial component coordinates + if self.crop_components: + locations = self.get_component_coordinates(index, status) + loc_left_eye, loc_right_eye, loc_mouth = locations + + # ------------------------ generate lq image ------------------------ # + # blur + kernel = degradations.random_mixed_kernels( + self.kernel_list, + self.kernel_prob, + self.blur_kernel_size, + self.blur_sigma, + self.blur_sigma, [-math.pi, math.pi], + noise_range=None) + img_lq = cv2.filter2D(img_gt, -1, kernel) + # downsample + scale = np.random.uniform(self.downsample_range[0], self.downsample_range[1]) + img_lq = cv2.resize(img_lq, (int(w // scale), int(h // scale)), interpolation=cv2.INTER_LINEAR) + # noise + if self.noise_range is not None: + img_lq = degradations.random_add_gaussian_noise(img_lq, self.noise_range) + # jpeg compression + if self.jpeg_range is not None: + img_lq = degradations.random_add_jpg_compression(img_lq, self.jpeg_range) + + # resize to original size + img_lq = cv2.resize(img_lq, (w, h), interpolation=cv2.INTER_LINEAR) + + # random color jitter (only for lq) + if self.color_jitter_prob is not None and (np.random.uniform() < self.color_jitter_prob): + img_lq = self.color_jitter(img_lq, self.color_jitter_shift) + # random to gray (only for lq) + if self.gray_prob and np.random.uniform() < self.gray_prob: + img_lq = cv2.cvtColor(img_lq, cv2.COLOR_BGR2GRAY) + img_lq = np.tile(img_lq[:, :, None], [1, 1, 3]) + if self.opt.get('gt_gray'): # whether convert GT to gray images + img_gt = cv2.cvtColor(img_gt, cv2.COLOR_BGR2GRAY) + img_gt = np.tile(img_gt[:, :, None], [1, 1, 3]) # repeat the color channels + + # BGR to RGB, HWC to CHW, numpy to tensor + img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True) + + # random color jitter (pytorch version) (only for lq) + if self.color_jitter_pt_prob is not None and (np.random.uniform() < self.color_jitter_pt_prob): + brightness = self.opt.get('brightness', (0.5, 1.5)) + contrast = self.opt.get('contrast', (0.5, 1.5)) + saturation = self.opt.get('saturation', (0, 1.5)) + hue = self.opt.get('hue', (-0.1, 0.1)) + img_lq = self.color_jitter_pt(img_lq, brightness, contrast, saturation, hue) + + # round and clip + img_lq = torch.clamp((img_lq * 255.0).round(), 0, 255) / 255. 
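+        # (note: the round/clamp above snaps the synthesized LQ image to the 8-bit grid in [0, 1],
+        #  i.e. the values it would carry if it had been saved as an 8-bit image and reloaded)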
+ + # normalize + normalize(img_gt, self.mean, self.std, inplace=True) + normalize(img_lq, self.mean, self.std, inplace=True) + + if self.crop_components: + return_dict = { + 'lq': img_lq, + 'gt': img_gt, + 'gt_path': gt_path, + 'loc_left_eye': loc_left_eye, + 'loc_right_eye': loc_right_eye, + 'loc_mouth': loc_mouth + } + return return_dict + else: + return {'lq': img_lq, 'gt': img_gt, 'gt_path': gt_path} + + def __len__(self): + return len(self.paths) diff --git a/third_part/GFPGAN/gfpgan/models/__init__.py b/third_part/GFPGAN/gfpgan/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6afad57a3794b867dabbdb617a16355a24d6a8b3 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/models/__init__.py @@ -0,0 +1,10 @@ +import importlib +from basicsr.utils import scandir +from os import path as osp + +# automatically scan and import model modules for registry +# scan all the files that end with '_model.py' under the model folder +model_folder = osp.dirname(osp.abspath(__file__)) +model_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(model_folder) if v.endswith('_model.py')] +# import all the model modules +_model_modules = [importlib.import_module(f'gfpgan.models.{file_name}') for file_name in model_filenames] diff --git a/third_part/GFPGAN/gfpgan/models/gfpgan_model.py b/third_part/GFPGAN/gfpgan/models/gfpgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5e47b28648f2124ea0603f93418682812236d032 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/models/gfpgan_model.py @@ -0,0 +1,580 @@ +import math +import os.path as osp +import torch +from basicsr.archs import build_network +from basicsr.losses import build_loss +# from basicsr.losses.losses import r1_penalty +from basicsr.losses import r1_penalty +from basicsr.metrics import calculate_metric +from basicsr.models.base_model import BaseModel +from basicsr.utils import get_root_logger, imwrite, tensor2img +from basicsr.utils.registry import MODEL_REGISTRY +from collections import OrderedDict +from torch.nn import functional as F +from torchvision.ops import roi_align +from tqdm import tqdm + + +@MODEL_REGISTRY.register() +class GFPGANModel(BaseModel): + """The GFPGAN model for Towards real-world blind face restoratin with generative facial prior""" + + def __init__(self, opt): + super(GFPGANModel, self).__init__(opt) + self.idx = 0 # it is used for saving data for check + + # define network + self.net_g = build_network(opt['network_g']) + self.net_g = self.model_to_device(self.net_g) + self.print_network(self.net_g) + + # load pretrained model + load_path = self.opt['path'].get('pretrain_network_g', None) + if load_path is not None: + param_key = self.opt['path'].get('param_key_g', 'params') + self.load_network(self.net_g, load_path, self.opt['path'].get('strict_load_g', True), param_key) + + self.log_size = int(math.log(self.opt['network_g']['out_size'], 2)) + + if self.is_train: + self.init_training_settings() + + def init_training_settings(self): + train_opt = self.opt['train'] + + # ----------- define net_d ----------- # + self.net_d = build_network(self.opt['network_d']) + self.net_d = self.model_to_device(self.net_d) + self.print_network(self.net_d) + # load pretrained model + load_path = self.opt['path'].get('pretrain_network_d', None) + if load_path is not None: + self.load_network(self.net_d, load_path, self.opt['path'].get('strict_load_d', True)) + + # ----------- define net_g with Exponential Moving Average (EMA) ----------- # + # net_g_ema only used for testing on 
one GPU and saving. There is no need to wrap with DistributedDataParallel + self.net_g_ema = build_network(self.opt['network_g']).to(self.device) + # load pretrained model + load_path = self.opt['path'].get('pretrain_network_g', None) + if load_path is not None: + self.load_network(self.net_g_ema, load_path, self.opt['path'].get('strict_load_g', True), 'params_ema') + else: + self.model_ema(0) # copy net_g weight + + self.net_g.train() + self.net_d.train() + self.net_g_ema.eval() + + # ----------- facial component networks ----------- # + if ('network_d_left_eye' in self.opt and 'network_d_right_eye' in self.opt and 'network_d_mouth' in self.opt): + self.use_facial_disc = True + else: + self.use_facial_disc = False + + if self.use_facial_disc: + # left eye + self.net_d_left_eye = build_network(self.opt['network_d_left_eye']) + self.net_d_left_eye = self.model_to_device(self.net_d_left_eye) + self.print_network(self.net_d_left_eye) + load_path = self.opt['path'].get('pretrain_network_d_left_eye') + if load_path is not None: + self.load_network(self.net_d_left_eye, load_path, True, 'params') + # right eye + self.net_d_right_eye = build_network(self.opt['network_d_right_eye']) + self.net_d_right_eye = self.model_to_device(self.net_d_right_eye) + self.print_network(self.net_d_right_eye) + load_path = self.opt['path'].get('pretrain_network_d_right_eye') + if load_path is not None: + self.load_network(self.net_d_right_eye, load_path, True, 'params') + # mouth + self.net_d_mouth = build_network(self.opt['network_d_mouth']) + self.net_d_mouth = self.model_to_device(self.net_d_mouth) + self.print_network(self.net_d_mouth) + load_path = self.opt['path'].get('pretrain_network_d_mouth') + if load_path is not None: + self.load_network(self.net_d_mouth, load_path, True, 'params') + + self.net_d_left_eye.train() + self.net_d_right_eye.train() + self.net_d_mouth.train() + + # ----------- define facial component gan loss ----------- # + self.cri_component = build_loss(train_opt['gan_component_opt']).to(self.device) + + # ----------- define losses ----------- # + # pixel loss + if train_opt.get('pixel_opt'): + self.cri_pix = build_loss(train_opt['pixel_opt']).to(self.device) + else: + self.cri_pix = None + + # perceptual loss + if train_opt.get('perceptual_opt'): + self.cri_perceptual = build_loss(train_opt['perceptual_opt']).to(self.device) + else: + self.cri_perceptual = None + + # L1 loss is used in pyramid loss, component style loss and identity loss + self.cri_l1 = build_loss(train_opt['L1_opt']).to(self.device) + + # gan loss (wgan) + self.cri_gan = build_loss(train_opt['gan_opt']).to(self.device) + + # ----------- define identity loss ----------- # + if 'network_identity' in self.opt: + self.use_identity = True + else: + self.use_identity = False + + if self.use_identity: + # define identity network + self.network_identity = build_network(self.opt['network_identity']) + self.network_identity = self.model_to_device(self.network_identity) + self.print_network(self.network_identity) + load_path = self.opt['path'].get('pretrain_network_identity') + if load_path is not None: + self.load_network(self.network_identity, load_path, True, None) + self.network_identity.eval() + for param in self.network_identity.parameters(): + param.requires_grad = False + + # regularization weights + self.r1_reg_weight = train_opt['r1_reg_weight'] # for discriminator + self.net_d_iters = train_opt.get('net_d_iters', 1) + self.net_d_init_iters = train_opt.get('net_d_init_iters', 0) + self.net_d_reg_every = 
train_opt['net_d_reg_every'] + + # set up optimizers and schedulers + self.setup_optimizers() + self.setup_schedulers() + + def setup_optimizers(self): + train_opt = self.opt['train'] + + # ----------- optimizer g ----------- # + net_g_reg_ratio = 1 + normal_params = [] + for _, param in self.net_g.named_parameters(): + normal_params.append(param) + optim_params_g = [{ # add normal params first + 'params': normal_params, + 'lr': train_opt['optim_g']['lr'] + }] + optim_type = train_opt['optim_g'].pop('type') + lr = train_opt['optim_g']['lr'] * net_g_reg_ratio + betas = (0**net_g_reg_ratio, 0.99**net_g_reg_ratio) + self.optimizer_g = self.get_optimizer(optim_type, optim_params_g, lr, betas=betas) + self.optimizers.append(self.optimizer_g) + + # ----------- optimizer d ----------- # + net_d_reg_ratio = self.net_d_reg_every / (self.net_d_reg_every + 1) + normal_params = [] + for _, param in self.net_d.named_parameters(): + normal_params.append(param) + optim_params_d = [{ # add normal params first + 'params': normal_params, + 'lr': train_opt['optim_d']['lr'] + }] + optim_type = train_opt['optim_d'].pop('type') + lr = train_opt['optim_d']['lr'] * net_d_reg_ratio + betas = (0**net_d_reg_ratio, 0.99**net_d_reg_ratio) + self.optimizer_d = self.get_optimizer(optim_type, optim_params_d, lr, betas=betas) + self.optimizers.append(self.optimizer_d) + + # ----------- optimizers for facial component networks ----------- # + if self.use_facial_disc: + # setup optimizers for facial component discriminators + optim_type = train_opt['optim_component'].pop('type') + lr = train_opt['optim_component']['lr'] + # left eye + self.optimizer_d_left_eye = self.get_optimizer( + optim_type, self.net_d_left_eye.parameters(), lr, betas=(0.9, 0.99)) + self.optimizers.append(self.optimizer_d_left_eye) + # right eye + self.optimizer_d_right_eye = self.get_optimizer( + optim_type, self.net_d_right_eye.parameters(), lr, betas=(0.9, 0.99)) + self.optimizers.append(self.optimizer_d_right_eye) + # mouth + self.optimizer_d_mouth = self.get_optimizer( + optim_type, self.net_d_mouth.parameters(), lr, betas=(0.9, 0.99)) + self.optimizers.append(self.optimizer_d_mouth) + + def feed_data(self, data): + self.lq = data['lq'].to(self.device) + if 'gt' in data: + self.gt = data['gt'].to(self.device) + + if 'loc_left_eye' in data: + # get facial component locations, shape (batch, 4) + self.loc_left_eyes = data['loc_left_eye'] + self.loc_right_eyes = data['loc_right_eye'] + self.loc_mouths = data['loc_mouth'] + + # uncomment to check data + # import torchvision + # if self.opt['rank'] == 0: + # import os + # os.makedirs('tmp/gt', exist_ok=True) + # os.makedirs('tmp/lq', exist_ok=True) + # print(self.idx) + # torchvision.utils.save_image( + # self.gt, f'tmp/gt/gt_{self.idx}.png', nrow=4, padding=2, normalize=True, range=(-1, 1)) + # torchvision.utils.save_image( + # self.lq, f'tmp/lq/lq{self.idx}.png', nrow=4, padding=2, normalize=True, range=(-1, 1)) + # self.idx = self.idx + 1 + + def construct_img_pyramid(self): + """Construct image pyramid for intermediate restoration loss""" + pyramid_gt = [self.gt] + down_img = self.gt + for _ in range(0, self.log_size - 3): + down_img = F.interpolate(down_img, scale_factor=0.5, mode='bilinear', align_corners=False) + pyramid_gt.insert(0, down_img) + return pyramid_gt + + def get_roi_regions(self, eye_out_size=80, mouth_out_size=120): + face_ratio = int(self.opt['network_g']['out_size'] / 512) + eye_out_size *= face_ratio + mouth_out_size *= face_ratio + + rois_eyes = [] + rois_mouths = [] + for b in 
range(self.loc_left_eyes.size(0)): # loop for batch size + # left eye and right eye + img_inds = self.loc_left_eyes.new_full((2, 1), b) + bbox = torch.stack([self.loc_left_eyes[b, :], self.loc_right_eyes[b, :]], dim=0) # shape: (2, 4) + rois = torch.cat([img_inds, bbox], dim=-1) # shape: (2, 5) + rois_eyes.append(rois) + # mouse + img_inds = self.loc_left_eyes.new_full((1, 1), b) + rois = torch.cat([img_inds, self.loc_mouths[b:b + 1, :]], dim=-1) # shape: (1, 5) + rois_mouths.append(rois) + + rois_eyes = torch.cat(rois_eyes, 0).to(self.device) + rois_mouths = torch.cat(rois_mouths, 0).to(self.device) + + # real images + all_eyes = roi_align(self.gt, boxes=rois_eyes, output_size=eye_out_size) * face_ratio + self.left_eyes_gt = all_eyes[0::2, :, :, :] + self.right_eyes_gt = all_eyes[1::2, :, :, :] + self.mouths_gt = roi_align(self.gt, boxes=rois_mouths, output_size=mouth_out_size) * face_ratio + # output + all_eyes = roi_align(self.output, boxes=rois_eyes, output_size=eye_out_size) * face_ratio + self.left_eyes = all_eyes[0::2, :, :, :] + self.right_eyes = all_eyes[1::2, :, :, :] + self.mouths = roi_align(self.output, boxes=rois_mouths, output_size=mouth_out_size) * face_ratio + + def _gram_mat(self, x): + """Calculate Gram matrix. + + Args: + x (torch.Tensor): Tensor with shape of (n, c, h, w). + + Returns: + torch.Tensor: Gram matrix. + """ + n, c, h, w = x.size() + features = x.view(n, c, w * h) + features_t = features.transpose(1, 2) + gram = features.bmm(features_t) / (c * h * w) + return gram + + def gray_resize_for_identity(self, out, size=128): + out_gray = (0.2989 * out[:, 0, :, :] + 0.5870 * out[:, 1, :, :] + 0.1140 * out[:, 2, :, :]) + out_gray = out_gray.unsqueeze(1) + out_gray = F.interpolate(out_gray, (size, size), mode='bilinear', align_corners=False) + return out_gray + + def optimize_parameters(self, current_iter): + # optimize net_g + for p in self.net_d.parameters(): + p.requires_grad = False + self.optimizer_g.zero_grad() + + # do not update facial component net_d + if self.use_facial_disc: + for p in self.net_d_left_eye.parameters(): + p.requires_grad = False + for p in self.net_d_right_eye.parameters(): + p.requires_grad = False + for p in self.net_d_mouth.parameters(): + p.requires_grad = False + + # image pyramid loss weight + pyramid_loss_weight = self.opt['train'].get('pyramid_loss_weight', 0) + if pyramid_loss_weight > 0 and current_iter > self.opt['train'].get('remove_pyramid_loss', float('inf')): + pyramid_loss_weight = 1e-12 # very small weight to avoid unused param error + if pyramid_loss_weight > 0: + self.output, out_rgbs = self.net_g(self.lq, return_rgb=True) + pyramid_gt = self.construct_img_pyramid() + else: + self.output, out_rgbs = self.net_g(self.lq, return_rgb=False) + + # get roi-align regions + if self.use_facial_disc: + self.get_roi_regions(eye_out_size=80, mouth_out_size=120) + + l_g_total = 0 + loss_dict = OrderedDict() + if (current_iter % self.net_d_iters == 0 and current_iter > self.net_d_init_iters): + # pixel loss + if self.cri_pix: + l_g_pix = self.cri_pix(self.output, self.gt) + l_g_total += l_g_pix + loss_dict['l_g_pix'] = l_g_pix + + # image pyramid loss + if pyramid_loss_weight > 0: + for i in range(0, self.log_size - 2): + l_pyramid = self.cri_l1(out_rgbs[i], pyramid_gt[i]) * pyramid_loss_weight + l_g_total += l_pyramid + loss_dict[f'l_p_{2**(i+3)}'] = l_pyramid + + # perceptual loss + if self.cri_perceptual: + l_g_percep, l_g_style = self.cri_perceptual(self.output, self.gt) + if l_g_percep is not None: + l_g_total += l_g_percep + 
loss_dict['l_g_percep'] = l_g_percep + if l_g_style is not None: + l_g_total += l_g_style + loss_dict['l_g_style'] = l_g_style + + # gan loss + fake_g_pred = self.net_d(self.output) + l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False) + l_g_total += l_g_gan + loss_dict['l_g_gan'] = l_g_gan + + # facial component loss + if self.use_facial_disc: + # left eye + fake_left_eye, fake_left_eye_feats = self.net_d_left_eye(self.left_eyes, return_feats=True) + l_g_gan = self.cri_component(fake_left_eye, True, is_disc=False) + l_g_total += l_g_gan + loss_dict['l_g_gan_left_eye'] = l_g_gan + # right eye + fake_right_eye, fake_right_eye_feats = self.net_d_right_eye(self.right_eyes, return_feats=True) + l_g_gan = self.cri_component(fake_right_eye, True, is_disc=False) + l_g_total += l_g_gan + loss_dict['l_g_gan_right_eye'] = l_g_gan + # mouth + fake_mouth, fake_mouth_feats = self.net_d_mouth(self.mouths, return_feats=True) + l_g_gan = self.cri_component(fake_mouth, True, is_disc=False) + l_g_total += l_g_gan + loss_dict['l_g_gan_mouth'] = l_g_gan + + if self.opt['train'].get('comp_style_weight', 0) > 0: + # get gt feat + _, real_left_eye_feats = self.net_d_left_eye(self.left_eyes_gt, return_feats=True) + _, real_right_eye_feats = self.net_d_right_eye(self.right_eyes_gt, return_feats=True) + _, real_mouth_feats = self.net_d_mouth(self.mouths_gt, return_feats=True) + + def _comp_style(feat, feat_gt, criterion): + return criterion(self._gram_mat(feat[0]), self._gram_mat( + feat_gt[0].detach())) * 0.5 + criterion( + self._gram_mat(feat[1]), self._gram_mat(feat_gt[1].detach())) + + # facial component style loss + comp_style_loss = 0 + comp_style_loss += _comp_style(fake_left_eye_feats, real_left_eye_feats, self.cri_l1) + comp_style_loss += _comp_style(fake_right_eye_feats, real_right_eye_feats, self.cri_l1) + comp_style_loss += _comp_style(fake_mouth_feats, real_mouth_feats, self.cri_l1) + comp_style_loss = comp_style_loss * self.opt['train']['comp_style_weight'] + l_g_total += comp_style_loss + loss_dict['l_g_comp_style_loss'] = comp_style_loss + + # identity loss + if self.use_identity: + identity_weight = self.opt['train']['identity_weight'] + # get gray images and resize + out_gray = self.gray_resize_for_identity(self.output) + gt_gray = self.gray_resize_for_identity(self.gt) + + identity_gt = self.network_identity(gt_gray).detach() + identity_out = self.network_identity(out_gray) + l_identity = self.cri_l1(identity_out, identity_gt) * identity_weight + l_g_total += l_identity + loss_dict['l_identity'] = l_identity + + l_g_total.backward() + self.optimizer_g.step() + + # EMA + self.model_ema(decay=0.5**(32 / (10 * 1000))) + + # ----------- optimize net_d ----------- # + for p in self.net_d.parameters(): + p.requires_grad = True + self.optimizer_d.zero_grad() + if self.use_facial_disc: + for p in self.net_d_left_eye.parameters(): + p.requires_grad = True + for p in self.net_d_right_eye.parameters(): + p.requires_grad = True + for p in self.net_d_mouth.parameters(): + p.requires_grad = True + self.optimizer_d_left_eye.zero_grad() + self.optimizer_d_right_eye.zero_grad() + self.optimizer_d_mouth.zero_grad() + + fake_d_pred = self.net_d(self.output.detach()) + real_d_pred = self.net_d(self.gt) + l_d = self.cri_gan(real_d_pred, True, is_disc=True) + self.cri_gan(fake_d_pred, False, is_disc=True) + loss_dict['l_d'] = l_d + # In WGAN, real_score should be positive and fake_score should be negative + loss_dict['real_score'] = real_d_pred.detach().mean() + loss_dict['fake_score'] = 
fake_d_pred.detach().mean() + l_d.backward() + + # regularization loss + if current_iter % self.net_d_reg_every == 0: + self.gt.requires_grad = True + real_pred = self.net_d(self.gt) + l_d_r1 = r1_penalty(real_pred, self.gt) + l_d_r1 = (self.r1_reg_weight / 2 * l_d_r1 * self.net_d_reg_every + 0 * real_pred[0]) + loss_dict['l_d_r1'] = l_d_r1.detach().mean() + l_d_r1.backward() + + self.optimizer_d.step() + + # optimize facial component discriminators + if self.use_facial_disc: + # left eye + fake_d_pred, _ = self.net_d_left_eye(self.left_eyes.detach()) + real_d_pred, _ = self.net_d_left_eye(self.left_eyes_gt) + l_d_left_eye = self.cri_component( + real_d_pred, True, is_disc=True) + self.cri_gan( + fake_d_pred, False, is_disc=True) + loss_dict['l_d_left_eye'] = l_d_left_eye + l_d_left_eye.backward() + # right eye + fake_d_pred, _ = self.net_d_right_eye(self.right_eyes.detach()) + real_d_pred, _ = self.net_d_right_eye(self.right_eyes_gt) + l_d_right_eye = self.cri_component( + real_d_pred, True, is_disc=True) + self.cri_gan( + fake_d_pred, False, is_disc=True) + loss_dict['l_d_right_eye'] = l_d_right_eye + l_d_right_eye.backward() + # mouth + fake_d_pred, _ = self.net_d_mouth(self.mouths.detach()) + real_d_pred, _ = self.net_d_mouth(self.mouths_gt) + l_d_mouth = self.cri_component( + real_d_pred, True, is_disc=True) + self.cri_gan( + fake_d_pred, False, is_disc=True) + loss_dict['l_d_mouth'] = l_d_mouth + l_d_mouth.backward() + + self.optimizer_d_left_eye.step() + self.optimizer_d_right_eye.step() + self.optimizer_d_mouth.step() + + self.log_dict = self.reduce_loss_dict(loss_dict) + + def test(self): + with torch.no_grad(): + if hasattr(self, 'net_g_ema'): + self.net_g_ema.eval() + self.output, _ = self.net_g_ema(self.lq) + else: + logger = get_root_logger() + logger.warning('Do not have self.net_g_ema, use self.net_g.') + self.net_g.eval() + self.output, _ = self.net_g(self.lq) + self.net_g.train() + + def dist_validation(self, dataloader, current_iter, tb_logger, save_img): + if self.opt['rank'] == 0: + self.nondist_validation(dataloader, current_iter, tb_logger, save_img) + + def nondist_validation(self, dataloader, current_iter, tb_logger, save_img): + dataset_name = dataloader.dataset.opt['name'] + with_metrics = self.opt['val'].get('metrics') is not None + use_pbar = self.opt['val'].get('pbar', False) + + if with_metrics: + if not hasattr(self, 'metric_results'): # only execute in the first run + self.metric_results = {metric: 0 for metric in self.opt['val']['metrics'].keys()} + # initialize the best metric results for each dataset_name (supporting multiple validation datasets) + self._initialize_best_metric_results(dataset_name) + # zero self.metric_results + self.metric_results = {metric: 0 for metric in self.metric_results} + + metric_data = dict() + if use_pbar: + pbar = tqdm(total=len(dataloader), unit='image') + + for idx, val_data in enumerate(dataloader): + img_name = osp.splitext(osp.basename(val_data['lq_path'][0]))[0] + self.feed_data(val_data) + self.test() + + sr_img = tensor2img(self.output.detach().cpu(), min_max=(-1, 1)) + metric_data['img'] = sr_img + if hasattr(self, 'gt'): + gt_img = tensor2img(self.gt.detach().cpu(), min_max=(-1, 1)) + metric_data['img2'] = gt_img + del self.gt + + # tentative for out of GPU memory + del self.lq + del self.output + torch.cuda.empty_cache() + + if save_img: + if self.opt['is_train']: + save_img_path = osp.join(self.opt['path']['visualization'], img_name, + f'{img_name}_{current_iter}.png') + else: + if self.opt['val']['suffix']: + 
save_img_path = osp.join(self.opt['path']['visualization'], dataset_name, + f'{img_name}_{self.opt["val"]["suffix"]}.png') + else: + save_img_path = osp.join(self.opt['path']['visualization'], dataset_name, + f'{img_name}_{self.opt["name"]}.png') + imwrite(sr_img, save_img_path) + + if with_metrics: + # calculate metrics + for name, opt_ in self.opt['val']['metrics'].items(): + self.metric_results[name] += calculate_metric(metric_data, opt_) + if use_pbar: + pbar.update(1) + pbar.set_description(f'Test {img_name}') + if use_pbar: + pbar.close() + + if with_metrics: + for metric in self.metric_results.keys(): + self.metric_results[metric] /= (idx + 1) + # update the best metric result + self._update_best_metric_result(dataset_name, metric, self.metric_results[metric], current_iter) + + self._log_validation_metric_values(current_iter, dataset_name, tb_logger) + + def _log_validation_metric_values(self, current_iter, dataset_name, tb_logger): + log_str = f'Validation {dataset_name}\n' + for metric, value in self.metric_results.items(): + log_str += f'\t # {metric}: {value:.4f}' + if hasattr(self, 'best_metric_results'): + log_str += (f'\tBest: {self.best_metric_results[dataset_name][metric]["val"]:.4f} @ ' + f'{self.best_metric_results[dataset_name][metric]["iter"]} iter') + log_str += '\n' + + logger = get_root_logger() + logger.info(log_str) + if tb_logger: + for metric, value in self.metric_results.items(): + tb_logger.add_scalar(f'metrics/{dataset_name}/{metric}', value, current_iter) + + def save(self, epoch, current_iter): + # save net_g and net_d + self.save_network([self.net_g, self.net_g_ema], 'net_g', current_iter, param_key=['params', 'params_ema']) + self.save_network(self.net_d, 'net_d', current_iter) + # save component discriminators + if self.use_facial_disc: + self.save_network(self.net_d_left_eye, 'net_d_left_eye', current_iter) + self.save_network(self.net_d_right_eye, 'net_d_right_eye', current_iter) + self.save_network(self.net_d_mouth, 'net_d_mouth', current_iter) + # save training state + self.save_training_state(epoch, current_iter) diff --git a/third_part/GFPGAN/gfpgan/train.py b/third_part/GFPGAN/gfpgan/train.py new file mode 100644 index 0000000000000000000000000000000000000000..fe5f1f909ae15a8d830ef65dcb43436d4f4ee7ae --- /dev/null +++ b/third_part/GFPGAN/gfpgan/train.py @@ -0,0 +1,11 @@ +# flake8: noqa +import os.path as osp +from basicsr.train import train_pipeline + +import gfpgan.archs +import gfpgan.data +import gfpgan.models + +if __name__ == '__main__': + root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir)) + train_pipeline(root_path) diff --git a/third_part/GFPGAN/gfpgan/utils.py b/third_part/GFPGAN/gfpgan/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1cc104d8ba96f66527a634ff15b375d377f9c469 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/utils.py @@ -0,0 +1,143 @@ +import cv2 +import os +import torch +from basicsr.utils import img2tensor, tensor2img +from basicsr.utils.download_util import load_file_from_url +from facexlib.utils.face_restoration_helper import FaceRestoreHelper +from torchvision.transforms.functional import normalize + +from gfpgan.archs.gfpgan_bilinear_arch import GFPGANBilinear +from gfpgan.archs.gfpganv1_arch import GFPGANv1 +from gfpgan.archs.gfpganv1_clean_arch import GFPGANv1Clean + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +class GFPGANer(): + """Helper for restoration with GFPGAN. + + It will detect and crop faces, and then resize the faces to 512x512. 
+ GFPGAN is used to restored the resized faces. + The background is upsampled with the bg_upsampler. + Finally, the faces will be pasted back to the upsample background image. + + Args: + model_path (str): The path to the GFPGAN model. It can be urls (will first download it automatically). + upscale (float): The upscale of the final output. Default: 2. + arch (str): The GFPGAN architecture. Option: clean | original. Default: clean. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + bg_upsampler (nn.Module): The upsampler for the background. Default: None. + """ + + def __init__(self, model_path, upscale=2, arch='clean', channel_multiplier=2, bg_upsampler=None): + self.upscale = upscale + self.bg_upsampler = bg_upsampler + + # initialize model + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + # initialize the GFP-GAN + if arch == 'clean': + self.gfpgan = GFPGANv1Clean( + out_size=512, + num_style_feat=512, + channel_multiplier=channel_multiplier, + decoder_load_path=None, + fix_decoder=False, + num_mlp=8, + input_is_latent=True, + different_w=True, + narrow=1, + sft_half=True) + elif arch == 'bilinear': + self.gfpgan = GFPGANBilinear( + out_size=512, + num_style_feat=512, + channel_multiplier=channel_multiplier, + decoder_load_path=None, + fix_decoder=False, + num_mlp=8, + input_is_latent=True, + different_w=True, + narrow=1, + sft_half=True) + elif arch == 'original': + self.gfpgan = GFPGANv1( + out_size=512, + num_style_feat=512, + channel_multiplier=channel_multiplier, + decoder_load_path=None, + fix_decoder=True, + num_mlp=8, + input_is_latent=True, + different_w=True, + narrow=1, + sft_half=True) + # initialize face helper + self.face_helper = FaceRestoreHelper( + upscale, + face_size=512, + crop_ratio=(1, 1), + det_model='retinaface_resnet50', + save_ext='png', + device=self.device) + + if model_path.startswith('https://'): + model_path = load_file_from_url( + url=model_path, model_dir=os.path.join(ROOT_DIR, 'gfpgan/weights'), progress=True, file_name=None) + loadnet = torch.load(model_path) + if 'params_ema' in loadnet: + keyname = 'params_ema' + else: + keyname = 'params' + self.gfpgan.load_state_dict(loadnet[keyname], strict=True) + self.gfpgan.eval() + self.gfpgan = self.gfpgan.to(self.device) + + @torch.no_grad() + def enhance(self, img, has_aligned=False, only_center_face=False, paste_back=True): + self.face_helper.clean_all() + + if has_aligned: # the inputs are already aligned + img = cv2.resize(img, (512, 512)) + self.face_helper.cropped_faces = [img] + else: + self.face_helper.read_image(img) + # get face landmarks for each face + self.face_helper.get_face_landmarks_5(only_center_face=only_center_face, eye_dist_threshold=5) + # eye_dist_threshold=5: skip faces whose eye distance is smaller than 5 pixels + # TODO: even with eye_dist_threshold, it will still introduce wrong detections and restorations. 
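+            # (note: align_warp_face below crops each detected face to the 512x512 template
+            #  configured on FaceRestoreHelper above and records the affine matrices that the
+            #  paste-back branch at the end of this method inverts via get_inverse_affine)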
+ # align and warp each face + self.face_helper.align_warp_face() + + # face restoration + for cropped_face in self.face_helper.cropped_faces: + # prepare data + cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True) + normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True) + cropped_face_t = cropped_face_t.unsqueeze(0).to(self.device) + + try: + output = self.gfpgan(cropped_face_t, return_rgb=False)[0] + # convert to image + restored_face = tensor2img(output.squeeze(0), rgb2bgr=True, min_max=(-1, 1)) + except RuntimeError as error: + print(f'\tFailed inference for GFPGAN: {error}.') + restored_face = cropped_face + + restored_face = restored_face.astype('uint8') + self.face_helper.add_restored_face(restored_face) + + if not has_aligned and paste_back: + # upsample the background + if self.bg_upsampler is not None: + # Now only support RealESRGAN for upsampling background + bg_img = self.bg_upsampler.enhance(img, outscale=self.upscale)[0] + else: + bg_img = None + + self.face_helper.get_inverse_affine(None) + # paste each restored face to the input image + restored_img = self.face_helper.paste_faces_to_input_image(upsample_img=bg_img) + return self.face_helper.cropped_faces, self.face_helper.restored_faces, restored_img + else: + return self.face_helper.cropped_faces, self.face_helper.restored_faces, None diff --git a/third_part/GFPGAN/gfpgan/version.py b/third_part/GFPGAN/gfpgan/version.py new file mode 100644 index 0000000000000000000000000000000000000000..565af38bc1f98d7f70727b6269665fcf20b61f93 --- /dev/null +++ b/third_part/GFPGAN/gfpgan/version.py @@ -0,0 +1,5 @@ +# GENERATED VERSION FILE +# TIME: Wed Apr 20 14:43:06 2022 +__version__ = '1.3.2' +__gitsha__ = '924ce47' +version_info = (1, 3, 2) diff --git a/third_part/GFPGAN/gfpgan/weights/README.md b/third_part/GFPGAN/gfpgan/weights/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4d7b7e642591ef88575d9e6c360a4d29e0cc1a4f --- /dev/null +++ b/third_part/GFPGAN/gfpgan/weights/README.md @@ -0,0 +1,3 @@ +# Weights + +Put the downloaded weights to this folder. 
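For reference, a minimal usage sketch of the GFPGANer helper defined in gfpgan/utils.py above. The checkpoint and image paths are placeholders (assumptions, not files added by this patch); the constructor arguments and the enhance() call follow the class exactly as written above.

import cv2

from gfpgan.utils import GFPGANer

# Placeholder paths: substitute a downloaded GFPGAN checkpoint and any BGR test image.
restorer = GFPGANer(
    model_path='gfpgan/weights/GFPGANv1.3.pth',
    upscale=2,
    arch='clean',
    channel_multiplier=2,
    bg_upsampler=None)  # background is not super-resolved when no upsampler is given

img = cv2.imread('inputs/face.jpg', cv2.IMREAD_COLOR)  # BGR, as enhance() expects
cropped_faces, restored_faces, restored_img = restorer.enhance(
    img, has_aligned=False, only_center_face=False, paste_back=True)
if restored_img is not None:
    cv2.imwrite('results/face_restored.png', restored_img)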
diff --git a/third_part/GFPGAN/options/train_gfpgan_v1.yml b/third_part/GFPGAN/options/train_gfpgan_v1.yml new file mode 100644 index 0000000000000000000000000000000000000000..aa5212a81de362daaef306e203f03cc665186d47 --- /dev/null +++ b/third_part/GFPGAN/options/train_gfpgan_v1.yml @@ -0,0 +1,216 @@ +# general settings +name: train_GFPGANv1_512 +model_type: GFPGANModel +num_gpu: auto # officially, we use 4 GPUs +manual_seed: 0 + +# dataset and data loader settings +datasets: + train: + name: FFHQ + type: FFHQDegradationDataset + # dataroot_gt: datasets/ffhq/ffhq_512.lmdb + dataroot_gt: datasets/ffhq/ffhq_512 + io_backend: + # type: lmdb + type: disk + + use_hflip: true + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + out_size: 512 + + blur_kernel_size: 41 + kernel_list: ['iso', 'aniso'] + kernel_prob: [0.5, 0.5] + blur_sigma: [0.1, 10] + downsample_range: [0.8, 8] + noise_range: [0, 20] + jpeg_range: [60, 100] + + # color jitter and gray + color_jitter_prob: 0.3 + color_jitter_shift: 20 + color_jitter_pt_prob: 0.3 + gray_prob: 0.01 + + # If you do not want colorization, please set + # color_jitter_prob: ~ + # color_jitter_pt_prob: ~ + # gray_prob: 0.01 + # gt_gray: True + + crop_components: true + component_path: experiments/pretrained_models/FFHQ_eye_mouth_landmarks_512.pth + eye_enlarge_ratio: 1.4 + + # data loader + use_shuffle: true + num_worker_per_gpu: 6 + batch_size_per_gpu: 3 + dataset_enlarge_ratio: 1 + prefetch_mode: ~ + + val: + # Please modify accordingly to use your own validation + # Or comment the val block if do not need validation during training + name: validation + type: PairedImageDataset + dataroot_lq: datasets/faces/validation/input + dataroot_gt: datasets/faces/validation/reference + io_backend: + type: disk + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + scale: 1 + +# network structures +network_g: + type: GFPGANv1 + out_size: 512 + num_style_feat: 512 + channel_multiplier: 1 + resample_kernel: [1, 3, 3, 1] + decoder_load_path: experiments/pretrained_models/StyleGAN2_512_Cmul1_FFHQ_B12G4_scratch_800k.pth + fix_decoder: true + num_mlp: 8 + lr_mlp: 0.01 + input_is_latent: true + different_w: true + narrow: 1 + sft_half: true + +network_d: + type: StyleGAN2Discriminator + out_size: 512 + channel_multiplier: 1 + resample_kernel: [1, 3, 3, 1] + +network_d_left_eye: + type: FacialComponentDiscriminator + +network_d_right_eye: + type: FacialComponentDiscriminator + +network_d_mouth: + type: FacialComponentDiscriminator + +network_identity: + type: ResNetArcFace + block: IRBlock + layers: [2, 2, 2, 2] + use_se: False + +# path +path: + pretrain_network_g: ~ + param_key_g: params_ema + strict_load_g: ~ + pretrain_network_d: ~ + pretrain_network_d_left_eye: ~ + pretrain_network_d_right_eye: ~ + pretrain_network_d_mouth: ~ + pretrain_network_identity: experiments/pretrained_models/arcface_resnet18.pth + # resume + resume_state: ~ + ignore_resume_networks: ['network_identity'] + +# training settings +train: + optim_g: + type: Adam + lr: !!float 2e-3 + optim_d: + type: Adam + lr: !!float 2e-3 + optim_component: + type: Adam + lr: !!float 2e-3 + + scheduler: + type: MultiStepLR + milestones: [600000, 700000] + gamma: 0.5 + + total_iter: 800000 + warmup_iter: -1 # no warm up + + # losses + # pixel loss + pixel_opt: + type: L1Loss + loss_weight: !!float 1e-1 + reduction: mean + # L1 loss used in pyramid loss, component style loss and identity loss + L1_opt: + type: L1Loss + loss_weight: 1 + reduction: mean + + # image pyramid loss + pyramid_loss_weight: 1 + remove_pyramid_loss: 
50000 + # perceptual loss (content and style losses) + perceptual_opt: + type: PerceptualLoss + layer_weights: + # before relu + 'conv1_2': 0.1 + 'conv2_2': 0.1 + 'conv3_4': 1 + 'conv4_4': 1 + 'conv5_4': 1 + vgg_type: vgg19 + use_input_norm: true + perceptual_weight: !!float 1 + style_weight: 50 + range_norm: true + criterion: l1 + # gan loss + gan_opt: + type: GANLoss + gan_type: wgan_softplus + loss_weight: !!float 1e-1 + # r1 regularization for discriminator + r1_reg_weight: 10 + # facial component loss + gan_component_opt: + type: GANLoss + gan_type: vanilla + real_label_val: 1.0 + fake_label_val: 0.0 + loss_weight: !!float 1 + comp_style_weight: 200 + # identity loss + identity_weight: 10 + + net_d_iters: 1 + net_d_init_iters: 0 + net_d_reg_every: 16 + +# validation settings +val: + val_freq: !!float 5e3 + save_img: true + + metrics: + psnr: # metric name + type: calculate_psnr + crop_border: 0 + test_y_channel: false + +# logging settings +logger: + print_freq: 100 + save_checkpoint_freq: !!float 5e3 + use_tb_logger: true + wandb: + project: ~ + resume_id: ~ + +# dist training settings +dist_params: + backend: nccl + port: 29500 + +find_unused_parameters: true diff --git a/third_part/GFPGAN/options/train_gfpgan_v1_simple.yml b/third_part/GFPGAN/options/train_gfpgan_v1_simple.yml new file mode 100644 index 0000000000000000000000000000000000000000..3807575826a5e7ed97335f607c091c8a4039a213 --- /dev/null +++ b/third_part/GFPGAN/options/train_gfpgan_v1_simple.yml @@ -0,0 +1,182 @@ +# general settings +name: train_GFPGANv1_512_simple +model_type: GFPGANModel +num_gpu: auto # officially, we use 4 GPUs +manual_seed: 0 + +# dataset and data loader settings +datasets: + train: + name: FFHQ + type: FFHQDegradationDataset + # dataroot_gt: datasets/ffhq/ffhq_512.lmdb + dataroot_gt: datasets/ffhq/ffhq_512 + io_backend: + # type: lmdb + type: disk + + use_hflip: true + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + out_size: 512 + + blur_kernel_size: 41 + kernel_list: ['iso', 'aniso'] + kernel_prob: [0.5, 0.5] + blur_sigma: [0.1, 10] + downsample_range: [0.8, 8] + noise_range: [0, 20] + jpeg_range: [60, 100] + + # color jitter and gray + color_jitter_prob: 0.3 + color_jitter_shift: 20 + color_jitter_pt_prob: 0.3 + gray_prob: 0.01 + + # If you do not want colorization, please set + # color_jitter_prob: ~ + # color_jitter_pt_prob: ~ + # gray_prob: 0.01 + # gt_gray: True + + # data loader + use_shuffle: true + num_worker_per_gpu: 6 + batch_size_per_gpu: 3 + dataset_enlarge_ratio: 1 + prefetch_mode: ~ + + val: + # Please modify accordingly to use your own validation + # Or comment the val block if do not need validation during training + name: validation + type: PairedImageDataset + dataroot_lq: datasets/faces/validation/input + dataroot_gt: datasets/faces/validation/reference + io_backend: + type: disk + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + scale: 1 + +# network structures +network_g: + type: GFPGANv1 + out_size: 512 + num_style_feat: 512 + channel_multiplier: 1 + resample_kernel: [1, 3, 3, 1] + decoder_load_path: experiments/pretrained_models/StyleGAN2_512_Cmul1_FFHQ_B12G4_scratch_800k.pth + fix_decoder: true + num_mlp: 8 + lr_mlp: 0.01 + input_is_latent: true + different_w: true + narrow: 1 + sft_half: true + +network_d: + type: StyleGAN2Discriminator + out_size: 512 + channel_multiplier: 1 + resample_kernel: [1, 3, 3, 1] + + +# path +path: + pretrain_network_g: ~ + param_key_g: params_ema + strict_load_g: ~ + pretrain_network_d: ~ + resume_state: ~ + +# training settings +train: + 
optim_g: + type: Adam + lr: !!float 2e-3 + optim_d: + type: Adam + lr: !!float 2e-3 + optim_component: + type: Adam + lr: !!float 2e-3 + + scheduler: + type: MultiStepLR + milestones: [600000, 700000] + gamma: 0.5 + + total_iter: 800000 + warmup_iter: -1 # no warm up + + # losses + # pixel loss + pixel_opt: + type: L1Loss + loss_weight: !!float 1e-1 + reduction: mean + # L1 loss used in pyramid loss, component style loss and identity loss + L1_opt: + type: L1Loss + loss_weight: 1 + reduction: mean + + # image pyramid loss + pyramid_loss_weight: 1 + remove_pyramid_loss: 50000 + # perceptual loss (content and style losses) + perceptual_opt: + type: PerceptualLoss + layer_weights: + # before relu + 'conv1_2': 0.1 + 'conv2_2': 0.1 + 'conv3_4': 1 + 'conv4_4': 1 + 'conv5_4': 1 + vgg_type: vgg19 + use_input_norm: true + perceptual_weight: !!float 1 + style_weight: 50 + range_norm: true + criterion: l1 + # gan loss + gan_opt: + type: GANLoss + gan_type: wgan_softplus + loss_weight: !!float 1e-1 + # r1 regularization for discriminator + r1_reg_weight: 10 + + net_d_iters: 1 + net_d_init_iters: 0 + net_d_reg_every: 16 + +# validation settings +val: + val_freq: !!float 5e3 + save_img: true + + metrics: + psnr: # metric name + type: calculate_psnr + crop_border: 0 + test_y_channel: false + +# logging settings +logger: + print_freq: 100 + save_checkpoint_freq: !!float 5e3 + use_tb_logger: true + wandb: + project: ~ + resume_id: ~ + +# dist training settings +dist_params: + backend: nccl + port: 29500 + +find_unused_parameters: true diff --git a/third_part/GPEN/align_faces.py b/third_part/GPEN/align_faces.py new file mode 100644 index 0000000000000000000000000000000000000000..98ed5417fe474b8ce475209783f4399d831ea18a --- /dev/null +++ b/third_part/GPEN/align_faces.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Apr 24 15:43:29 2017 +@author: zhaoy +""" +""" +@Modified by yangxy (yangtao9009@gmail.com) +""" +import cv2 +import numpy as np +from skimage import transform as trans + +# reference facial points, a list of coordinates (x,y) +REFERENCE_FACIAL_POINTS = [ + [30.29459953, 51.69630051], + [65.53179932, 51.50139999], + [48.02519989, 71.73660278], + [33.54930115, 92.3655014], + [62.72990036, 92.20410156] +] + +DEFAULT_CROP_SIZE = (96, 112) + + +def _umeyama(src, dst, estimate_scale=True, scale=1.0): + """Estimate N-D similarity transformation with or without scaling. + Parameters + ---------- + src : (M, N) array + Source coordinates. + dst : (M, N) array + Destination coordinates. + estimate_scale : bool + Whether to estimate scaling factor. + Returns + ------- + T : (N + 1, N + 1) + The homogeneous similarity transformation matrix. The matrix contains + NaN values only if the problem is not well-conditioned. + References + ---------- + .. [1] "Least-squares estimation of transformation parameters between two + point patterns", Shinji Umeyama, PAMI 1991, :DOI:`10.1109/34.88573` + """ + + num = src.shape[0] + dim = src.shape[1] + + # Compute mean of src and dst. + src_mean = src.mean(axis=0) + dst_mean = dst.mean(axis=0) + + # Subtract mean from src and dst. + src_demean = src - src_mean + dst_demean = dst - dst_mean + + # Eq. (38). + A = dst_demean.T @ src_demean / num + + # Eq. (39). + d = np.ones((dim,), dtype=np.double) + if np.linalg.det(A) < 0: + d[dim - 1] = -1 + + T = np.eye(dim + 1, dtype=np.double) + + U, S, V = np.linalg.svd(A) + + # Eq. (40) and (43). 
+ rank = np.linalg.matrix_rank(A) + if rank == 0: + return np.nan * T + elif rank == dim - 1: + if np.linalg.det(U) * np.linalg.det(V) > 0: + T[:dim, :dim] = U @ V + else: + s = d[dim - 1] + d[dim - 1] = -1 + T[:dim, :dim] = U @ np.diag(d) @ V + d[dim - 1] = s + else: + T[:dim, :dim] = U @ np.diag(d) @ V + + if estimate_scale: + # Eq. (41) and (42). + scale = 1.0 / src_demean.var(axis=0).sum() * (S @ d) + else: + scale = scale + + T[:dim, dim] = dst_mean - scale * (T[:dim, :dim] @ src_mean.T) + T[:dim, :dim] *= scale + + return T, scale + + +class FaceWarpException(Exception): + def __str__(self): + return 'In File {}:{}'.format( + __file__, super.__str__(self)) + + +def get_reference_facial_points(output_size=None, + inner_padding_factor=0.0, + outer_padding=(0, 0), + default_square=False): + tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) + tmp_crop_size = np.array(DEFAULT_CROP_SIZE) + + # 0) make the inner region a square + if default_square: + size_diff = max(tmp_crop_size) - tmp_crop_size + tmp_5pts += size_diff / 2 + tmp_crop_size += size_diff + + if (output_size and + output_size[0] == tmp_crop_size[0] and + output_size[1] == tmp_crop_size[1]): + print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) + return tmp_5pts + + if (inner_padding_factor == 0 and + outer_padding == (0, 0)): + if output_size is None: + print('No paddings to do: return default reference points') + return tmp_5pts + else: + raise FaceWarpException( + 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) + + # check output size + if not (0 <= inner_padding_factor <= 1.0): + raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') + + if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) + and output_size is None): + output_size = tmp_crop_size * \ + (1 + inner_padding_factor * 2).astype(np.int32) + output_size += np.array(outer_padding) + print(' deduced from paddings, output_size = ', output_size) + + if not (outer_padding[0] < output_size[0] + and outer_padding[1] < output_size[1]): + raise FaceWarpException('Not (outer_padding[0] < output_size[0]' + 'and outer_padding[1] < output_size[1])') + + # 1) pad the inner region according inner_padding_factor + # print('---> STEP1: pad the inner region according inner_padding_factor') + if inner_padding_factor > 0: + size_diff = tmp_crop_size * inner_padding_factor * 2 + tmp_5pts += size_diff / 2 + tmp_crop_size += np.round(size_diff).astype(np.int32) + + # print(' crop_size = ', tmp_crop_size) + # print(' reference_5pts = ', tmp_5pts) + + # 2) resize the padded inner region + # print('---> STEP2: resize the padded inner region') + size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 + # print(' crop_size = ', tmp_crop_size) + # print(' size_bf_outer_pad = ', size_bf_outer_pad) + + if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: + raise FaceWarpException('Must have (output_size - outer_padding)' + '= some_scale * (crop_size * (1.0 + inner_padding_factor)') + + scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] + # print(' resize scale_factor = ', scale_factor) + tmp_5pts = tmp_5pts * scale_factor + # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) + # tmp_5pts = tmp_5pts + size_diff / 2 + tmp_crop_size = size_bf_outer_pad + # print(' crop_size = ', tmp_crop_size) + # print(' reference_5pts = ', tmp_5pts) + + # 3) add outer_padding to make output_size + reference_5point = 
tmp_5pts + np.array(outer_padding) + tmp_crop_size = output_size + # print('---> STEP3: add outer_padding to make output_size') + # print(' crop_size = ', tmp_crop_size) + # print(' reference_5pts = ', tmp_5pts) + # + # print('===> end get_reference_facial_points\n') + + return reference_5point + + +def get_affine_transform_matrix(src_pts, dst_pts): + tfm = np.float32([[1, 0, 0], [0, 1, 0]]) + n_pts = src_pts.shape[0] + ones = np.ones((n_pts, 1), src_pts.dtype) + src_pts_ = np.hstack([src_pts, ones]) + dst_pts_ = np.hstack([dst_pts, ones]) + + A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) + + if rank == 3: + tfm = np.float32([ + [A[0, 0], A[1, 0], A[2, 0]], + [A[0, 1], A[1, 1], A[2, 1]] + ]) + elif rank == 2: + tfm = np.float32([ + [A[0, 0], A[1, 0], 0], + [A[0, 1], A[1, 1], 0] + ]) + + return tfm + + +def warp_and_crop_face(src_img, + facial_pts, + reference_pts=None, + crop_size=(96, 112), + align_type='smilarity'): #smilarity cv2_affine affine + if reference_pts is None: + if crop_size[0] == 96 and crop_size[1] == 112: + reference_pts = REFERENCE_FACIAL_POINTS + else: + default_square = False + inner_padding_factor = 0 + outer_padding = (0, 0) + output_size = crop_size + + reference_pts = get_reference_facial_points(output_size, + inner_padding_factor, + outer_padding, + default_square) + + ref_pts = np.float32(reference_pts) + ref_pts_shp = ref_pts.shape + if max(ref_pts_shp) < 3: # or min(ref_pts_shp) != 2: + raise FaceWarpException( + 'reference_pts.shape must be (K,2) or (2,K) and K>2') + + if ref_pts_shp[0] == 2 or ref_pts_shp[0] == 3: + ref_pts = ref_pts.T + + src_pts = np.float32(facial_pts) + src_pts_shp = src_pts.shape + if max(src_pts_shp) < 3: # or min(src_pts_shp) != 2: + raise FaceWarpException( + 'facial_pts.shape must be (K,2) or (2,K) and K>2') + + if src_pts_shp[0] == 2 or src_pts_shp[0] == 3: + src_pts = src_pts.T + + if src_pts.shape != ref_pts.shape: + raise FaceWarpException( + 'facial_pts and reference_pts must have the same shape') + + if align_type is 'cv2_affine': + tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) + tfm_inv = cv2.getAffineTransform(ref_pts[0:3], src_pts[0:3]) + elif align_type is 'cv2_rigid': + tfm, _ = cv2.estimateAffinePartial2D(src_pts[0:3], ref_pts[0:3]) + tfm_inv, _ = cv2.estimateAffinePartial2D(ref_pts[0:3], src_pts[0:3]) + elif align_type is 'affine': + tfm = get_affine_transform_matrix(src_pts, ref_pts) + tfm_inv = get_affine_transform_matrix(ref_pts, src_pts) + else: + params, scale = _umeyama(src_pts, ref_pts) + tfm = params[:2, :] + + params, _ = _umeyama(ref_pts, src_pts, False, scale=1.0/scale) + tfm_inv = params[:2, :] + + # M = cv2.getPerspectiveTransform(ref_pts[0:4], src_pts[0:4]) + face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]), flags=3) + # face_img = cv2.warpPerspective(src_img, M, (crop_size[0], crop_size[1]), flags=cv2.INTER_LINEAR ) + + return face_img, tfm_inv diff --git a/third_part/GPEN/face_detect/.DS_Store b/third_part/GPEN/face_detect/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2112b95d33b5353f276676a84ecba1bf76202daf Binary files /dev/null and b/third_part/GPEN/face_detect/.DS_Store differ diff --git a/third_part/GPEN/face_detect/data/FDDB/img_list.txt b/third_part/GPEN/face_detect/data/FDDB/img_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cf3d3199ca5c9c5ef4a904f1b9c89b821a7978a --- /dev/null +++ b/third_part/GPEN/face_detect/data/FDDB/img_list.txt @@ -0,0 +1,2845 @@ +2002/08/11/big/img_591 
+2002/08/26/big/img_265 +2002/07/19/big/img_423 +2002/08/24/big/img_490 +2002/08/31/big/img_17676 +2002/07/31/big/img_228 +2002/07/24/big/img_402 +2002/08/04/big/img_769 +2002/07/19/big/img_581 +2002/08/13/big/img_723 +2002/08/12/big/img_821 +2003/01/17/big/img_610 +2002/08/13/big/img_1116 +2002/08/28/big/img_19238 +2002/08/21/big/img_660 +2002/08/14/big/img_607 +2002/08/05/big/img_3708 +2002/08/19/big/img_511 +2002/08/07/big/img_1316 +2002/07/25/big/img_1047 +2002/07/23/big/img_474 +2002/07/27/big/img_970 +2002/09/02/big/img_15752 +2002/09/01/big/img_16378 +2002/09/01/big/img_16189 +2002/08/26/big/img_276 +2002/07/24/big/img_518 +2002/08/14/big/img_1027 +2002/08/24/big/img_733 +2002/08/15/big/img_249 +2003/01/15/big/img_1371 +2002/08/07/big/img_1348 +2003/01/01/big/img_331 +2002/08/23/big/img_536 +2002/07/30/big/img_224 +2002/08/10/big/img_763 +2002/08/21/big/img_293 +2002/08/15/big/img_1211 +2002/08/15/big/img_1194 +2003/01/15/big/img_390 +2002/08/06/big/img_2893 +2002/08/17/big/img_691 +2002/08/07/big/img_1695 +2002/08/16/big/img_829 +2002/07/25/big/img_201 +2002/08/23/big/img_36 +2003/01/15/big/img_763 +2003/01/15/big/img_637 +2002/08/22/big/img_592 +2002/07/25/big/img_817 +2003/01/15/big/img_1219 +2002/08/05/big/img_3508 +2002/08/15/big/img_1108 +2002/07/19/big/img_488 +2003/01/16/big/img_704 +2003/01/13/big/img_1087 +2002/08/10/big/img_670 +2002/07/24/big/img_104 +2002/08/27/big/img_19823 +2002/09/01/big/img_16229 +2003/01/13/big/img_846 +2002/08/04/big/img_412 +2002/07/22/big/img_554 +2002/08/12/big/img_331 +2002/08/02/big/img_533 +2002/08/12/big/img_259 +2002/08/18/big/img_328 +2003/01/14/big/img_630 +2002/08/05/big/img_3541 +2002/08/06/big/img_2390 +2002/08/20/big/img_150 +2002/08/02/big/img_1231 +2002/08/16/big/img_710 +2002/08/19/big/img_591 +2002/07/22/big/img_725 +2002/07/24/big/img_820 +2003/01/13/big/img_568 +2002/08/22/big/img_853 +2002/08/09/big/img_648 +2002/08/23/big/img_528 +2003/01/14/big/img_888 +2002/08/30/big/img_18201 +2002/08/13/big/img_965 +2003/01/14/big/img_660 +2002/07/19/big/img_517 +2003/01/14/big/img_406 +2002/08/30/big/img_18433 +2002/08/07/big/img_1630 +2002/08/06/big/img_2717 +2002/08/21/big/img_470 +2002/07/23/big/img_633 +2002/08/20/big/img_915 +2002/08/16/big/img_893 +2002/07/29/big/img_644 +2002/08/15/big/img_529 +2002/08/16/big/img_668 +2002/08/07/big/img_1871 +2002/07/25/big/img_192 +2002/07/31/big/img_961 +2002/08/19/big/img_738 +2002/07/31/big/img_382 +2002/08/19/big/img_298 +2003/01/17/big/img_608 +2002/08/21/big/img_514 +2002/07/23/big/img_183 +2003/01/17/big/img_536 +2002/07/24/big/img_478 +2002/08/06/big/img_2997 +2002/09/02/big/img_15380 +2002/08/07/big/img_1153 +2002/07/31/big/img_967 +2002/07/31/big/img_711 +2002/08/26/big/img_664 +2003/01/01/big/img_326 +2002/08/24/big/img_775 +2002/08/08/big/img_961 +2002/08/16/big/img_77 +2002/08/12/big/img_296 +2002/07/22/big/img_905 +2003/01/13/big/img_284 +2002/08/13/big/img_887 +2002/08/24/big/img_849 +2002/07/30/big/img_345 +2002/08/18/big/img_419 +2002/08/01/big/img_1347 +2002/08/05/big/img_3670 +2002/07/21/big/img_479 +2002/08/08/big/img_913 +2002/09/02/big/img_15828 +2002/08/30/big/img_18194 +2002/08/08/big/img_471 +2002/08/22/big/img_734 +2002/08/09/big/img_586 +2002/08/09/big/img_454 +2002/07/29/big/img_47 +2002/07/19/big/img_381 +2002/07/29/big/img_733 +2002/08/20/big/img_327 +2002/07/21/big/img_96 +2002/08/06/big/img_2680 +2002/07/25/big/img_919 +2002/07/21/big/img_158 +2002/07/22/big/img_801 +2002/07/22/big/img_567 +2002/07/24/big/img_804 +2002/07/24/big/img_690 +2003/01/15/big/img_576 
+2002/08/14/big/img_335 +2003/01/13/big/img_390 +2002/08/11/big/img_258 +2002/07/23/big/img_917 +2002/08/15/big/img_525 +2003/01/15/big/img_505 +2002/07/30/big/img_886 +2003/01/16/big/img_640 +2003/01/14/big/img_642 +2003/01/17/big/img_844 +2002/08/04/big/img_571 +2002/08/29/big/img_18702 +2003/01/15/big/img_240 +2002/07/29/big/img_553 +2002/08/10/big/img_354 +2002/08/18/big/img_17 +2003/01/15/big/img_782 +2002/07/27/big/img_382 +2002/08/14/big/img_970 +2003/01/16/big/img_70 +2003/01/16/big/img_625 +2002/08/18/big/img_341 +2002/08/26/big/img_188 +2002/08/09/big/img_405 +2002/08/02/big/img_37 +2002/08/13/big/img_748 +2002/07/22/big/img_399 +2002/07/25/big/img_844 +2002/08/12/big/img_340 +2003/01/13/big/img_815 +2002/08/26/big/img_5 +2002/08/10/big/img_158 +2002/08/18/big/img_95 +2002/07/29/big/img_1297 +2003/01/13/big/img_508 +2002/09/01/big/img_16680 +2003/01/16/big/img_338 +2002/08/13/big/img_517 +2002/07/22/big/img_626 +2002/08/06/big/img_3024 +2002/07/26/big/img_499 +2003/01/13/big/img_387 +2002/08/31/big/img_18025 +2002/08/13/big/img_520 +2003/01/16/big/img_576 +2002/07/26/big/img_121 +2002/08/25/big/img_703 +2002/08/26/big/img_615 +2002/08/17/big/img_434 +2002/08/02/big/img_677 +2002/08/18/big/img_276 +2002/08/05/big/img_3672 +2002/07/26/big/img_700 +2002/07/31/big/img_277 +2003/01/14/big/img_220 +2002/08/23/big/img_232 +2002/08/31/big/img_17422 +2002/07/22/big/img_508 +2002/08/13/big/img_681 +2003/01/15/big/img_638 +2002/08/30/big/img_18408 +2003/01/14/big/img_533 +2003/01/17/big/img_12 +2002/08/28/big/img_19388 +2002/08/08/big/img_133 +2002/07/26/big/img_885 +2002/08/19/big/img_387 +2002/08/27/big/img_19976 +2002/08/26/big/img_118 +2002/08/28/big/img_19146 +2002/08/05/big/img_3259 +2002/08/15/big/img_536 +2002/07/22/big/img_279 +2002/07/22/big/img_9 +2002/08/13/big/img_301 +2002/08/15/big/img_974 +2002/08/06/big/img_2355 +2002/08/01/big/img_1526 +2002/08/03/big/img_417 +2002/08/04/big/img_407 +2002/08/15/big/img_1029 +2002/07/29/big/img_700 +2002/08/01/big/img_1463 +2002/08/31/big/img_17365 +2002/07/28/big/img_223 +2002/07/19/big/img_827 +2002/07/27/big/img_531 +2002/07/19/big/img_845 +2002/08/20/big/img_382 +2002/07/31/big/img_268 +2002/08/27/big/img_19705 +2002/08/02/big/img_830 +2002/08/23/big/img_250 +2002/07/20/big/img_777 +2002/08/21/big/img_879 +2002/08/26/big/img_20146 +2002/08/23/big/img_789 +2002/08/06/big/img_2683 +2002/08/25/big/img_576 +2002/08/09/big/img_498 +2002/08/08/big/img_384 +2002/08/26/big/img_592 +2002/07/29/big/img_1470 +2002/08/21/big/img_452 +2002/08/30/big/img_18395 +2002/08/15/big/img_215 +2002/07/21/big/img_643 +2002/07/22/big/img_209 +2003/01/17/big/img_346 +2002/08/25/big/img_658 +2002/08/21/big/img_221 +2002/08/14/big/img_60 +2003/01/17/big/img_885 +2003/01/16/big/img_482 +2002/08/19/big/img_593 +2002/08/08/big/img_233 +2002/07/30/big/img_458 +2002/07/23/big/img_384 +2003/01/15/big/img_670 +2003/01/15/big/img_267 +2002/08/26/big/img_540 +2002/07/29/big/img_552 +2002/07/30/big/img_997 +2003/01/17/big/img_377 +2002/08/21/big/img_265 +2002/08/09/big/img_561 +2002/07/31/big/img_945 +2002/09/02/big/img_15252 +2002/08/11/big/img_276 +2002/07/22/big/img_491 +2002/07/26/big/img_517 +2002/08/14/big/img_726 +2002/08/08/big/img_46 +2002/08/28/big/img_19458 +2002/08/06/big/img_2935 +2002/07/29/big/img_1392 +2002/08/13/big/img_776 +2002/08/24/big/img_616 +2002/08/14/big/img_1065 +2002/07/29/big/img_889 +2002/08/18/big/img_188 +2002/08/07/big/img_1453 +2002/08/02/big/img_760 +2002/07/28/big/img_416 +2002/08/07/big/img_1393 +2002/08/26/big/img_292 
+2002/08/26/big/img_301 +2003/01/13/big/img_195 +2002/07/26/big/img_532 +2002/08/20/big/img_550 +2002/08/05/big/img_3658 +2002/08/26/big/img_738 +2002/09/02/big/img_15750 +2003/01/17/big/img_451 +2002/07/23/big/img_339 +2002/08/16/big/img_637 +2002/08/14/big/img_748 +2002/08/06/big/img_2739 +2002/07/25/big/img_482 +2002/08/19/big/img_191 +2002/08/26/big/img_537 +2003/01/15/big/img_716 +2003/01/15/big/img_767 +2002/08/02/big/img_452 +2002/08/08/big/img_1011 +2002/08/10/big/img_144 +2003/01/14/big/img_122 +2002/07/24/big/img_586 +2002/07/24/big/img_762 +2002/08/20/big/img_369 +2002/07/30/big/img_146 +2002/08/23/big/img_396 +2003/01/15/big/img_200 +2002/08/15/big/img_1183 +2003/01/14/big/img_698 +2002/08/09/big/img_792 +2002/08/06/big/img_2347 +2002/07/31/big/img_911 +2002/08/26/big/img_722 +2002/08/23/big/img_621 +2002/08/05/big/img_3790 +2003/01/13/big/img_633 +2002/08/09/big/img_224 +2002/07/24/big/img_454 +2002/07/21/big/img_202 +2002/08/02/big/img_630 +2002/08/30/big/img_18315 +2002/07/19/big/img_491 +2002/09/01/big/img_16456 +2002/08/09/big/img_242 +2002/07/25/big/img_595 +2002/07/22/big/img_522 +2002/08/01/big/img_1593 +2002/07/29/big/img_336 +2002/08/15/big/img_448 +2002/08/28/big/img_19281 +2002/07/29/big/img_342 +2002/08/12/big/img_78 +2003/01/14/big/img_525 +2002/07/28/big/img_147 +2002/08/11/big/img_353 +2002/08/22/big/img_513 +2002/08/04/big/img_721 +2002/08/17/big/img_247 +2003/01/14/big/img_891 +2002/08/20/big/img_853 +2002/07/19/big/img_414 +2002/08/01/big/img_1530 +2003/01/14/big/img_924 +2002/08/22/big/img_468 +2002/08/18/big/img_354 +2002/08/30/big/img_18193 +2002/08/23/big/img_492 +2002/08/15/big/img_871 +2002/08/12/big/img_494 +2002/08/06/big/img_2470 +2002/07/23/big/img_923 +2002/08/26/big/img_155 +2002/08/08/big/img_669 +2002/07/23/big/img_404 +2002/08/28/big/img_19421 +2002/08/29/big/img_18993 +2002/08/25/big/img_416 +2003/01/17/big/img_434 +2002/07/29/big/img_1370 +2002/07/28/big/img_483 +2002/08/11/big/img_50 +2002/08/10/big/img_404 +2002/09/02/big/img_15057 +2003/01/14/big/img_911 +2002/09/01/big/img_16697 +2003/01/16/big/img_665 +2002/09/01/big/img_16708 +2002/08/22/big/img_612 +2002/08/28/big/img_19471 +2002/08/02/big/img_198 +2003/01/16/big/img_527 +2002/08/22/big/img_209 +2002/08/30/big/img_18205 +2003/01/14/big/img_114 +2003/01/14/big/img_1028 +2003/01/16/big/img_894 +2003/01/14/big/img_837 +2002/07/30/big/img_9 +2002/08/06/big/img_2821 +2002/08/04/big/img_85 +2003/01/13/big/img_884 +2002/07/22/big/img_570 +2002/08/07/big/img_1773 +2002/07/26/big/img_208 +2003/01/17/big/img_946 +2002/07/19/big/img_930 +2003/01/01/big/img_698 +2003/01/17/big/img_612 +2002/07/19/big/img_372 +2002/07/30/big/img_721 +2003/01/14/big/img_649 +2002/08/19/big/img_4 +2002/07/25/big/img_1024 +2003/01/15/big/img_601 +2002/08/30/big/img_18470 +2002/07/22/big/img_29 +2002/08/07/big/img_1686 +2002/07/20/big/img_294 +2002/08/14/big/img_800 +2002/08/19/big/img_353 +2002/08/19/big/img_350 +2002/08/05/big/img_3392 +2002/08/09/big/img_622 +2003/01/15/big/img_236 +2002/08/11/big/img_643 +2002/08/05/big/img_3458 +2002/08/12/big/img_413 +2002/08/22/big/img_415 +2002/08/13/big/img_635 +2002/08/07/big/img_1198 +2002/08/04/big/img_873 +2002/08/12/big/img_407 +2003/01/15/big/img_346 +2002/08/02/big/img_275 +2002/08/17/big/img_997 +2002/08/21/big/img_958 +2002/08/20/big/img_579 +2002/07/29/big/img_142 +2003/01/14/big/img_1115 +2002/08/16/big/img_365 +2002/07/29/big/img_1414 +2002/08/17/big/img_489 +2002/08/13/big/img_1010 +2002/07/31/big/img_276 +2002/07/25/big/img_1000 +2002/08/23/big/img_524 
+2002/08/28/big/img_19147 +2003/01/13/big/img_433 +2002/08/20/big/img_205 +2003/01/01/big/img_458 +2002/07/29/big/img_1449 +2003/01/16/big/img_696 +2002/08/28/big/img_19296 +2002/08/29/big/img_18688 +2002/08/21/big/img_767 +2002/08/20/big/img_532 +2002/08/26/big/img_187 +2002/07/26/big/img_183 +2002/07/27/big/img_890 +2003/01/13/big/img_576 +2002/07/30/big/img_15 +2002/07/31/big/img_889 +2002/08/31/big/img_17759 +2003/01/14/big/img_1114 +2002/07/19/big/img_445 +2002/08/03/big/img_593 +2002/07/24/big/img_750 +2002/07/30/big/img_133 +2002/08/25/big/img_671 +2002/07/20/big/img_351 +2002/08/31/big/img_17276 +2002/08/05/big/img_3231 +2002/09/02/big/img_15882 +2002/08/14/big/img_115 +2002/08/02/big/img_1148 +2002/07/25/big/img_936 +2002/07/31/big/img_639 +2002/08/04/big/img_427 +2002/08/22/big/img_843 +2003/01/17/big/img_17 +2003/01/13/big/img_690 +2002/08/13/big/img_472 +2002/08/09/big/img_425 +2002/08/05/big/img_3450 +2003/01/17/big/img_439 +2002/08/13/big/img_539 +2002/07/28/big/img_35 +2002/08/16/big/img_241 +2002/08/06/big/img_2898 +2003/01/16/big/img_429 +2002/08/05/big/img_3817 +2002/08/27/big/img_19919 +2002/07/19/big/img_422 +2002/08/15/big/img_560 +2002/07/23/big/img_750 +2002/07/30/big/img_353 +2002/08/05/big/img_43 +2002/08/23/big/img_305 +2002/08/01/big/img_2137 +2002/08/30/big/img_18097 +2002/08/01/big/img_1389 +2002/08/02/big/img_308 +2003/01/14/big/img_652 +2002/08/01/big/img_1798 +2003/01/14/big/img_732 +2003/01/16/big/img_294 +2002/08/26/big/img_213 +2002/07/24/big/img_842 +2003/01/13/big/img_630 +2003/01/13/big/img_634 +2002/08/06/big/img_2285 +2002/08/01/big/img_2162 +2002/08/30/big/img_18134 +2002/08/02/big/img_1045 +2002/08/01/big/img_2143 +2002/07/25/big/img_135 +2002/07/20/big/img_645 +2002/08/05/big/img_3666 +2002/08/14/big/img_523 +2002/08/04/big/img_425 +2003/01/14/big/img_137 +2003/01/01/big/img_176 +2002/08/15/big/img_505 +2002/08/24/big/img_386 +2002/08/05/big/img_3187 +2002/08/15/big/img_419 +2003/01/13/big/img_520 +2002/08/04/big/img_444 +2002/08/26/big/img_483 +2002/08/05/big/img_3449 +2002/08/30/big/img_18409 +2002/08/28/big/img_19455 +2002/08/27/big/img_20090 +2002/07/23/big/img_625 +2002/08/24/big/img_205 +2002/08/08/big/img_938 +2003/01/13/big/img_527 +2002/08/07/big/img_1712 +2002/07/24/big/img_801 +2002/08/09/big/img_579 +2003/01/14/big/img_41 +2003/01/15/big/img_1130 +2002/07/21/big/img_672 +2002/08/07/big/img_1590 +2003/01/01/big/img_532 +2002/08/02/big/img_529 +2002/08/05/big/img_3591 +2002/08/23/big/img_5 +2003/01/14/big/img_882 +2002/08/28/big/img_19234 +2002/07/24/big/img_398 +2003/01/14/big/img_592 +2002/08/22/big/img_548 +2002/08/12/big/img_761 +2003/01/16/big/img_497 +2002/08/18/big/img_133 +2002/08/08/big/img_874 +2002/07/19/big/img_247 +2002/08/15/big/img_170 +2002/08/27/big/img_19679 +2002/08/20/big/img_246 +2002/08/24/big/img_358 +2002/07/29/big/img_599 +2002/08/01/big/img_1555 +2002/07/30/big/img_491 +2002/07/30/big/img_371 +2003/01/16/big/img_682 +2002/07/25/big/img_619 +2003/01/15/big/img_587 +2002/08/02/big/img_1212 +2002/08/01/big/img_2152 +2002/07/25/big/img_668 +2003/01/16/big/img_574 +2002/08/28/big/img_19464 +2002/08/11/big/img_536 +2002/07/24/big/img_201 +2002/08/05/big/img_3488 +2002/07/25/big/img_887 +2002/07/22/big/img_789 +2002/07/30/big/img_432 +2002/08/16/big/img_166 +2002/09/01/big/img_16333 +2002/07/26/big/img_1010 +2002/07/21/big/img_793 +2002/07/22/big/img_720 +2002/07/31/big/img_337 +2002/07/27/big/img_185 +2002/08/23/big/img_440 +2002/07/31/big/img_801 +2002/07/25/big/img_478 +2003/01/14/big/img_171 
+2002/08/07/big/img_1054 +2002/09/02/big/img_15659 +2002/07/29/big/img_1348 +2002/08/09/big/img_337 +2002/08/26/big/img_684 +2002/07/31/big/img_537 +2002/08/15/big/img_808 +2003/01/13/big/img_740 +2002/08/07/big/img_1667 +2002/08/03/big/img_404 +2002/08/06/big/img_2520 +2002/07/19/big/img_230 +2002/07/19/big/img_356 +2003/01/16/big/img_627 +2002/08/04/big/img_474 +2002/07/29/big/img_833 +2002/07/25/big/img_176 +2002/08/01/big/img_1684 +2002/08/21/big/img_643 +2002/08/27/big/img_19673 +2002/08/02/big/img_838 +2002/08/06/big/img_2378 +2003/01/15/big/img_48 +2002/07/30/big/img_470 +2002/08/15/big/img_963 +2002/08/24/big/img_444 +2002/08/16/big/img_662 +2002/08/15/big/img_1209 +2002/07/24/big/img_25 +2002/08/06/big/img_2740 +2002/07/29/big/img_996 +2002/08/31/big/img_18074 +2002/08/04/big/img_343 +2003/01/17/big/img_509 +2003/01/13/big/img_726 +2002/08/07/big/img_1466 +2002/07/26/big/img_307 +2002/08/10/big/img_598 +2002/08/13/big/img_890 +2002/08/14/big/img_997 +2002/07/19/big/img_392 +2002/08/02/big/img_475 +2002/08/29/big/img_19038 +2002/07/29/big/img_538 +2002/07/29/big/img_502 +2002/08/02/big/img_364 +2002/08/31/big/img_17353 +2002/08/08/big/img_539 +2002/08/01/big/img_1449 +2002/07/22/big/img_363 +2002/08/02/big/img_90 +2002/09/01/big/img_16867 +2002/08/05/big/img_3371 +2002/07/30/big/img_342 +2002/08/07/big/img_1363 +2002/08/22/big/img_790 +2003/01/15/big/img_404 +2002/08/05/big/img_3447 +2002/09/01/big/img_16167 +2003/01/13/big/img_840 +2002/08/22/big/img_1001 +2002/08/09/big/img_431 +2002/07/27/big/img_618 +2002/07/31/big/img_741 +2002/07/30/big/img_964 +2002/07/25/big/img_86 +2002/07/29/big/img_275 +2002/08/21/big/img_921 +2002/07/26/big/img_892 +2002/08/21/big/img_663 +2003/01/13/big/img_567 +2003/01/14/big/img_719 +2002/07/28/big/img_251 +2003/01/15/big/img_1123 +2002/07/29/big/img_260 +2002/08/24/big/img_337 +2002/08/01/big/img_1914 +2002/08/13/big/img_373 +2003/01/15/big/img_589 +2002/08/13/big/img_906 +2002/07/26/big/img_270 +2002/08/26/big/img_313 +2002/08/25/big/img_694 +2003/01/01/big/img_327 +2002/07/23/big/img_261 +2002/08/26/big/img_642 +2002/07/29/big/img_918 +2002/07/23/big/img_455 +2002/07/24/big/img_612 +2002/07/23/big/img_534 +2002/07/19/big/img_534 +2002/07/19/big/img_726 +2002/08/01/big/img_2146 +2002/08/02/big/img_543 +2003/01/16/big/img_777 +2002/07/30/big/img_484 +2002/08/13/big/img_1161 +2002/07/21/big/img_390 +2002/08/06/big/img_2288 +2002/08/21/big/img_677 +2002/08/13/big/img_747 +2002/08/15/big/img_1248 +2002/07/31/big/img_416 +2002/09/02/big/img_15259 +2002/08/16/big/img_781 +2002/08/24/big/img_754 +2002/07/24/big/img_803 +2002/08/20/big/img_609 +2002/08/28/big/img_19571 +2002/09/01/big/img_16140 +2002/08/26/big/img_769 +2002/07/20/big/img_588 +2002/08/02/big/img_898 +2002/07/21/big/img_466 +2002/08/14/big/img_1046 +2002/07/25/big/img_212 +2002/08/26/big/img_353 +2002/08/19/big/img_810 +2002/08/31/big/img_17824 +2002/08/12/big/img_631 +2002/07/19/big/img_828 +2002/07/24/big/img_130 +2002/08/25/big/img_580 +2002/07/31/big/img_699 +2002/07/23/big/img_808 +2002/07/31/big/img_377 +2003/01/16/big/img_570 +2002/09/01/big/img_16254 +2002/07/21/big/img_471 +2002/08/01/big/img_1548 +2002/08/18/big/img_252 +2002/08/19/big/img_576 +2002/08/20/big/img_464 +2002/07/27/big/img_735 +2002/08/21/big/img_589 +2003/01/15/big/img_1192 +2002/08/09/big/img_302 +2002/07/31/big/img_594 +2002/08/23/big/img_19 +2002/08/29/big/img_18819 +2002/08/19/big/img_293 +2002/07/30/big/img_331 +2002/08/23/big/img_607 +2002/07/30/big/img_363 +2002/08/16/big/img_766 +2003/01/13/big/img_481 
+2002/08/06/big/img_2515 +2002/09/02/big/img_15913 +2002/09/02/big/img_15827 +2002/09/02/big/img_15053 +2002/08/07/big/img_1576 +2002/07/23/big/img_268 +2002/08/21/big/img_152 +2003/01/15/big/img_578 +2002/07/21/big/img_589 +2002/07/20/big/img_548 +2002/08/27/big/img_19693 +2002/08/31/big/img_17252 +2002/07/31/big/img_138 +2002/07/23/big/img_372 +2002/08/16/big/img_695 +2002/07/27/big/img_287 +2002/08/15/big/img_315 +2002/08/10/big/img_361 +2002/07/29/big/img_899 +2002/08/13/big/img_771 +2002/08/21/big/img_92 +2003/01/15/big/img_425 +2003/01/16/big/img_450 +2002/09/01/big/img_16942 +2002/08/02/big/img_51 +2002/09/02/big/img_15379 +2002/08/24/big/img_147 +2002/08/30/big/img_18122 +2002/07/26/big/img_950 +2002/08/07/big/img_1400 +2002/08/17/big/img_468 +2002/08/15/big/img_470 +2002/07/30/big/img_318 +2002/07/22/big/img_644 +2002/08/27/big/img_19732 +2002/07/23/big/img_601 +2002/08/26/big/img_398 +2002/08/21/big/img_428 +2002/08/06/big/img_2119 +2002/08/29/big/img_19103 +2003/01/14/big/img_933 +2002/08/11/big/img_674 +2002/08/28/big/img_19420 +2002/08/03/big/img_418 +2002/08/17/big/img_312 +2002/07/25/big/img_1044 +2003/01/17/big/img_671 +2002/08/30/big/img_18297 +2002/07/25/big/img_755 +2002/07/23/big/img_471 +2002/08/21/big/img_39 +2002/07/26/big/img_699 +2003/01/14/big/img_33 +2002/07/31/big/img_411 +2002/08/16/big/img_645 +2003/01/17/big/img_116 +2002/09/02/big/img_15903 +2002/08/20/big/img_120 +2002/08/22/big/img_176 +2002/07/29/big/img_1316 +2002/08/27/big/img_19914 +2002/07/22/big/img_719 +2002/08/28/big/img_19239 +2003/01/13/big/img_385 +2002/08/08/big/img_525 +2002/07/19/big/img_782 +2002/08/13/big/img_843 +2002/07/30/big/img_107 +2002/08/11/big/img_752 +2002/07/29/big/img_383 +2002/08/26/big/img_249 +2002/08/29/big/img_18860 +2002/07/30/big/img_70 +2002/07/26/big/img_194 +2002/08/15/big/img_530 +2002/08/08/big/img_816 +2002/07/31/big/img_286 +2003/01/13/big/img_294 +2002/07/31/big/img_251 +2002/07/24/big/img_13 +2002/08/31/big/img_17938 +2002/07/22/big/img_642 +2003/01/14/big/img_728 +2002/08/18/big/img_47 +2002/08/22/big/img_306 +2002/08/20/big/img_348 +2002/08/15/big/img_764 +2002/08/08/big/img_163 +2002/07/23/big/img_531 +2002/07/23/big/img_467 +2003/01/16/big/img_743 +2003/01/13/big/img_535 +2002/08/02/big/img_523 +2002/08/22/big/img_120 +2002/08/11/big/img_496 +2002/08/29/big/img_19075 +2002/08/08/big/img_465 +2002/08/09/big/img_790 +2002/08/19/big/img_588 +2002/08/23/big/img_407 +2003/01/17/big/img_435 +2002/08/24/big/img_398 +2002/08/27/big/img_19899 +2003/01/15/big/img_335 +2002/08/13/big/img_493 +2002/09/02/big/img_15460 +2002/07/31/big/img_470 +2002/08/05/big/img_3550 +2002/07/28/big/img_123 +2002/08/01/big/img_1498 +2002/08/04/big/img_504 +2003/01/17/big/img_427 +2002/08/27/big/img_19708 +2002/07/27/big/img_861 +2002/07/25/big/img_685 +2002/07/31/big/img_207 +2003/01/14/big/img_745 +2002/08/31/big/img_17756 +2002/08/24/big/img_288 +2002/08/18/big/img_181 +2002/08/10/big/img_520 +2002/08/25/big/img_705 +2002/08/23/big/img_226 +2002/08/04/big/img_727 +2002/07/24/big/img_625 +2002/08/28/big/img_19157 +2002/08/23/big/img_586 +2002/07/31/big/img_232 +2003/01/13/big/img_240 +2003/01/14/big/img_321 +2003/01/15/big/img_533 +2002/07/23/big/img_480 +2002/07/24/big/img_371 +2002/08/21/big/img_702 +2002/08/31/big/img_17075 +2002/09/02/big/img_15278 +2002/07/29/big/img_246 +2003/01/15/big/img_829 +2003/01/15/big/img_1213 +2003/01/16/big/img_441 +2002/08/14/big/img_921 +2002/07/23/big/img_425 +2002/08/15/big/img_296 +2002/07/19/big/img_135 +2002/07/26/big/img_402 +2003/01/17/big/img_88 
+2002/08/20/big/img_872 +2002/08/13/big/img_1110 +2003/01/16/big/img_1040 +2002/07/23/big/img_9 +2002/08/13/big/img_700 +2002/08/16/big/img_371 +2002/08/27/big/img_19966 +2003/01/17/big/img_391 +2002/08/18/big/img_426 +2002/08/01/big/img_1618 +2002/07/21/big/img_754 +2003/01/14/big/img_1101 +2003/01/16/big/img_1022 +2002/07/22/big/img_275 +2002/08/24/big/img_86 +2002/08/17/big/img_582 +2003/01/15/big/img_765 +2003/01/17/big/img_449 +2002/07/28/big/img_265 +2003/01/13/big/img_552 +2002/07/28/big/img_115 +2003/01/16/big/img_56 +2002/08/02/big/img_1232 +2003/01/17/big/img_925 +2002/07/22/big/img_445 +2002/07/25/big/img_957 +2002/07/20/big/img_589 +2002/08/31/big/img_17107 +2002/07/29/big/img_483 +2002/08/14/big/img_1063 +2002/08/07/big/img_1545 +2002/08/14/big/img_680 +2002/09/01/big/img_16694 +2002/08/14/big/img_257 +2002/08/11/big/img_726 +2002/07/26/big/img_681 +2002/07/25/big/img_481 +2003/01/14/big/img_737 +2002/08/28/big/img_19480 +2003/01/16/big/img_362 +2002/08/27/big/img_19865 +2003/01/01/big/img_547 +2002/09/02/big/img_15074 +2002/08/01/big/img_1453 +2002/08/22/big/img_594 +2002/08/28/big/img_19263 +2002/08/13/big/img_478 +2002/07/29/big/img_1358 +2003/01/14/big/img_1022 +2002/08/16/big/img_450 +2002/08/02/big/img_159 +2002/07/26/big/img_781 +2003/01/13/big/img_601 +2002/08/20/big/img_407 +2002/08/15/big/img_468 +2002/08/31/big/img_17902 +2002/08/16/big/img_81 +2002/07/25/big/img_987 +2002/07/25/big/img_500 +2002/08/02/big/img_31 +2002/08/18/big/img_538 +2002/08/08/big/img_54 +2002/07/23/big/img_686 +2002/07/24/big/img_836 +2003/01/17/big/img_734 +2002/08/16/big/img_1055 +2003/01/16/big/img_521 +2002/07/25/big/img_612 +2002/08/22/big/img_778 +2002/08/03/big/img_251 +2002/08/12/big/img_436 +2002/08/23/big/img_705 +2002/07/28/big/img_243 +2002/07/25/big/img_1029 +2002/08/20/big/img_287 +2002/08/29/big/img_18739 +2002/08/05/big/img_3272 +2002/07/27/big/img_214 +2003/01/14/big/img_5 +2002/08/01/big/img_1380 +2002/08/29/big/img_19097 +2002/07/30/big/img_486 +2002/08/29/big/img_18707 +2002/08/10/big/img_559 +2002/08/15/big/img_365 +2002/08/09/big/img_525 +2002/08/10/big/img_689 +2002/07/25/big/img_502 +2002/08/03/big/img_667 +2002/08/10/big/img_855 +2002/08/10/big/img_706 +2002/08/18/big/img_603 +2003/01/16/big/img_1055 +2002/08/31/big/img_17890 +2002/08/15/big/img_761 +2003/01/15/big/img_489 +2002/08/26/big/img_351 +2002/08/01/big/img_1772 +2002/08/31/big/img_17729 +2002/07/25/big/img_609 +2003/01/13/big/img_539 +2002/07/27/big/img_686 +2002/07/31/big/img_311 +2002/08/22/big/img_799 +2003/01/16/big/img_936 +2002/08/31/big/img_17813 +2002/08/04/big/img_862 +2002/08/09/big/img_332 +2002/07/20/big/img_148 +2002/08/12/big/img_426 +2002/07/24/big/img_69 +2002/07/27/big/img_685 +2002/08/02/big/img_480 +2002/08/26/big/img_154 +2002/07/24/big/img_598 +2002/08/01/big/img_1881 +2002/08/20/big/img_667 +2003/01/14/big/img_495 +2002/07/21/big/img_744 +2002/07/30/big/img_150 +2002/07/23/big/img_924 +2002/08/08/big/img_272 +2002/07/23/big/img_310 +2002/07/25/big/img_1011 +2002/09/02/big/img_15725 +2002/07/19/big/img_814 +2002/08/20/big/img_936 +2002/07/25/big/img_85 +2002/08/24/big/img_662 +2002/08/09/big/img_495 +2003/01/15/big/img_196 +2002/08/16/big/img_707 +2002/08/28/big/img_19370 +2002/08/06/big/img_2366 +2002/08/06/big/img_3012 +2002/08/01/big/img_1452 +2002/07/31/big/img_742 +2002/07/27/big/img_914 +2003/01/13/big/img_290 +2002/07/31/big/img_288 +2002/08/02/big/img_171 +2002/08/22/big/img_191 +2002/07/27/big/img_1066 +2002/08/12/big/img_383 +2003/01/17/big/img_1018 +2002/08/01/big/img_1785 
+2002/08/11/big/img_390 +2002/08/27/big/img_20037 +2002/08/12/big/img_38 +2003/01/15/big/img_103 +2002/08/26/big/img_31 +2002/08/18/big/img_660 +2002/07/22/big/img_694 +2002/08/15/big/img_24 +2002/07/27/big/img_1077 +2002/08/01/big/img_1943 +2002/07/22/big/img_292 +2002/09/01/big/img_16857 +2002/07/22/big/img_892 +2003/01/14/big/img_46 +2002/08/09/big/img_469 +2002/08/09/big/img_414 +2003/01/16/big/img_40 +2002/08/28/big/img_19231 +2002/07/27/big/img_978 +2002/07/23/big/img_475 +2002/07/25/big/img_92 +2002/08/09/big/img_799 +2002/07/25/big/img_491 +2002/08/03/big/img_654 +2003/01/15/big/img_687 +2002/08/11/big/img_478 +2002/08/07/big/img_1664 +2002/08/20/big/img_362 +2002/08/01/big/img_1298 +2003/01/13/big/img_500 +2002/08/06/big/img_2896 +2002/08/30/big/img_18529 +2002/08/16/big/img_1020 +2002/07/29/big/img_892 +2002/08/29/big/img_18726 +2002/07/21/big/img_453 +2002/08/17/big/img_437 +2002/07/19/big/img_665 +2002/07/22/big/img_440 +2002/07/19/big/img_582 +2002/07/21/big/img_233 +2003/01/01/big/img_82 +2002/07/25/big/img_341 +2002/07/29/big/img_864 +2002/08/02/big/img_276 +2002/08/29/big/img_18654 +2002/07/27/big/img_1024 +2002/08/19/big/img_373 +2003/01/15/big/img_241 +2002/07/25/big/img_84 +2002/08/13/big/img_834 +2002/08/10/big/img_511 +2002/08/01/big/img_1627 +2002/08/08/big/img_607 +2002/08/06/big/img_2083 +2002/08/01/big/img_1486 +2002/08/08/big/img_700 +2002/08/01/big/img_1954 +2002/08/21/big/img_54 +2002/07/30/big/img_847 +2002/08/28/big/img_19169 +2002/07/21/big/img_549 +2002/08/03/big/img_693 +2002/07/31/big/img_1002 +2003/01/14/big/img_1035 +2003/01/16/big/img_622 +2002/07/30/big/img_1201 +2002/08/10/big/img_444 +2002/07/31/big/img_374 +2002/08/21/big/img_301 +2002/08/13/big/img_1095 +2003/01/13/big/img_288 +2002/07/25/big/img_232 +2003/01/13/big/img_967 +2002/08/26/big/img_360 +2002/08/05/big/img_67 +2002/08/29/big/img_18969 +2002/07/28/big/img_16 +2002/08/16/big/img_515 +2002/07/20/big/img_708 +2002/08/18/big/img_178 +2003/01/15/big/img_509 +2002/07/25/big/img_430 +2002/08/21/big/img_738 +2002/08/16/big/img_886 +2002/09/02/big/img_15605 +2002/09/01/big/img_16242 +2002/08/24/big/img_711 +2002/07/25/big/img_90 +2002/08/09/big/img_491 +2002/07/30/big/img_534 +2003/01/13/big/img_474 +2002/08/25/big/img_510 +2002/08/15/big/img_555 +2002/08/02/big/img_775 +2002/07/23/big/img_975 +2002/08/19/big/img_229 +2003/01/17/big/img_860 +2003/01/02/big/img_10 +2002/07/23/big/img_542 +2002/08/06/big/img_2535 +2002/07/22/big/img_37 +2002/08/06/big/img_2342 +2002/08/25/big/img_515 +2002/08/25/big/img_336 +2002/08/18/big/img_837 +2002/08/21/big/img_616 +2003/01/17/big/img_24 +2002/07/26/big/img_936 +2002/08/14/big/img_896 +2002/07/29/big/img_465 +2002/07/31/big/img_543 +2002/08/01/big/img_1411 +2002/08/02/big/img_423 +2002/08/21/big/img_44 +2002/07/31/big/img_11 +2003/01/15/big/img_628 +2003/01/15/big/img_605 +2002/07/30/big/img_571 +2002/07/23/big/img_428 +2002/08/15/big/img_942 +2002/07/26/big/img_531 +2003/01/16/big/img_59 +2002/08/02/big/img_410 +2002/07/31/big/img_230 +2002/08/19/big/img_806 +2003/01/14/big/img_462 +2002/08/16/big/img_370 +2002/08/13/big/img_380 +2002/08/16/big/img_932 +2002/07/19/big/img_393 +2002/08/20/big/img_764 +2002/08/15/big/img_616 +2002/07/26/big/img_267 +2002/07/27/big/img_1069 +2002/08/14/big/img_1041 +2003/01/13/big/img_594 +2002/09/01/big/img_16845 +2002/08/09/big/img_229 +2003/01/16/big/img_639 +2002/08/19/big/img_398 +2002/08/18/big/img_978 +2002/08/24/big/img_296 +2002/07/29/big/img_415 +2002/07/30/big/img_923 +2002/08/18/big/img_575 +2002/08/22/big/img_182 
+2002/07/25/big/img_806 +2002/07/22/big/img_49 +2002/07/29/big/img_989 +2003/01/17/big/img_789 +2003/01/15/big/img_503 +2002/09/01/big/img_16062 +2003/01/17/big/img_794 +2002/08/15/big/img_564 +2003/01/15/big/img_222 +2002/08/01/big/img_1656 +2003/01/13/big/img_432 +2002/07/19/big/img_426 +2002/08/17/big/img_244 +2002/08/13/big/img_805 +2002/09/02/big/img_15067 +2002/08/11/big/img_58 +2002/08/22/big/img_636 +2002/07/22/big/img_416 +2002/08/13/big/img_836 +2002/08/26/big/img_363 +2002/07/30/big/img_917 +2003/01/14/big/img_206 +2002/08/12/big/img_311 +2002/08/31/big/img_17623 +2002/07/29/big/img_661 +2003/01/13/big/img_417 +2002/08/02/big/img_463 +2002/08/02/big/img_669 +2002/08/26/big/img_670 +2002/08/02/big/img_375 +2002/07/19/big/img_209 +2002/08/08/big/img_115 +2002/08/21/big/img_399 +2002/08/20/big/img_911 +2002/08/07/big/img_1212 +2002/08/20/big/img_578 +2002/08/22/big/img_554 +2002/08/21/big/img_484 +2002/07/25/big/img_450 +2002/08/03/big/img_542 +2002/08/15/big/img_561 +2002/07/23/big/img_360 +2002/08/30/big/img_18137 +2002/07/25/big/img_250 +2002/08/03/big/img_647 +2002/08/20/big/img_375 +2002/08/14/big/img_387 +2002/09/01/big/img_16990 +2002/08/28/big/img_19341 +2003/01/15/big/img_239 +2002/08/20/big/img_528 +2002/08/12/big/img_130 +2002/09/02/big/img_15108 +2003/01/15/big/img_372 +2002/08/16/big/img_678 +2002/08/04/big/img_623 +2002/07/23/big/img_477 +2002/08/28/big/img_19590 +2003/01/17/big/img_978 +2002/09/01/big/img_16692 +2002/07/20/big/img_109 +2002/08/06/big/img_2660 +2003/01/14/big/img_464 +2002/08/09/big/img_618 +2002/07/22/big/img_722 +2002/08/25/big/img_419 +2002/08/03/big/img_314 +2002/08/25/big/img_40 +2002/07/27/big/img_430 +2002/08/10/big/img_569 +2002/08/23/big/img_398 +2002/07/23/big/img_893 +2002/08/16/big/img_261 +2002/08/06/big/img_2668 +2002/07/22/big/img_835 +2002/09/02/big/img_15093 +2003/01/16/big/img_65 +2002/08/21/big/img_448 +2003/01/14/big/img_351 +2003/01/17/big/img_133 +2002/07/28/big/img_493 +2003/01/15/big/img_640 +2002/09/01/big/img_16880 +2002/08/15/big/img_350 +2002/08/20/big/img_624 +2002/08/25/big/img_604 +2002/08/06/big/img_2200 +2002/08/23/big/img_290 +2002/08/13/big/img_1152 +2003/01/14/big/img_251 +2002/08/02/big/img_538 +2002/08/22/big/img_613 +2003/01/13/big/img_351 +2002/08/18/big/img_368 +2002/07/23/big/img_392 +2002/07/25/big/img_198 +2002/07/25/big/img_418 +2002/08/26/big/img_614 +2002/07/23/big/img_405 +2003/01/14/big/img_445 +2002/07/25/big/img_326 +2002/08/10/big/img_734 +2003/01/14/big/img_530 +2002/08/08/big/img_561 +2002/08/29/big/img_18990 +2002/08/10/big/img_576 +2002/07/29/big/img_1494 +2002/07/19/big/img_198 +2002/08/10/big/img_562 +2002/07/22/big/img_901 +2003/01/14/big/img_37 +2002/09/02/big/img_15629 +2003/01/14/big/img_58 +2002/08/01/big/img_1364 +2002/07/27/big/img_636 +2003/01/13/big/img_241 +2002/09/01/big/img_16988 +2003/01/13/big/img_560 +2002/08/09/big/img_533 +2002/07/31/big/img_249 +2003/01/17/big/img_1007 +2002/07/21/big/img_64 +2003/01/13/big/img_537 +2003/01/15/big/img_606 +2002/08/18/big/img_651 +2002/08/24/big/img_405 +2002/07/26/big/img_837 +2002/08/09/big/img_562 +2002/08/01/big/img_1983 +2002/08/03/big/img_514 +2002/07/29/big/img_314 +2002/08/12/big/img_493 +2003/01/14/big/img_121 +2003/01/14/big/img_479 +2002/08/04/big/img_410 +2002/07/22/big/img_607 +2003/01/17/big/img_417 +2002/07/20/big/img_547 +2002/08/13/big/img_396 +2002/08/31/big/img_17538 +2002/08/13/big/img_187 +2002/08/12/big/img_328 +2003/01/14/big/img_569 +2002/07/27/big/img_1081 +2002/08/14/big/img_504 +2002/08/23/big/img_785 
+2002/07/26/big/img_339 +2002/08/07/big/img_1156 +2002/08/07/big/img_1456 +2002/08/23/big/img_378 +2002/08/27/big/img_19719 +2002/07/31/big/img_39 +2002/07/31/big/img_883 +2003/01/14/big/img_676 +2002/07/29/big/img_214 +2002/07/26/big/img_669 +2002/07/25/big/img_202 +2002/08/08/big/img_259 +2003/01/17/big/img_943 +2003/01/15/big/img_512 +2002/08/05/big/img_3295 +2002/08/27/big/img_19685 +2002/08/08/big/img_277 +2002/08/30/big/img_18154 +2002/07/22/big/img_663 +2002/08/29/big/img_18914 +2002/07/31/big/img_908 +2002/08/27/big/img_19926 +2003/01/13/big/img_791 +2003/01/15/big/img_827 +2002/08/18/big/img_878 +2002/08/14/big/img_670 +2002/07/20/big/img_182 +2002/08/15/big/img_291 +2002/08/06/big/img_2600 +2002/07/23/big/img_587 +2002/08/14/big/img_577 +2003/01/15/big/img_585 +2002/07/30/big/img_310 +2002/08/03/big/img_658 +2002/08/10/big/img_157 +2002/08/19/big/img_811 +2002/07/29/big/img_1318 +2002/08/04/big/img_104 +2002/07/30/big/img_332 +2002/07/24/big/img_789 +2002/07/29/big/img_516 +2002/07/23/big/img_843 +2002/08/01/big/img_1528 +2002/08/13/big/img_798 +2002/08/07/big/img_1729 +2002/08/28/big/img_19448 +2003/01/16/big/img_95 +2002/08/12/big/img_473 +2002/07/27/big/img_269 +2003/01/16/big/img_621 +2002/07/29/big/img_772 +2002/07/24/big/img_171 +2002/07/19/big/img_429 +2002/08/07/big/img_1933 +2002/08/27/big/img_19629 +2002/08/05/big/img_3688 +2002/08/07/big/img_1691 +2002/07/23/big/img_600 +2002/07/29/big/img_666 +2002/08/25/big/img_566 +2002/08/06/big/img_2659 +2002/08/29/big/img_18929 +2002/08/16/big/img_407 +2002/08/18/big/img_774 +2002/08/19/big/img_249 +2002/08/06/big/img_2427 +2002/08/29/big/img_18899 +2002/08/01/big/img_1818 +2002/07/31/big/img_108 +2002/07/29/big/img_500 +2002/08/11/big/img_115 +2002/07/19/big/img_521 +2002/08/02/big/img_1163 +2002/07/22/big/img_62 +2002/08/13/big/img_466 +2002/08/21/big/img_956 +2002/08/23/big/img_602 +2002/08/20/big/img_858 +2002/07/25/big/img_690 +2002/07/19/big/img_130 +2002/08/04/big/img_874 +2002/07/26/big/img_489 +2002/07/22/big/img_548 +2002/08/10/big/img_191 +2002/07/25/big/img_1051 +2002/08/18/big/img_473 +2002/08/12/big/img_755 +2002/08/18/big/img_413 +2002/08/08/big/img_1044 +2002/08/17/big/img_680 +2002/08/26/big/img_235 +2002/08/20/big/img_330 +2002/08/22/big/img_344 +2002/08/09/big/img_593 +2002/07/31/big/img_1006 +2002/08/14/big/img_337 +2002/08/16/big/img_728 +2002/07/24/big/img_834 +2002/08/04/big/img_552 +2002/09/02/big/img_15213 +2002/07/25/big/img_725 +2002/08/30/big/img_18290 +2003/01/01/big/img_475 +2002/07/27/big/img_1083 +2002/08/29/big/img_18955 +2002/08/31/big/img_17232 +2002/08/08/big/img_480 +2002/08/01/big/img_1311 +2002/07/30/big/img_745 +2002/08/03/big/img_649 +2002/08/12/big/img_193 +2002/07/29/big/img_228 +2002/07/25/big/img_836 +2002/08/20/big/img_400 +2002/07/30/big/img_507 +2002/09/02/big/img_15072 +2002/07/26/big/img_658 +2002/07/28/big/img_503 +2002/08/05/big/img_3814 +2002/08/24/big/img_745 +2003/01/13/big/img_817 +2002/08/08/big/img_579 +2002/07/22/big/img_251 +2003/01/13/big/img_689 +2002/07/25/big/img_407 +2002/08/13/big/img_1050 +2002/08/14/big/img_733 +2002/07/24/big/img_82 +2003/01/17/big/img_288 +2003/01/15/big/img_475 +2002/08/14/big/img_620 +2002/08/21/big/img_167 +2002/07/19/big/img_300 +2002/07/26/big/img_219 +2002/08/01/big/img_1468 +2002/07/23/big/img_260 +2002/08/09/big/img_555 +2002/07/19/big/img_160 +2002/08/02/big/img_1060 +2003/01/14/big/img_149 +2002/08/15/big/img_346 +2002/08/24/big/img_597 +2002/08/22/big/img_502 +2002/08/30/big/img_18228 +2002/07/21/big/img_766 +2003/01/15/big/img_841 
+2002/07/24/big/img_516 +2002/08/02/big/img_265 +2002/08/15/big/img_1243 +2003/01/15/big/img_223 +2002/08/04/big/img_236 +2002/07/22/big/img_309 +2002/07/20/big/img_656 +2002/07/31/big/img_412 +2002/09/01/big/img_16462 +2003/01/16/big/img_431 +2002/07/22/big/img_793 +2002/08/15/big/img_877 +2002/07/26/big/img_282 +2002/07/25/big/img_529 +2002/08/24/big/img_613 +2003/01/17/big/img_700 +2002/08/06/big/img_2526 +2002/08/24/big/img_394 +2002/08/21/big/img_521 +2002/08/25/big/img_560 +2002/07/29/big/img_966 +2002/07/25/big/img_448 +2003/01/13/big/img_782 +2002/08/21/big/img_296 +2002/09/01/big/img_16755 +2002/08/05/big/img_3552 +2002/09/02/big/img_15823 +2003/01/14/big/img_193 +2002/07/21/big/img_159 +2002/08/02/big/img_564 +2002/08/16/big/img_300 +2002/07/19/big/img_269 +2002/08/13/big/img_676 +2002/07/28/big/img_57 +2002/08/05/big/img_3318 +2002/07/31/big/img_218 +2002/08/21/big/img_898 +2002/07/29/big/img_109 +2002/07/19/big/img_854 +2002/08/23/big/img_311 +2002/08/14/big/img_318 +2002/07/25/big/img_523 +2002/07/21/big/img_678 +2003/01/17/big/img_690 +2002/08/28/big/img_19503 +2002/08/18/big/img_251 +2002/08/22/big/img_672 +2002/08/20/big/img_663 +2002/08/02/big/img_148 +2002/09/02/big/img_15580 +2002/07/25/big/img_778 +2002/08/14/big/img_565 +2002/08/12/big/img_374 +2002/08/13/big/img_1018 +2002/08/20/big/img_474 +2002/08/25/big/img_33 +2002/08/02/big/img_1190 +2002/08/08/big/img_864 +2002/08/14/big/img_1071 +2002/08/30/big/img_18103 +2002/08/18/big/img_533 +2003/01/16/big/img_650 +2002/07/25/big/img_108 +2002/07/26/big/img_81 +2002/07/27/big/img_543 +2002/07/29/big/img_521 +2003/01/13/big/img_434 +2002/08/26/big/img_674 +2002/08/06/big/img_2932 +2002/08/07/big/img_1262 +2003/01/15/big/img_201 +2003/01/16/big/img_673 +2002/09/02/big/img_15988 +2002/07/29/big/img_1306 +2003/01/14/big/img_1072 +2002/08/30/big/img_18232 +2002/08/05/big/img_3711 +2002/07/23/big/img_775 +2002/08/01/big/img_16 +2003/01/16/big/img_630 +2002/08/22/big/img_695 +2002/08/14/big/img_51 +2002/08/14/big/img_782 +2002/08/24/big/img_742 +2003/01/14/big/img_512 +2003/01/15/big/img_1183 +2003/01/15/big/img_714 +2002/08/01/big/img_2078 +2002/07/31/big/img_682 +2002/09/02/big/img_15687 +2002/07/26/big/img_518 +2002/08/27/big/img_19676 +2002/09/02/big/img_15969 +2002/08/02/big/img_931 +2002/08/25/big/img_508 +2002/08/29/big/img_18616 +2002/07/22/big/img_839 +2002/07/28/big/img_313 +2003/01/14/big/img_155 +2002/08/02/big/img_1105 +2002/08/09/big/img_53 +2002/08/16/big/img_469 +2002/08/15/big/img_502 +2002/08/20/big/img_575 +2002/07/25/big/img_138 +2003/01/16/big/img_579 +2002/07/19/big/img_352 +2003/01/14/big/img_762 +2003/01/01/big/img_588 +2002/08/02/big/img_981 +2002/08/21/big/img_447 +2002/09/01/big/img_16151 +2003/01/14/big/img_769 +2002/08/23/big/img_461 +2002/08/17/big/img_240 +2002/09/02/big/img_15220 +2002/07/19/big/img_408 +2002/09/02/big/img_15496 +2002/07/29/big/img_758 +2002/08/28/big/img_19392 +2002/08/06/big/img_2723 +2002/08/31/big/img_17752 +2002/08/23/big/img_469 +2002/08/13/big/img_515 +2002/09/02/big/img_15551 +2002/08/03/big/img_462 +2002/07/24/big/img_613 +2002/07/22/big/img_61 +2002/08/08/big/img_171 +2002/08/21/big/img_177 +2003/01/14/big/img_105 +2002/08/02/big/img_1017 +2002/08/22/big/img_106 +2002/07/27/big/img_542 +2002/07/21/big/img_665 +2002/07/23/big/img_595 +2002/08/04/big/img_657 +2002/08/29/big/img_19002 +2003/01/15/big/img_550 +2002/08/14/big/img_662 +2002/07/20/big/img_425 +2002/08/30/big/img_18528 +2002/07/26/big/img_611 +2002/07/22/big/img_849 +2002/08/07/big/img_1655 +2002/08/21/big/img_638 
+2003/01/17/big/img_732 +2003/01/01/big/img_496 +2002/08/18/big/img_713 +2002/08/08/big/img_109 +2002/07/27/big/img_1008 +2002/07/20/big/img_559 +2002/08/16/big/img_699 +2002/08/31/big/img_17702 +2002/07/31/big/img_1013 +2002/08/01/big/img_2027 +2002/08/02/big/img_1001 +2002/08/03/big/img_210 +2002/08/01/big/img_2087 +2003/01/14/big/img_199 +2002/07/29/big/img_48 +2002/07/19/big/img_727 +2002/08/09/big/img_249 +2002/08/04/big/img_632 +2002/08/22/big/img_620 +2003/01/01/big/img_457 +2002/08/05/big/img_3223 +2002/07/27/big/img_240 +2002/07/25/big/img_797 +2002/08/13/big/img_430 +2002/07/25/big/img_615 +2002/08/12/big/img_28 +2002/07/30/big/img_220 +2002/07/24/big/img_89 +2002/08/21/big/img_357 +2002/08/09/big/img_590 +2003/01/13/big/img_525 +2002/08/17/big/img_818 +2003/01/02/big/img_7 +2002/07/26/big/img_636 +2003/01/13/big/img_1122 +2002/07/23/big/img_810 +2002/08/20/big/img_888 +2002/07/27/big/img_3 +2002/08/15/big/img_451 +2002/09/02/big/img_15787 +2002/07/31/big/img_281 +2002/08/05/big/img_3274 +2002/08/07/big/img_1254 +2002/07/31/big/img_27 +2002/08/01/big/img_1366 +2002/07/30/big/img_182 +2002/08/27/big/img_19690 +2002/07/29/big/img_68 +2002/08/23/big/img_754 +2002/07/30/big/img_540 +2002/08/27/big/img_20063 +2002/08/14/big/img_471 +2002/08/02/big/img_615 +2002/07/30/big/img_186 +2002/08/25/big/img_150 +2002/07/27/big/img_626 +2002/07/20/big/img_225 +2003/01/15/big/img_1252 +2002/07/19/big/img_367 +2003/01/15/big/img_582 +2002/08/09/big/img_572 +2002/08/08/big/img_428 +2003/01/15/big/img_639 +2002/08/28/big/img_19245 +2002/07/24/big/img_321 +2002/08/02/big/img_662 +2002/08/08/big/img_1033 +2003/01/17/big/img_867 +2002/07/22/big/img_652 +2003/01/14/big/img_224 +2002/08/18/big/img_49 +2002/07/26/big/img_46 +2002/08/31/big/img_18021 +2002/07/25/big/img_151 +2002/08/23/big/img_540 +2002/08/25/big/img_693 +2002/07/23/big/img_340 +2002/07/28/big/img_117 +2002/09/02/big/img_15768 +2002/08/26/big/img_562 +2002/07/24/big/img_480 +2003/01/15/big/img_341 +2002/08/10/big/img_783 +2002/08/20/big/img_132 +2003/01/14/big/img_370 +2002/07/20/big/img_720 +2002/08/03/big/img_144 +2002/08/20/big/img_538 +2002/08/01/big/img_1745 +2002/08/11/big/img_683 +2002/08/03/big/img_328 +2002/08/10/big/img_793 +2002/08/14/big/img_689 +2002/08/02/big/img_162 +2003/01/17/big/img_411 +2002/07/31/big/img_361 +2002/08/15/big/img_289 +2002/08/08/big/img_254 +2002/08/15/big/img_996 +2002/08/20/big/img_785 +2002/07/24/big/img_511 +2002/08/06/big/img_2614 +2002/08/29/big/img_18733 +2002/08/17/big/img_78 +2002/07/30/big/img_378 +2002/08/31/big/img_17947 +2002/08/26/big/img_88 +2002/07/30/big/img_558 +2002/08/02/big/img_67 +2003/01/14/big/img_325 +2002/07/29/big/img_1357 +2002/07/19/big/img_391 +2002/07/30/big/img_307 +2003/01/13/big/img_219 +2002/07/24/big/img_807 +2002/08/23/big/img_543 +2002/08/29/big/img_18620 +2002/07/22/big/img_769 +2002/08/26/big/img_503 +2002/07/30/big/img_78 +2002/08/14/big/img_1036 +2002/08/09/big/img_58 +2002/07/24/big/img_616 +2002/08/02/big/img_464 +2002/07/26/big/img_576 +2002/07/22/big/img_273 +2003/01/16/big/img_470 +2002/07/29/big/img_329 +2002/07/30/big/img_1086 +2002/07/31/big/img_353 +2002/09/02/big/img_15275 +2003/01/17/big/img_555 +2002/08/26/big/img_212 +2002/08/01/big/img_1692 +2003/01/15/big/img_600 +2002/07/29/big/img_825 +2002/08/08/big/img_68 +2002/08/10/big/img_719 +2002/07/31/big/img_636 +2002/07/29/big/img_325 +2002/07/21/big/img_515 +2002/07/22/big/img_705 +2003/01/13/big/img_818 +2002/08/09/big/img_486 +2002/08/22/big/img_141 +2002/07/22/big/img_303 +2002/08/09/big/img_393 
+2002/07/29/big/img_963 +2002/08/02/big/img_1215 +2002/08/19/big/img_674 +2002/08/12/big/img_690 +2002/08/21/big/img_637 +2002/08/21/big/img_841 +2002/08/24/big/img_71 +2002/07/25/big/img_596 +2002/07/24/big/img_864 +2002/08/18/big/img_293 +2003/01/14/big/img_657 +2002/08/15/big/img_411 +2002/08/16/big/img_348 +2002/08/05/big/img_3157 +2002/07/20/big/img_663 +2003/01/13/big/img_654 +2003/01/16/big/img_433 +2002/08/30/big/img_18200 +2002/08/12/big/img_226 +2003/01/16/big/img_491 +2002/08/08/big/img_666 +2002/07/19/big/img_576 +2003/01/15/big/img_776 +2003/01/16/big/img_899 +2002/07/19/big/img_397 +2002/08/14/big/img_44 +2003/01/15/big/img_762 +2002/08/02/big/img_982 +2002/09/02/big/img_15234 +2002/08/17/big/img_556 +2002/08/21/big/img_410 +2002/08/21/big/img_386 +2002/07/19/big/img_690 +2002/08/05/big/img_3052 +2002/08/14/big/img_219 +2002/08/16/big/img_273 +2003/01/15/big/img_752 +2002/08/08/big/img_184 +2002/07/31/big/img_743 +2002/08/23/big/img_338 +2003/01/14/big/img_1055 +2002/08/05/big/img_3405 +2003/01/15/big/img_17 +2002/08/03/big/img_141 +2002/08/14/big/img_549 +2002/07/27/big/img_1034 +2002/07/31/big/img_932 +2002/08/30/big/img_18487 +2002/09/02/big/img_15814 +2002/08/01/big/img_2086 +2002/09/01/big/img_16535 +2002/07/22/big/img_500 +2003/01/13/big/img_400 +2002/08/25/big/img_607 +2002/08/30/big/img_18384 +2003/01/14/big/img_951 +2002/08/13/big/img_1150 +2002/08/08/big/img_1022 +2002/08/10/big/img_428 +2002/08/28/big/img_19242 +2002/08/05/big/img_3098 +2002/07/23/big/img_400 +2002/08/26/big/img_365 +2002/07/20/big/img_318 +2002/08/13/big/img_740 +2003/01/16/big/img_37 +2002/08/26/big/img_274 +2002/08/02/big/img_205 +2002/08/21/big/img_695 +2002/08/06/big/img_2289 +2002/08/20/big/img_794 +2002/08/18/big/img_438 +2002/08/07/big/img_1380 +2002/08/02/big/img_737 +2002/08/07/big/img_1651 +2002/08/15/big/img_1238 +2002/08/01/big/img_1681 +2002/08/06/big/img_3017 +2002/07/23/big/img_706 +2002/07/31/big/img_392 +2002/08/09/big/img_539 +2002/07/29/big/img_835 +2002/08/26/big/img_723 +2002/08/28/big/img_19235 +2003/01/16/big/img_353 +2002/08/10/big/img_150 +2002/08/29/big/img_19025 +2002/08/21/big/img_310 +2002/08/10/big/img_823 +2002/07/26/big/img_981 +2002/08/11/big/img_288 +2002/08/19/big/img_534 +2002/08/21/big/img_300 +2002/07/31/big/img_49 +2002/07/30/big/img_469 +2002/08/28/big/img_19197 +2002/08/25/big/img_205 +2002/08/10/big/img_390 +2002/08/23/big/img_291 +2002/08/26/big/img_230 +2002/08/18/big/img_76 +2002/07/23/big/img_409 +2002/08/14/big/img_1053 +2003/01/14/big/img_291 +2002/08/10/big/img_503 +2002/08/27/big/img_19928 +2002/08/03/big/img_563 +2002/08/17/big/img_250 +2002/08/06/big/img_2381 +2002/08/17/big/img_948 +2002/08/06/big/img_2710 +2002/07/22/big/img_696 +2002/07/31/big/img_670 +2002/08/12/big/img_594 +2002/07/29/big/img_624 +2003/01/17/big/img_934 +2002/08/03/big/img_584 +2002/08/22/big/img_1003 +2002/08/05/big/img_3396 +2003/01/13/big/img_570 +2002/08/02/big/img_219 +2002/09/02/big/img_15774 +2002/08/16/big/img_818 +2002/08/23/big/img_402 +2003/01/14/big/img_552 +2002/07/29/big/img_71 +2002/08/05/big/img_3592 +2002/08/16/big/img_80 +2002/07/27/big/img_672 +2003/01/13/big/img_470 +2003/01/16/big/img_702 +2002/09/01/big/img_16130 +2002/08/08/big/img_240 +2002/09/01/big/img_16338 +2002/07/26/big/img_312 +2003/01/14/big/img_538 +2002/07/20/big/img_695 +2002/08/30/big/img_18098 +2002/08/25/big/img_259 +2002/08/16/big/img_1042 +2002/08/09/big/img_837 +2002/08/31/big/img_17760 +2002/07/31/big/img_14 +2002/08/09/big/img_361 +2003/01/16/big/img_107 +2002/08/14/big/img_124 
+2002/07/19/big/img_463 +2003/01/15/big/img_275 +2002/07/25/big/img_1151 +2002/07/29/big/img_1501 +2002/08/27/big/img_19889 +2002/08/29/big/img_18603 +2003/01/17/big/img_601 +2002/08/25/big/img_355 +2002/08/08/big/img_297 +2002/08/20/big/img_290 +2002/07/31/big/img_195 +2003/01/01/big/img_336 +2002/08/18/big/img_369 +2002/07/25/big/img_621 +2002/08/11/big/img_508 +2003/01/14/big/img_458 +2003/01/15/big/img_795 +2002/08/12/big/img_498 +2002/08/01/big/img_1734 +2002/08/02/big/img_246 +2002/08/16/big/img_565 +2002/08/11/big/img_475 +2002/08/22/big/img_408 +2002/07/28/big/img_78 +2002/07/21/big/img_81 +2003/01/14/big/img_697 +2002/08/14/big/img_661 +2002/08/15/big/img_507 +2002/08/19/big/img_55 +2002/07/22/big/img_152 +2003/01/14/big/img_470 +2002/08/03/big/img_379 +2002/08/22/big/img_506 +2003/01/16/big/img_966 +2002/08/18/big/img_698 +2002/08/24/big/img_528 +2002/08/23/big/img_10 +2002/08/01/big/img_1655 +2002/08/22/big/img_953 +2002/07/19/big/img_630 +2002/07/22/big/img_889 +2002/08/16/big/img_351 +2003/01/16/big/img_83 +2002/07/19/big/img_805 +2002/08/14/big/img_704 +2002/07/19/big/img_389 +2002/08/31/big/img_17765 +2002/07/29/big/img_606 +2003/01/17/big/img_939 +2002/09/02/big/img_15081 +2002/08/21/big/img_181 +2002/07/29/big/img_1321 +2002/07/21/big/img_497 +2002/07/20/big/img_539 +2002/08/24/big/img_119 +2002/08/01/big/img_1281 +2002/07/26/big/img_207 +2002/07/26/big/img_432 +2002/07/27/big/img_1006 +2002/08/05/big/img_3087 +2002/08/14/big/img_252 +2002/08/14/big/img_798 +2002/07/24/big/img_538 +2002/09/02/big/img_15507 +2002/08/08/big/img_901 +2003/01/14/big/img_557 +2002/08/07/big/img_1819 +2002/08/04/big/img_470 +2002/08/01/big/img_1504 +2002/08/16/big/img_1070 +2002/08/16/big/img_372 +2002/08/23/big/img_416 +2002/08/30/big/img_18208 +2002/08/01/big/img_2043 +2002/07/22/big/img_385 +2002/08/22/big/img_466 +2002/08/21/big/img_869 +2002/08/28/big/img_19429 +2002/08/02/big/img_770 +2002/07/23/big/img_433 +2003/01/14/big/img_13 +2002/07/27/big/img_953 +2002/09/02/big/img_15728 +2002/08/01/big/img_1361 +2002/08/29/big/img_18897 +2002/08/26/big/img_534 +2002/08/11/big/img_121 +2002/08/26/big/img_20130 +2002/07/31/big/img_363 +2002/08/13/big/img_978 +2002/07/25/big/img_835 +2002/08/02/big/img_906 +2003/01/14/big/img_548 +2002/07/30/big/img_80 +2002/07/26/big/img_982 +2003/01/16/big/img_99 +2002/08/19/big/img_362 +2002/08/24/big/img_376 +2002/08/07/big/img_1264 +2002/07/27/big/img_938 +2003/01/17/big/img_535 +2002/07/26/big/img_457 +2002/08/08/big/img_848 +2003/01/15/big/img_859 +2003/01/15/big/img_622 +2002/07/30/big/img_403 +2002/07/29/big/img_217 +2002/07/26/big/img_891 +2002/07/24/big/img_70 +2002/08/25/big/img_619 +2002/08/05/big/img_3375 +2002/08/01/big/img_2160 +2002/08/06/big/img_2227 +2003/01/14/big/img_117 +2002/08/14/big/img_227 +2002/08/13/big/img_565 +2002/08/19/big/img_625 +2002/08/03/big/img_812 +2002/07/24/big/img_41 +2002/08/16/big/img_235 +2002/07/29/big/img_759 +2002/07/21/big/img_433 +2002/07/29/big/img_190 +2003/01/16/big/img_435 +2003/01/13/big/img_708 +2002/07/30/big/img_57 +2002/08/22/big/img_162 +2003/01/01/big/img_558 +2003/01/15/big/img_604 +2002/08/16/big/img_935 +2002/08/20/big/img_394 +2002/07/28/big/img_465 +2002/09/02/big/img_15534 +2002/08/16/big/img_87 +2002/07/22/big/img_469 +2002/08/12/big/img_245 +2003/01/13/big/img_236 +2002/08/06/big/img_2736 +2002/08/03/big/img_348 +2003/01/14/big/img_218 +2002/07/26/big/img_232 +2003/01/15/big/img_244 +2002/07/25/big/img_1121 +2002/08/01/big/img_1484 +2002/07/26/big/img_541 +2002/08/07/big/img_1244 
+2002/07/31/big/img_3 +2002/08/30/big/img_18437 +2002/08/29/big/img_19094 +2002/08/01/big/img_1355 +2002/08/19/big/img_338 +2002/07/19/big/img_255 +2002/07/21/big/img_76 +2002/08/25/big/img_199 +2002/08/12/big/img_740 +2002/07/30/big/img_852 +2002/08/15/big/img_599 +2002/08/23/big/img_254 +2002/08/19/big/img_125 +2002/07/24/big/img_2 +2002/08/04/big/img_145 +2002/08/05/big/img_3137 +2002/07/28/big/img_463 +2003/01/14/big/img_801 +2002/07/23/big/img_366 +2002/08/26/big/img_600 +2002/08/26/big/img_649 +2002/09/02/big/img_15849 +2002/07/26/big/img_248 +2003/01/13/big/img_200 +2002/08/07/big/img_1794 +2002/08/31/big/img_17270 +2002/08/23/big/img_608 +2003/01/13/big/img_837 +2002/08/23/big/img_581 +2002/08/20/big/img_754 +2002/08/18/big/img_183 +2002/08/20/big/img_328 +2002/07/22/big/img_494 +2002/07/29/big/img_399 +2002/08/28/big/img_19284 +2002/08/08/big/img_566 +2002/07/25/big/img_376 +2002/07/23/big/img_138 +2002/07/25/big/img_435 +2002/08/17/big/img_685 +2002/07/19/big/img_90 +2002/07/20/big/img_716 +2002/08/31/big/img_17458 +2002/08/26/big/img_461 +2002/07/25/big/img_355 +2002/08/06/big/img_2152 +2002/07/27/big/img_932 +2002/07/23/big/img_232 +2002/08/08/big/img_1020 +2002/07/31/big/img_366 +2002/08/06/big/img_2667 +2002/08/21/big/img_465 +2002/08/15/big/img_305 +2002/08/02/big/img_247 +2002/07/28/big/img_46 +2002/08/27/big/img_19922 +2002/08/23/big/img_643 +2003/01/13/big/img_624 +2002/08/23/big/img_625 +2002/08/05/big/img_3787 +2003/01/13/big/img_627 +2002/09/01/big/img_16381 +2002/08/05/big/img_3668 +2002/07/21/big/img_535 +2002/08/27/big/img_19680 +2002/07/22/big/img_413 +2002/07/29/big/img_481 +2003/01/15/big/img_496 +2002/07/23/big/img_701 +2002/08/29/big/img_18670 +2002/07/28/big/img_319 +2003/01/14/big/img_517 +2002/07/26/big/img_256 +2003/01/16/big/img_593 +2002/07/30/big/img_956 +2002/07/30/big/img_667 +2002/07/25/big/img_100 +2002/08/11/big/img_570 +2002/07/26/big/img_745 +2002/08/04/big/img_834 +2002/08/25/big/img_521 +2002/08/01/big/img_2148 +2002/09/02/big/img_15183 +2002/08/22/big/img_514 +2002/08/23/big/img_477 +2002/07/23/big/img_336 +2002/07/26/big/img_481 +2002/08/20/big/img_409 +2002/07/23/big/img_918 +2002/08/09/big/img_474 +2002/08/02/big/img_929 +2002/08/31/big/img_17932 +2002/08/19/big/img_161 +2002/08/09/big/img_667 +2002/07/31/big/img_805 +2002/09/02/big/img_15678 +2002/08/31/big/img_17509 +2002/08/29/big/img_18998 +2002/07/23/big/img_301 +2002/08/07/big/img_1612 +2002/08/06/big/img_2472 +2002/07/23/big/img_466 +2002/08/27/big/img_19634 +2003/01/16/big/img_16 +2002/08/14/big/img_193 +2002/08/21/big/img_340 +2002/08/27/big/img_19799 +2002/08/01/big/img_1345 +2002/08/07/big/img_1448 +2002/08/11/big/img_324 +2003/01/16/big/img_754 +2002/08/13/big/img_418 +2003/01/16/big/img_544 +2002/08/19/big/img_135 +2002/08/10/big/img_455 +2002/08/10/big/img_693 +2002/08/31/big/img_17967 +2002/08/28/big/img_19229 +2002/08/04/big/img_811 +2002/09/01/big/img_16225 +2003/01/16/big/img_428 +2002/09/02/big/img_15295 +2002/07/26/big/img_108 +2002/07/21/big/img_477 +2002/08/07/big/img_1354 +2002/08/23/big/img_246 +2002/08/16/big/img_652 +2002/07/27/big/img_553 +2002/07/31/big/img_346 +2002/08/04/big/img_537 +2002/08/08/big/img_498 +2002/08/29/big/img_18956 +2003/01/13/big/img_922 +2002/08/31/big/img_17425 +2002/07/26/big/img_438 +2002/08/19/big/img_185 +2003/01/16/big/img_33 +2002/08/10/big/img_252 +2002/07/29/big/img_598 +2002/08/27/big/img_19820 +2002/08/06/big/img_2664 +2002/08/20/big/img_705 +2003/01/14/big/img_816 +2002/08/03/big/img_552 +2002/07/25/big/img_561 
+2002/07/25/big/img_934 +2002/08/01/big/img_1893 +2003/01/14/big/img_746 +2003/01/16/big/img_519 +2002/08/03/big/img_681 +2002/07/24/big/img_808 +2002/08/14/big/img_803 +2002/08/25/big/img_155 +2002/07/30/big/img_1107 +2002/08/29/big/img_18882 +2003/01/15/big/img_598 +2002/08/19/big/img_122 +2002/07/30/big/img_428 +2002/07/24/big/img_684 +2002/08/22/big/img_192 +2002/08/22/big/img_543 +2002/08/07/big/img_1318 +2002/08/18/big/img_25 +2002/07/26/big/img_583 +2002/07/20/big/img_464 +2002/08/19/big/img_664 +2002/08/24/big/img_861 +2002/09/01/big/img_16136 +2002/08/22/big/img_400 +2002/08/12/big/img_445 +2003/01/14/big/img_174 +2002/08/27/big/img_19677 +2002/08/31/big/img_17214 +2002/08/30/big/img_18175 +2003/01/17/big/img_402 +2002/08/06/big/img_2396 +2002/08/18/big/img_448 +2002/08/21/big/img_165 +2002/08/31/big/img_17609 +2003/01/01/big/img_151 +2002/08/26/big/img_372 +2002/09/02/big/img_15994 +2002/07/26/big/img_660 +2002/09/02/big/img_15197 +2002/07/29/big/img_258 +2002/08/30/big/img_18525 +2003/01/13/big/img_368 +2002/07/29/big/img_1538 +2002/07/21/big/img_787 +2002/08/18/big/img_152 +2002/08/06/big/img_2379 +2003/01/17/big/img_864 +2002/08/27/big/img_19998 +2002/08/01/big/img_1634 +2002/07/25/big/img_414 +2002/08/22/big/img_627 +2002/08/07/big/img_1669 +2002/08/16/big/img_1052 +2002/08/31/big/img_17796 +2002/08/18/big/img_199 +2002/09/02/big/img_15147 +2002/08/09/big/img_460 +2002/08/14/big/img_581 +2002/08/30/big/img_18286 +2002/07/26/big/img_337 +2002/08/18/big/img_589 +2003/01/14/big/img_866 +2002/07/20/big/img_624 +2002/08/01/big/img_1801 +2002/07/24/big/img_683 +2002/08/09/big/img_725 +2003/01/14/big/img_34 +2002/07/30/big/img_144 +2002/07/30/big/img_706 +2002/08/08/big/img_394 +2002/08/19/big/img_619 +2002/08/06/big/img_2703 +2002/08/29/big/img_19034 +2002/07/24/big/img_67 +2002/08/27/big/img_19841 +2002/08/19/big/img_427 +2003/01/14/big/img_333 +2002/09/01/big/img_16406 +2002/07/19/big/img_882 +2002/08/17/big/img_238 +2003/01/14/big/img_739 +2002/07/22/big/img_151 +2002/08/21/big/img_743 +2002/07/25/big/img_1048 +2002/07/30/big/img_395 +2003/01/13/big/img_584 +2002/08/13/big/img_742 +2002/08/13/big/img_1168 +2003/01/14/big/img_147 +2002/07/26/big/img_803 +2002/08/05/big/img_3298 +2002/08/07/big/img_1451 +2002/08/16/big/img_424 +2002/07/29/big/img_1069 +2002/09/01/big/img_16735 +2002/07/21/big/img_637 +2003/01/14/big/img_585 +2002/08/02/big/img_358 +2003/01/13/big/img_358 +2002/08/14/big/img_198 +2002/08/17/big/img_935 +2002/08/04/big/img_42 +2002/08/30/big/img_18245 +2002/07/25/big/img_158 +2002/08/22/big/img_744 +2002/08/06/big/img_2291 +2002/08/05/big/img_3044 +2002/07/30/big/img_272 +2002/08/23/big/img_641 +2002/07/24/big/img_797 +2002/07/30/big/img_392 +2003/01/14/big/img_447 +2002/07/31/big/img_898 +2002/08/06/big/img_2812 +2002/08/13/big/img_564 +2002/07/22/big/img_43 +2002/07/26/big/img_634 +2002/07/19/big/img_843 +2002/08/26/big/img_58 +2002/07/21/big/img_375 +2002/08/25/big/img_729 +2002/07/19/big/img_561 +2003/01/15/big/img_884 +2002/07/25/big/img_891 +2002/08/09/big/img_558 +2002/08/26/big/img_587 +2002/08/13/big/img_1146 +2002/09/02/big/img_15153 +2002/07/26/big/img_316 +2002/08/01/big/img_1940 +2002/08/26/big/img_90 +2003/01/13/big/img_347 +2002/07/25/big/img_520 +2002/08/29/big/img_18718 +2002/08/28/big/img_19219 +2002/08/13/big/img_375 +2002/07/20/big/img_719 +2002/08/31/big/img_17431 +2002/07/28/big/img_192 +2002/08/26/big/img_259 +2002/08/18/big/img_484 +2002/07/29/big/img_580 +2002/07/26/big/img_84 +2002/08/02/big/img_302 +2002/08/31/big/img_17007 
+2003/01/15/big/img_543 +2002/09/01/big/img_16488 +2002/08/22/big/img_798 +2002/07/30/big/img_383 +2002/08/04/big/img_668 +2002/08/13/big/img_156 +2002/08/07/big/img_1353 +2002/07/25/big/img_281 +2003/01/14/big/img_587 +2003/01/15/big/img_524 +2002/08/19/big/img_726 +2002/08/21/big/img_709 +2002/08/26/big/img_465 +2002/07/31/big/img_658 +2002/08/28/big/img_19148 +2002/07/23/big/img_423 +2002/08/16/big/img_758 +2002/08/22/big/img_523 +2002/08/16/big/img_591 +2002/08/23/big/img_845 +2002/07/26/big/img_678 +2002/08/09/big/img_806 +2002/08/06/big/img_2369 +2002/07/29/big/img_457 +2002/07/19/big/img_278 +2002/08/30/big/img_18107 +2002/07/26/big/img_444 +2002/08/20/big/img_278 +2002/08/26/big/img_92 +2002/08/26/big/img_257 +2002/07/25/big/img_266 +2002/08/05/big/img_3829 +2002/07/26/big/img_757 +2002/07/29/big/img_1536 +2002/08/09/big/img_472 +2003/01/17/big/img_480 +2002/08/28/big/img_19355 +2002/07/26/big/img_97 +2002/08/06/big/img_2503 +2002/07/19/big/img_254 +2002/08/01/big/img_1470 +2002/08/21/big/img_42 +2002/08/20/big/img_217 +2002/08/06/big/img_2459 +2002/07/19/big/img_552 +2002/08/13/big/img_717 +2002/08/12/big/img_586 +2002/08/20/big/img_411 +2003/01/13/big/img_768 +2002/08/07/big/img_1747 +2002/08/15/big/img_385 +2002/08/01/big/img_1648 +2002/08/15/big/img_311 +2002/08/21/big/img_95 +2002/08/09/big/img_108 +2002/08/21/big/img_398 +2002/08/17/big/img_340 +2002/08/14/big/img_474 +2002/08/13/big/img_294 +2002/08/24/big/img_840 +2002/08/09/big/img_808 +2002/08/23/big/img_491 +2002/07/28/big/img_33 +2003/01/13/big/img_664 +2002/08/02/big/img_261 +2002/08/09/big/img_591 +2002/07/26/big/img_309 +2003/01/14/big/img_372 +2002/08/19/big/img_581 +2002/08/19/big/img_168 +2002/08/26/big/img_422 +2002/07/24/big/img_106 +2002/08/01/big/img_1936 +2002/08/05/big/img_3764 +2002/08/21/big/img_266 +2002/08/31/big/img_17968 +2002/08/01/big/img_1941 +2002/08/15/big/img_550 +2002/08/14/big/img_13 +2002/07/30/big/img_171 +2003/01/13/big/img_490 +2002/07/25/big/img_427 +2002/07/19/big/img_770 +2002/08/12/big/img_759 +2003/01/15/big/img_1360 +2002/08/05/big/img_3692 +2003/01/16/big/img_30 +2002/07/25/big/img_1026 +2002/07/22/big/img_288 +2002/08/29/big/img_18801 +2002/07/24/big/img_793 +2002/08/13/big/img_178 +2002/08/06/big/img_2322 +2003/01/14/big/img_560 +2002/08/18/big/img_408 +2003/01/16/big/img_915 +2003/01/16/big/img_679 +2002/08/07/big/img_1552 +2002/08/29/big/img_19050 +2002/08/01/big/img_2172 +2002/07/31/big/img_30 +2002/07/30/big/img_1019 +2002/07/30/big/img_587 +2003/01/13/big/img_773 +2002/07/30/big/img_410 +2002/07/28/big/img_65 +2002/08/05/big/img_3138 +2002/07/23/big/img_541 +2002/08/22/big/img_963 +2002/07/27/big/img_657 +2002/07/30/big/img_1051 +2003/01/16/big/img_150 +2002/07/31/big/img_519 +2002/08/01/big/img_1961 +2002/08/05/big/img_3752 +2002/07/23/big/img_631 +2003/01/14/big/img_237 +2002/07/28/big/img_21 +2002/07/22/big/img_813 +2002/08/05/big/img_3563 +2003/01/17/big/img_620 +2002/07/19/big/img_523 +2002/07/30/big/img_904 +2002/08/29/big/img_18642 +2002/08/11/big/img_492 +2002/08/01/big/img_2130 +2002/07/25/big/img_618 +2002/08/17/big/img_305 +2003/01/16/big/img_520 +2002/07/26/big/img_495 +2002/08/17/big/img_164 +2002/08/03/big/img_440 +2002/07/24/big/img_441 +2002/08/06/big/img_2146 +2002/08/11/big/img_558 +2002/08/02/big/img_545 +2002/08/31/big/img_18090 +2003/01/01/big/img_136 +2002/07/25/big/img_1099 +2003/01/13/big/img_728 +2003/01/16/big/img_197 +2002/07/26/big/img_651 +2002/08/11/big/img_676 +2003/01/15/big/img_10 +2002/08/21/big/img_250 +2002/08/14/big/img_325 
+2002/08/04/big/img_390 +2002/07/24/big/img_554 +2003/01/16/big/img_333 +2002/07/31/big/img_922 +2002/09/02/big/img_15586 +2003/01/16/big/img_184 +2002/07/22/big/img_766 +2002/07/21/big/img_608 +2002/08/07/big/img_1578 +2002/08/17/big/img_961 +2002/07/27/big/img_324 +2002/08/05/big/img_3765 +2002/08/23/big/img_462 +2003/01/16/big/img_382 +2002/08/27/big/img_19838 +2002/08/01/big/img_1505 +2002/08/21/big/img_662 +2002/08/14/big/img_605 +2002/08/19/big/img_816 +2002/07/29/big/img_136 +2002/08/20/big/img_719 +2002/08/06/big/img_2826 +2002/08/10/big/img_630 +2003/01/17/big/img_973 +2002/08/14/big/img_116 +2002/08/02/big/img_666 +2002/08/21/big/img_710 +2002/08/05/big/img_55 +2002/07/31/big/img_229 +2002/08/01/big/img_1549 +2002/07/23/big/img_432 +2002/07/21/big/img_430 +2002/08/21/big/img_549 +2002/08/08/big/img_985 +2002/07/20/big/img_610 +2002/07/23/big/img_978 +2002/08/23/big/img_219 +2002/07/25/big/img_175 +2003/01/15/big/img_230 +2002/08/23/big/img_385 +2002/07/31/big/img_879 +2002/08/12/big/img_495 +2002/08/22/big/img_499 +2002/08/30/big/img_18322 +2002/08/15/big/img_795 +2002/08/13/big/img_835 +2003/01/17/big/img_930 +2002/07/30/big/img_873 +2002/08/11/big/img_257 +2002/07/31/big/img_593 +2002/08/21/big/img_916 +2003/01/13/big/img_814 +2002/07/25/big/img_722 +2002/08/16/big/img_379 +2002/07/31/big/img_497 +2002/07/22/big/img_602 +2002/08/21/big/img_642 +2002/08/21/big/img_614 +2002/08/23/big/img_482 +2002/07/29/big/img_603 +2002/08/13/big/img_705 +2002/07/23/big/img_833 +2003/01/14/big/img_511 +2002/07/24/big/img_376 +2002/08/17/big/img_1030 +2002/08/05/big/img_3576 +2002/08/16/big/img_540 +2002/07/22/big/img_630 +2002/08/10/big/img_180 +2002/08/14/big/img_905 +2002/08/29/big/img_18777 +2002/08/22/big/img_693 +2003/01/16/big/img_933 +2002/08/20/big/img_555 +2002/08/15/big/img_549 +2003/01/14/big/img_830 +2003/01/16/big/img_64 +2002/08/27/big/img_19670 +2002/08/22/big/img_729 +2002/07/27/big/img_981 +2002/08/09/big/img_458 +2003/01/17/big/img_884 +2002/07/25/big/img_639 +2002/08/31/big/img_18008 +2002/08/22/big/img_249 +2002/08/17/big/img_971 +2002/08/04/big/img_308 +2002/07/28/big/img_362 +2002/08/12/big/img_142 +2002/08/26/big/img_61 +2002/08/14/big/img_422 +2002/07/19/big/img_607 +2003/01/15/big/img_717 +2002/08/01/big/img_1475 +2002/08/29/big/img_19061 +2003/01/01/big/img_346 +2002/07/20/big/img_315 +2003/01/15/big/img_756 +2002/08/15/big/img_879 +2002/08/08/big/img_615 +2003/01/13/big/img_431 +2002/08/05/big/img_3233 +2002/08/24/big/img_526 +2003/01/13/big/img_717 +2002/09/01/big/img_16408 +2002/07/22/big/img_217 +2002/07/31/big/img_960 +2002/08/21/big/img_610 +2002/08/05/big/img_3753 +2002/08/03/big/img_151 +2002/08/21/big/img_267 +2002/08/01/big/img_2175 +2002/08/04/big/img_556 +2002/08/21/big/img_527 +2002/09/02/big/img_15800 +2002/07/27/big/img_156 +2002/07/20/big/img_590 +2002/08/15/big/img_700 +2002/08/08/big/img_444 +2002/07/25/big/img_94 +2002/07/24/big/img_778 +2002/08/14/big/img_694 +2002/07/20/big/img_666 +2002/08/02/big/img_200 +2002/08/02/big/img_578 +2003/01/17/big/img_332 +2002/09/01/big/img_16352 +2002/08/27/big/img_19668 +2002/07/23/big/img_823 +2002/08/13/big/img_431 +2003/01/16/big/img_463 +2002/08/27/big/img_19711 +2002/08/23/big/img_154 +2002/07/31/big/img_360 +2002/08/23/big/img_555 +2002/08/10/big/img_561 +2003/01/14/big/img_550 +2002/08/07/big/img_1370 +2002/07/30/big/img_1184 +2002/08/01/big/img_1445 +2002/08/23/big/img_22 +2002/07/30/big/img_606 +2003/01/17/big/img_271 +2002/08/31/big/img_17316 +2002/08/16/big/img_973 +2002/07/26/big/img_77 
+2002/07/20/big/img_788 +2002/08/06/big/img_2426 +2002/08/07/big/img_1498 +2002/08/16/big/img_358 +2002/08/06/big/img_2851 +2002/08/12/big/img_359 +2002/08/01/big/img_1521 +2002/08/02/big/img_709 +2002/08/20/big/img_935 +2002/08/12/big/img_188 +2002/08/24/big/img_411 +2002/08/22/big/img_680 +2002/08/06/big/img_2480 +2002/07/20/big/img_627 +2002/07/30/big/img_214 +2002/07/25/big/img_354 +2002/08/02/big/img_636 +2003/01/15/big/img_661 +2002/08/07/big/img_1327 +2002/08/01/big/img_2108 +2002/08/31/big/img_17919 +2002/08/29/big/img_18768 +2002/08/05/big/img_3840 +2002/07/26/big/img_242 +2003/01/14/big/img_451 +2002/08/20/big/img_923 +2002/08/27/big/img_19908 +2002/08/16/big/img_282 +2002/08/19/big/img_440 +2003/01/01/big/img_230 +2002/08/08/big/img_212 +2002/07/20/big/img_443 +2002/08/25/big/img_635 +2003/01/13/big/img_1169 +2002/07/26/big/img_998 +2002/08/15/big/img_995 +2002/08/06/big/img_3002 +2002/07/29/big/img_460 +2003/01/14/big/img_925 +2002/07/23/big/img_539 +2002/08/16/big/img_694 +2003/01/13/big/img_459 +2002/07/23/big/img_249 +2002/08/20/big/img_539 +2002/08/04/big/img_186 +2002/08/26/big/img_264 +2002/07/22/big/img_704 +2002/08/25/big/img_277 +2002/08/22/big/img_988 +2002/07/29/big/img_504 +2002/08/05/big/img_3600 +2002/08/30/big/img_18380 +2003/01/14/big/img_937 +2002/08/21/big/img_254 +2002/08/10/big/img_130 +2002/08/20/big/img_339 +2003/01/14/big/img_428 +2002/08/20/big/img_889 +2002/08/31/big/img_17637 +2002/07/26/big/img_644 +2002/09/01/big/img_16776 +2002/08/06/big/img_2239 +2002/08/06/big/img_2646 +2003/01/13/big/img_491 +2002/08/10/big/img_579 +2002/08/21/big/img_713 +2002/08/22/big/img_482 +2002/07/22/big/img_167 +2002/07/24/big/img_539 +2002/08/14/big/img_721 +2002/07/25/big/img_389 +2002/09/01/big/img_16591 +2002/08/13/big/img_543 +2003/01/14/big/img_432 +2002/08/09/big/img_287 +2002/07/26/big/img_126 +2002/08/23/big/img_412 +2002/08/15/big/img_1034 +2002/08/28/big/img_19485 +2002/07/31/big/img_236 +2002/07/30/big/img_523 +2002/07/19/big/img_141 +2003/01/17/big/img_957 +2002/08/04/big/img_81 +2002/07/25/big/img_206 +2002/08/15/big/img_716 +2002/08/13/big/img_403 +2002/08/15/big/img_685 +2002/07/26/big/img_884 +2002/07/19/big/img_499 +2002/07/23/big/img_772 +2002/07/27/big/img_752 +2003/01/14/big/img_493 +2002/08/25/big/img_664 +2002/07/31/big/img_334 +2002/08/26/big/img_678 +2002/09/01/big/img_16541 +2003/01/14/big/img_347 +2002/07/23/big/img_187 +2002/07/30/big/img_1163 +2002/08/05/big/img_35 +2002/08/22/big/img_944 +2002/08/07/big/img_1239 +2002/07/29/big/img_1215 +2002/08/03/big/img_312 +2002/08/05/big/img_3523 +2002/07/29/big/img_218 +2002/08/13/big/img_672 +2002/08/16/big/img_205 +2002/08/17/big/img_594 +2002/07/29/big/img_1411 +2002/07/30/big/img_942 +2003/01/16/big/img_312 +2002/08/08/big/img_312 +2002/07/25/big/img_15 +2002/08/09/big/img_839 +2002/08/01/big/img_2069 +2002/08/31/big/img_17512 +2002/08/01/big/img_3 +2002/07/31/big/img_320 +2003/01/15/big/img_1265 +2002/08/14/big/img_563 +2002/07/31/big/img_167 +2002/08/20/big/img_374 +2002/08/13/big/img_406 +2002/08/08/big/img_625 +2002/08/02/big/img_314 +2002/08/27/big/img_19964 +2002/09/01/big/img_16670 +2002/07/31/big/img_599 +2002/08/29/big/img_18906 +2002/07/24/big/img_373 +2002/07/26/big/img_513 +2002/09/02/big/img_15497 +2002/08/19/big/img_117 +2003/01/01/big/img_158 +2002/08/24/big/img_178 +2003/01/13/big/img_935 +2002/08/13/big/img_609 +2002/08/30/big/img_18341 +2002/08/25/big/img_674 +2003/01/13/big/img_209 +2002/08/13/big/img_258 +2002/08/05/big/img_3543 +2002/08/07/big/img_1970 +2002/08/06/big/img_3004 
+2003/01/17/big/img_487 +2002/08/24/big/img_873 +2002/08/29/big/img_18730 +2002/08/09/big/img_375 +2003/01/16/big/img_751 +2002/08/02/big/img_603 +2002/08/19/big/img_325 +2002/09/01/big/img_16420 +2002/08/05/big/img_3633 +2002/08/21/big/img_516 +2002/07/19/big/img_501 +2002/07/26/big/img_688 +2002/07/24/big/img_256 +2002/07/25/big/img_438 +2002/07/31/big/img_1017 +2002/08/22/big/img_512 +2002/07/21/big/img_543 +2002/08/08/big/img_223 +2002/08/19/big/img_189 +2002/08/12/big/img_630 +2002/07/30/big/img_958 +2002/07/28/big/img_208 +2002/08/31/big/img_17691 +2002/07/22/big/img_542 +2002/07/19/big/img_741 +2002/07/19/big/img_158 +2002/08/15/big/img_399 +2002/08/01/big/img_2159 +2002/08/14/big/img_455 +2002/08/17/big/img_1011 +2002/08/26/big/img_744 +2002/08/12/big/img_624 +2003/01/17/big/img_821 +2002/08/16/big/img_980 +2002/07/28/big/img_281 +2002/07/25/big/img_171 +2002/08/03/big/img_116 +2002/07/22/big/img_467 +2002/07/31/big/img_750 +2002/07/26/big/img_435 +2002/07/19/big/img_822 +2002/08/13/big/img_626 +2002/08/11/big/img_344 +2002/08/02/big/img_473 +2002/09/01/big/img_16817 +2002/08/01/big/img_1275 +2002/08/28/big/img_19270 +2002/07/23/big/img_607 +2002/08/09/big/img_316 +2002/07/29/big/img_626 +2002/07/24/big/img_824 +2002/07/22/big/img_342 +2002/08/08/big/img_794 +2002/08/07/big/img_1209 +2002/07/19/big/img_18 +2002/08/25/big/img_634 +2002/07/24/big/img_730 +2003/01/17/big/img_356 +2002/07/23/big/img_305 +2002/07/30/big/img_453 +2003/01/13/big/img_972 +2002/08/06/big/img_2610 +2002/08/29/big/img_18920 +2002/07/31/big/img_123 +2002/07/26/big/img_979 +2002/08/24/big/img_635 +2002/08/05/big/img_3704 +2002/08/07/big/img_1358 +2002/07/22/big/img_306 +2002/08/13/big/img_619 +2002/08/02/big/img_366 diff --git a/third_part/GPEN/face_detect/data/__init__.py b/third_part/GPEN/face_detect/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ea50ebaf88d64e75f4960bc99b14f138a343e575 --- /dev/null +++ b/third_part/GPEN/face_detect/data/__init__.py @@ -0,0 +1,3 @@ +from .wider_face import WiderFaceDetection, detection_collate +from .data_augment import * +from .config import * diff --git a/third_part/GPEN/face_detect/data/config.py b/third_part/GPEN/face_detect/data/config.py new file mode 100644 index 0000000000000000000000000000000000000000..e57cdc530e3d78c4aa6310985c90c5ee125f8f01 --- /dev/null +++ b/third_part/GPEN/face_detect/data/config.py @@ -0,0 +1,42 @@ +# config.py + +cfg_mnet = { + 'name': 'mobilenet0.25', + 'min_sizes': [[16, 32], [64, 128], [256, 512]], + 'steps': [8, 16, 32], + 'variance': [0.1, 0.2], + 'clip': False, + 'loc_weight': 2.0, + 'gpu_train': True, + 'batch_size': 32, + 'ngpu': 1, + 'epoch': 250, + 'decay1': 190, + 'decay2': 220, + 'image_size': 640, + 'pretrain': False, + 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, + 'in_channel': 32, + 'out_channel': 64 +} + +cfg_re50 = { + 'name': 'Resnet50', + 'min_sizes': [[16, 32], [64, 128], [256, 512]], + 'steps': [8, 16, 32], + 'variance': [0.1, 0.2], + 'clip': False, + 'loc_weight': 2.0, + 'gpu_train': True, + 'batch_size': 24, + 'ngpu': 4, + 'epoch': 100, + 'decay1': 70, + 'decay2': 90, + 'image_size': 840, + 'pretrain': False, + 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, + 'in_channel': 256, + 'out_channel': 256 +} + diff --git a/third_part/GPEN/face_detect/data/data_augment.py b/third_part/GPEN/face_detect/data/data_augment.py new file mode 100644 index 0000000000000000000000000000000000000000..8987add0c8cfc39c7c051d86ddc057dbfb5c8f19 --- /dev/null +++ 
b/third_part/GPEN/face_detect/data/data_augment.py @@ -0,0 +1,237 @@ +import cv2 +import numpy as np +import random +from face_detect.utils.box_utils import matrix_iof + + +def _crop(image, boxes, labels, landm, img_dim): + height, width, _ = image.shape + pad_image_flag = True + + for _ in range(250): + """ + if random.uniform(0, 1) <= 0.2: + scale = 1.0 + else: + scale = random.uniform(0.3, 1.0) + """ + PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] + scale = random.choice(PRE_SCALES) + short_side = min(width, height) + w = int(scale * short_side) + h = w + + if width == w: + l = 0 + else: + l = random.randrange(width - w) + if height == h: + t = 0 + else: + t = random.randrange(height - h) + roi = np.array((l, t, l + w, t + h)) + + value = matrix_iof(boxes, roi[np.newaxis]) + flag = (value >= 1) + if not flag.any(): + continue + + centers = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) + boxes_t = boxes[mask_a].copy() + labels_t = labels[mask_a].copy() + landms_t = landm[mask_a].copy() + landms_t = landms_t.reshape([-1, 5, 2]) + + if boxes_t.shape[0] == 0: + continue + + image_t = image[roi[1]:roi[3], roi[0]:roi[2]] + + boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) + boxes_t[:, :2] -= roi[:2] + boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) + boxes_t[:, 2:] -= roi[:2] + + # landm + landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] + landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) + landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) + landms_t = landms_t.reshape([-1, 10]) + + + # make sure that the cropped image contains at least one face > 16 pixel at training image scale + b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim + b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim + mask_b = np.minimum(b_w_t, b_h_t) > 0.0 + boxes_t = boxes_t[mask_b] + labels_t = labels_t[mask_b] + landms_t = landms_t[mask_b] + + if boxes_t.shape[0] == 0: + continue + + pad_image_flag = False + + return image_t, boxes_t, labels_t, landms_t, pad_image_flag + return image, boxes, labels, landm, pad_image_flag + + +def _distort(image): + + def _convert(image, alpha=1, beta=0): + tmp = image.astype(float) * alpha + beta + tmp[tmp < 0] = 0 + tmp[tmp > 255] = 255 + image[:] = tmp + + image = image.copy() + + if random.randrange(2): + + #brightness distortion + if random.randrange(2): + _convert(image, beta=random.uniform(-32, 32)) + + #contrast distortion + if random.randrange(2): + _convert(image, alpha=random.uniform(0.5, 1.5)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + #saturation distortion + if random.randrange(2): + _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) + + #hue distortion + if random.randrange(2): + tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) + tmp %= 180 + image[:, :, 0] = tmp + + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + + else: + + #brightness distortion + if random.randrange(2): + _convert(image, beta=random.uniform(-32, 32)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + #saturation distortion + if random.randrange(2): + _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) + + #hue distortion + if random.randrange(2): + tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) + tmp %= 180 + image[:, :, 0] = tmp + + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + + #contrast distortion + if random.randrange(2): + _convert(image, alpha=random.uniform(0.5, 1.5)) + + return image + + +def _expand(image, boxes, fill, 
p): + if random.randrange(2): + return image, boxes + + height, width, depth = image.shape + + scale = random.uniform(1, p) + w = int(scale * width) + h = int(scale * height) + + left = random.randint(0, w - width) + top = random.randint(0, h - height) + + boxes_t = boxes.copy() + boxes_t[:, :2] += (left, top) + boxes_t[:, 2:] += (left, top) + expand_image = np.empty( + (h, w, depth), + dtype=image.dtype) + expand_image[:, :] = fill + expand_image[top:top + height, left:left + width] = image + image = expand_image + + return image, boxes_t + + +def _mirror(image, boxes, landms): + _, width, _ = image.shape + if random.randrange(2): + image = image[:, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + + # landm + landms = landms.copy() + landms = landms.reshape([-1, 5, 2]) + landms[:, :, 0] = width - landms[:, :, 0] + tmp = landms[:, 1, :].copy() + landms[:, 1, :] = landms[:, 0, :] + landms[:, 0, :] = tmp + tmp1 = landms[:, 4, :].copy() + landms[:, 4, :] = landms[:, 3, :] + landms[:, 3, :] = tmp1 + landms = landms.reshape([-1, 10]) + + return image, boxes, landms + + +def _pad_to_square(image, rgb_mean, pad_image_flag): + if not pad_image_flag: + return image + height, width, _ = image.shape + long_side = max(width, height) + image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) + image_t[:, :] = rgb_mean + image_t[0:0 + height, 0:0 + width] = image + return image_t + + +def _resize_subtract_mean(image, insize, rgb_mean): + interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] + interp_method = interp_methods[random.randrange(5)] + image = cv2.resize(image, (insize, insize), interpolation=interp_method) + image = image.astype(np.float32) + image -= rgb_mean + return image.transpose(2, 0, 1) + + +class preproc(object): + + def __init__(self, img_dim, rgb_means): + self.img_dim = img_dim + self.rgb_means = rgb_means + + def __call__(self, image, targets): + assert targets.shape[0] > 0, "this image does not have gt" + + boxes = targets[:, :4].copy() + labels = targets[:, -1].copy() + landm = targets[:, 4:-1].copy() + + image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim) + image_t = _distort(image_t) + image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag) + image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) + height, width, _ = image_t.shape + image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) + boxes_t[:, 0::2] /= width + boxes_t[:, 1::2] /= height + + landm_t[:, 0::2] /= width + landm_t[:, 1::2] /= height + + labels_t = np.expand_dims(labels_t, 1) + targets_t = np.hstack((boxes_t, landm_t, labels_t)) + + return image_t, targets_t diff --git a/third_part/GPEN/face_detect/data/wider_face.py b/third_part/GPEN/face_detect/data/wider_face.py new file mode 100644 index 0000000000000000000000000000000000000000..22f56efdc221bd4162d22884669ba44a3d4de5cd --- /dev/null +++ b/third_part/GPEN/face_detect/data/wider_face.py @@ -0,0 +1,101 @@ +import os +import os.path +import sys +import torch +import torch.utils.data as data +import cv2 +import numpy as np + +class WiderFaceDetection(data.Dataset): + def __init__(self, txt_path, preproc=None): + self.preproc = preproc + self.imgs_path = [] + self.words = [] + f = open(txt_path,'r') + lines = f.readlines() + isFirst = True + labels = [] + for line in lines: + line = line.rstrip() + if line.startswith('#'): + if isFirst is True: + isFirst = False + else: + labels_copy = 
labels.copy() + self.words.append(labels_copy) + labels.clear() + path = line[2:] + path = txt_path.replace('label.txt','images/') + path + self.imgs_path.append(path) + else: + line = line.split(' ') + label = [float(x) for x in line] + labels.append(label) + + self.words.append(labels) + + def __len__(self): + return len(self.imgs_path) + + def __getitem__(self, index): + img = cv2.imread(self.imgs_path[index]) + height, width, _ = img.shape + + labels = self.words[index] + annotations = np.zeros((0, 15)) + if len(labels) == 0: + return annotations + for idx, label in enumerate(labels): + annotation = np.zeros((1, 15)) + # bbox + annotation[0, 0] = label[0] # x1 + annotation[0, 1] = label[1] # y1 + annotation[0, 2] = label[0] + label[2] # x2 + annotation[0, 3] = label[1] + label[3] # y2 + + # landmarks + annotation[0, 4] = label[4] # l0_x + annotation[0, 5] = label[5] # l0_y + annotation[0, 6] = label[7] # l1_x + annotation[0, 7] = label[8] # l1_y + annotation[0, 8] = label[10] # l2_x + annotation[0, 9] = label[11] # l2_y + annotation[0, 10] = label[13] # l3_x + annotation[0, 11] = label[14] # l3_y + annotation[0, 12] = label[16] # l4_x + annotation[0, 13] = label[17] # l4_y + if (annotation[0, 4]<0): + annotation[0, 14] = -1 + else: + annotation[0, 14] = 1 + + annotations = np.append(annotations, annotation, axis=0) + target = np.array(annotations) + if self.preproc is not None: + img, target = self.preproc(img, target) + + return torch.from_numpy(img), target + +def detection_collate(batch): + """Custom collate fn for dealing with batches of images that have a different + number of associated object annotations (bounding boxes). + + Arguments: + batch: (tuple) A tuple of tensor images and lists of annotations + + Return: + A tuple containing: + 1) (tensor) batch of images stacked on their 0 dim + 2) (list of tensors) annotations for a given image are stacked on 0 dim + """ + targets = [] + imgs = [] + for _, sample in enumerate(batch): + for _, tup in enumerate(sample): + if torch.is_tensor(tup): + imgs.append(tup) + elif isinstance(tup, type(np.empty(0))): + annos = torch.from_numpy(tup).float() + targets.append(annos) + + return (torch.stack(imgs, 0), targets) diff --git a/third_part/GPEN/face_detect/facemodels/__init__.py b/third_part/GPEN/face_detect/facemodels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/GPEN/face_detect/facemodels/net.py b/third_part/GPEN/face_detect/facemodels/net.py new file mode 100644 index 0000000000000000000000000000000000000000..beb6040b24258f8b96020c1c9fc2610819718017 --- /dev/null +++ b/third_part/GPEN/face_detect/facemodels/net.py @@ -0,0 +1,137 @@ +import time +import torch +import torch.nn as nn +import torchvision.models._utils as _utils +import torchvision.models as models +import torch.nn.functional as F +from torch.autograd import Variable + +def conv_bn(inp, oup, stride = 1, leaky = 0): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope=leaky, inplace=True) + ) + +def conv_bn_no_relu(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + ) + +def conv_bn1X1(inp, oup, stride, leaky=0): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope=leaky, inplace=True) + ) + +def conv_dw(inp, oup, stride, leaky=0.1): + return nn.Sequential( + 
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.LeakyReLU(negative_slope= leaky,inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.LeakyReLU(negative_slope= leaky,inplace=True), + ) + +class SSH(nn.Module): + def __init__(self, in_channel, out_channel): + super(SSH, self).__init__() + assert out_channel % 4 == 0 + leaky = 0 + if (out_channel <= 64): + leaky = 0.1 + self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) + + self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky) + self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) + + self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky) + self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) + + def forward(self, input): + conv3X3 = self.conv3X3(input) + + conv5X5_1 = self.conv5X5_1(input) + conv5X5 = self.conv5X5_2(conv5X5_1) + + conv7X7_2 = self.conv7X7_2(conv5X5_1) + conv7X7 = self.conv7x7_3(conv7X7_2) + + out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) + out = F.relu(out) + return out + +class FPN(nn.Module): + def __init__(self,in_channels_list,out_channels): + super(FPN,self).__init__() + leaky = 0 + if (out_channels <= 64): + leaky = 0.1 + self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky) + self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky) + self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky) + + self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky) + self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky) + + def forward(self, input): + # names = list(input.keys()) + input = list(input.values()) + + output1 = self.output1(input[0]) + output2 = self.output2(input[1]) + output3 = self.output3(input[2]) + + up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") + output2 = output2 + up3 + output2 = self.merge2(output2) + + up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") + output1 = output1 + up2 + output1 = self.merge1(output1) + + out = [output1, output2, output3] + return out + + + +class MobileNetV1(nn.Module): + def __init__(self): + super(MobileNetV1, self).__init__() + self.stage1 = nn.Sequential( + conv_bn(3, 8, 2, leaky = 0.1), # 3 + conv_dw(8, 16, 1), # 7 + conv_dw(16, 32, 2), # 11 + conv_dw(32, 32, 1), # 19 + conv_dw(32, 64, 2), # 27 + conv_dw(64, 64, 1), # 43 + ) + self.stage2 = nn.Sequential( + conv_dw(64, 128, 2), # 43 + 16 = 59 + conv_dw(128, 128, 1), # 59 + 32 = 91 + conv_dw(128, 128, 1), # 91 + 32 = 123 + conv_dw(128, 128, 1), # 123 + 32 = 155 + conv_dw(128, 128, 1), # 155 + 32 = 187 + conv_dw(128, 128, 1), # 187 + 32 = 219 + ) + self.stage3 = nn.Sequential( + conv_dw(128, 256, 2), # 219 +3 2 = 241 + conv_dw(256, 256, 1), # 241 + 64 = 301 + ) + self.avg = nn.AdaptiveAvgPool2d((1,1)) + self.fc = nn.Linear(256, 1000) + + def forward(self, x): + x = self.stage1(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.avg(x) + # x = self.model(x) + x = x.view(-1, 256) + x = self.fc(x) + return x + diff --git a/third_part/GPEN/face_detect/facemodels/retinaface.py b/third_part/GPEN/face_detect/facemodels/retinaface.py new file mode 100644 index 0000000000000000000000000000000000000000..e902c9bbf38bdcc63629b77dbf95b5d53e7163bf --- /dev/null +++ b/third_part/GPEN/face_detect/facemodels/retinaface.py @@ -0,0 +1,127 @@ +import torch +import 
torch.nn as nn +import torchvision.models.detection.backbone_utils as backbone_utils +import torchvision.models._utils as _utils +import torch.nn.functional as F +from collections import OrderedDict + +from face_detect.facemodels.net import MobileNetV1 as MobileNetV1 +from face_detect.facemodels.net import FPN as FPN +from face_detect.facemodels.net import SSH as SSH + + + +class ClassHead(nn.Module): + def __init__(self,inchannels=512,num_anchors=3): + super(ClassHead,self).__init__() + self.num_anchors = num_anchors + self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0) + + def forward(self,x): + out = self.conv1x1(x) + out = out.permute(0,2,3,1).contiguous() + + return out.view(out.shape[0], -1, 2) + +class BboxHead(nn.Module): + def __init__(self,inchannels=512,num_anchors=3): + super(BboxHead,self).__init__() + self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0) + + def forward(self,x): + out = self.conv1x1(x) + out = out.permute(0,2,3,1).contiguous() + + return out.view(out.shape[0], -1, 4) + +class LandmarkHead(nn.Module): + def __init__(self,inchannels=512,num_anchors=3): + super(LandmarkHead,self).__init__() + self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0) + + def forward(self,x): + out = self.conv1x1(x) + out = out.permute(0,2,3,1).contiguous() + + return out.view(out.shape[0], -1, 10) + +class RetinaFace(nn.Module): + def __init__(self, cfg = None, phase = 'train'): + """ + :param cfg: Network related settings. + :param phase: train or test. + """ + super(RetinaFace,self).__init__() + self.phase = phase + backbone = None + if cfg['name'] == 'mobilenet0.25': + backbone = MobileNetV1() + if cfg['pretrain']: + checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu')) + from collections import OrderedDict + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + name = k[7:] # remove module. 
+ new_state_dict[name] = v + # load params + backbone.load_state_dict(new_state_dict) + elif cfg['name'] == 'Resnet50': + import torchvision.models as models + backbone = models.resnet50(pretrained=cfg['pretrain']) + + self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) + in_channels_stage2 = cfg['in_channel'] + in_channels_list = [ + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ] + out_channels = cfg['out_channel'] + self.fpn = FPN(in_channels_list,out_channels) + self.ssh1 = SSH(out_channels, out_channels) + self.ssh2 = SSH(out_channels, out_channels) + self.ssh3 = SSH(out_channels, out_channels) + + self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) + self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) + self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) + + def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2): + classhead = nn.ModuleList() + for i in range(fpn_num): + classhead.append(ClassHead(inchannels,anchor_num)) + return classhead + + def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2): + bboxhead = nn.ModuleList() + for i in range(fpn_num): + bboxhead.append(BboxHead(inchannels,anchor_num)) + return bboxhead + + def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2): + landmarkhead = nn.ModuleList() + for i in range(fpn_num): + landmarkhead.append(LandmarkHead(inchannels,anchor_num)) + return landmarkhead + + def forward(self,inputs): + out = self.body(inputs) + + # FPN + fpn = self.fpn(out) + + # SSH + feature1 = self.ssh1(fpn[0]) + feature2 = self.ssh2(fpn[1]) + feature3 = self.ssh3(fpn[2]) + features = [feature1, feature2, feature3] + + bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) + classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1) + ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) + + if self.phase == 'train': + output = (bbox_regressions, classifications, ldm_regressions) + else: + output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) + return output \ No newline at end of file diff --git a/third_part/GPEN/face_detect/layers/__init__.py b/third_part/GPEN/face_detect/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..53a3f4b5160995d93bc7911e808b3045d74362c9 --- /dev/null +++ b/third_part/GPEN/face_detect/layers/__init__.py @@ -0,0 +1,2 @@ +from .functions import * +from .modules import * diff --git a/third_part/GPEN/face_detect/layers/functions/prior_box.py b/third_part/GPEN/face_detect/layers/functions/prior_box.py new file mode 100644 index 0000000000000000000000000000000000000000..80c7f858371ed71f39ed609eb44b423d8693bf61 --- /dev/null +++ b/third_part/GPEN/face_detect/layers/functions/prior_box.py @@ -0,0 +1,34 @@ +import torch +from itertools import product as product +import numpy as np +from math import ceil + + +class PriorBox(object): + def __init__(self, cfg, image_size=None, phase='train'): + super(PriorBox, self).__init__() + self.min_sizes = cfg['min_sizes'] + self.steps = cfg['steps'] + self.clip = cfg['clip'] + self.image_size = image_size + self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] + self.name = "s" + + def forward(self): + anchors = [] + for k, f in enumerate(self.feature_maps): + 
min_sizes = self.min_sizes[k] + for i, j in product(range(f[0]), range(f[1])): + for min_size in min_sizes: + s_kx = min_size / self.image_size[1] + s_ky = min_size / self.image_size[0] + dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] + dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] + for cy, cx in product(dense_cy, dense_cx): + anchors += [cx, cy, s_kx, s_ky] + + # back to torch land + output = torch.Tensor(anchors).view(-1, 4) + if self.clip: + output.clamp_(max=1, min=0) + return output diff --git a/third_part/GPEN/face_detect/layers/modules/__init__.py b/third_part/GPEN/face_detect/layers/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cf24bddbf283f233d0b93fc074a2bac2f5c044a9 --- /dev/null +++ b/third_part/GPEN/face_detect/layers/modules/__init__.py @@ -0,0 +1,3 @@ +from .multibox_loss import MultiBoxLoss + +__all__ = ['MultiBoxLoss'] diff --git a/third_part/GPEN/face_detect/layers/modules/multibox_loss.py b/third_part/GPEN/face_detect/layers/modules/multibox_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..fb2d422408a4840418d8e56091079050800e3183 --- /dev/null +++ b/third_part/GPEN/face_detect/layers/modules/multibox_loss.py @@ -0,0 +1,125 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from face_detect.utils.box_utils import match, log_sum_exp +from face_detect.data import cfg_mnet +GPU = cfg_mnet['gpu_train'] + +class MultiBoxLoss(nn.Module): + """SSD Weighted Loss Function + Compute Targets: + 1) Produce Confidence Target Indices by matching ground truth boxes + with (default) 'priorboxes' that have jaccard index > threshold parameter + (default threshold: 0.5). + 2) Produce localization target by 'encoding' variance into offsets of ground + truth boxes and their matched 'priorboxes'. + 3) Hard negative mining to filter the excessive number of negative examples + that comes with using a large number of default bounding boxes. + (default negative:positive ratio 3:1) + Objective Loss: + L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss + weighted by α which is set to 1 by cross val. + Args: + c: class confidences, + l: predicted boxes, + g: ground truth boxes + N: number of matched default boxes + See: https://arxiv.org/pdf/1512.02325.pdf for more details. + """ + + def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target): + super(MultiBoxLoss, self).__init__() + self.num_classes = num_classes + self.threshold = overlap_thresh + self.background_label = bkg_label + self.encode_target = encode_target + self.use_prior_for_matching = prior_for_matching + self.do_neg_mining = neg_mining + self.negpos_ratio = neg_pos + self.neg_overlap = neg_overlap + self.variance = [0.1, 0.2] + + def forward(self, predictions, priors, targets): + """Multibox Loss + Args: + predictions (tuple): A tuple containing loc preds, conf preds, + and prior boxes from SSD net. + conf shape: torch.size(batch_size,num_priors,num_classes) + loc shape: torch.size(batch_size,num_priors,4) + priors shape: torch.size(num_priors,4) + + ground_truth (tensor): Ground truth boxes and labels for a batch, + shape: [batch_size,num_objs,5] (last idx is the label). 
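For reference, the anchor layout that PriorBox.forward builds from the cfg dictionaries (min_sizes, steps, image_size) can be reproduced with a small standalone sketch. This is an illustrative re-implementation based on the hunk above, not repository code; make_priors is a hypothetical name and the default values follow cfg_mnet from config.py (640x640 input).

# Minimal sketch of RetinaFace-style prior (anchor) generation, mirroring
# layers/functions/prior_box.py. Illustrative only; defaults follow cfg_mnet.
from itertools import product
from math import ceil

import torch

def make_priors(image_size=(640, 640),
                min_sizes=((16, 32), (64, 128), (256, 512)),
                steps=(8, 16, 32),
                clip=False):
    anchors = []
    # one feature map per stride; cell count = ceil(side / stride)
    feature_maps = [(ceil(image_size[0] / s), ceil(image_size[1] / s)) for s in steps]
    for k, (fh, fw) in enumerate(feature_maps):
        for i, j in product(range(fh), range(fw)):
            for min_size in min_sizes[k]:
                s_kx = min_size / image_size[1]            # anchor width, normalized
                s_ky = min_size / image_size[0]            # anchor height, normalized
                cx = (j + 0.5) * steps[k] / image_size[1]  # anchor centre x
                cy = (i + 0.5) * steps[k] / image_size[0]  # anchor centre y
                anchors += [cx, cy, s_kx, s_ky]
    priors = torch.tensor(anchors).view(-1, 4)
    return priors.clamp_(max=1, min=0) if clip else priors

priors = make_priors()
print(priors.shape)  # torch.Size([16800, 4]): (80*80 + 40*40 + 20*20) * 2 anchors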
+ """ + + loc_data, conf_data, landm_data = predictions + priors = priors + num = loc_data.size(0) + num_priors = (priors.size(0)) + + # match priors (default boxes) and ground truth boxes + loc_t = torch.Tensor(num, num_priors, 4) + landm_t = torch.Tensor(num, num_priors, 10) + conf_t = torch.LongTensor(num, num_priors) + for idx in range(num): + truths = targets[idx][:, :4].data + labels = targets[idx][:, -1].data + landms = targets[idx][:, 4:14].data + defaults = priors.data + match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) + if GPU: + loc_t = loc_t.cuda() + conf_t = conf_t.cuda() + landm_t = landm_t.cuda() + + zeros = torch.tensor(0).cuda() + # landm Loss (Smooth L1) + # Shape: [batch,num_priors,10] + pos1 = conf_t > zeros + num_pos_landm = pos1.long().sum(1, keepdim=True) + N1 = max(num_pos_landm.data.sum().float(), 1) + pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) + landm_p = landm_data[pos_idx1].view(-1, 10) + landm_t = landm_t[pos_idx1].view(-1, 10) + loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') + + + pos = conf_t != zeros + conf_t[pos] = 1 + + # Localization Loss (Smooth L1) + # Shape: [batch,num_priors,4] + pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) + loc_p = loc_data[pos_idx].view(-1, 4) + loc_t = loc_t[pos_idx].view(-1, 4) + loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') + + # Compute max conf across batch for hard negative mining + batch_conf = conf_data.view(-1, self.num_classes) + loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) + + # Hard Negative Mining + loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now + loss_c = loss_c.view(num, -1) + _, loss_idx = loss_c.sort(1, descending=True) + _, idx_rank = loss_idx.sort(1) + num_pos = pos.long().sum(1, keepdim=True) + num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) + neg = idx_rank < num_neg.expand_as(idx_rank) + + # Confidence Loss Including Positive and Negative Examples + pos_idx = pos.unsqueeze(2).expand_as(conf_data) + neg_idx = neg.unsqueeze(2).expand_as(conf_data) + conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) + targets_weighted = conf_t[(pos+neg).gt(0)] + loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') + + # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + N = max(num_pos.data.sum().float(), 1) + loss_l /= N + loss_c /= N + loss_landm /= N1 + + return loss_l, loss_c, loss_landm diff --git a/third_part/GPEN/face_detect/retinaface_detection.py b/third_part/GPEN/face_detect/retinaface_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..a2928b52c220e069bf2ea27dc5d454097761e604 --- /dev/null +++ b/third_part/GPEN/face_detect/retinaface_detection.py @@ -0,0 +1,193 @@ +''' +@paper: GAN Prior Embedded Network for Blind Face Restoration in the Wild (CVPR2021) +@author: yangxy (yangtao9009@gmail.com) +''' +import os +import torch +import torch.backends.cudnn as cudnn +import numpy as np +from face_detect.data import cfg_re50 +from face_detect.layers.functions.prior_box import PriorBox +from face_detect.utils.nms.py_cpu_nms import py_cpu_nms +import cv2 +from face_detect.facemodels.retinaface import RetinaFace +from face_detect.utils.box_utils import decode, decode_landm +import time +import torch.nn.functional as F + + +class RetinaFaceDetection(object): + def __init__(self, base_dir, device='cuda', network='RetinaFace-R50'): + torch.set_grad_enabled(False) + cudnn.benchmark = True + 
self.pretrained_path = os.path.join(base_dir, network+'.pth') + self.device = device #torch.cuda.current_device() + self.cfg = cfg_re50 + self.net = RetinaFace(cfg=self.cfg, phase='test') + self.load_model() + self.net = self.net.to(device) + + self.mean = torch.tensor([[[[104]], [[117]], [[123]]]]).to(device) + + def check_keys(self, pretrained_state_dict): + ckpt_keys = set(pretrained_state_dict.keys()) + model_keys = set(self.net.state_dict().keys()) + used_pretrained_keys = model_keys & ckpt_keys + unused_pretrained_keys = ckpt_keys - model_keys + missing_keys = model_keys - ckpt_keys + assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' + return True + + def remove_prefix(self, state_dict, prefix): + ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' + f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x + return {f(key): value for key, value in state_dict.items()} + + def load_model(self, load_to_cpu=False): + #if load_to_cpu: + # pretrained_dict = torch.load(self.pretrained_path, map_location=lambda storage, loc: storage) + #else: + # pretrained_dict = torch.load(self.pretrained_path, map_location=lambda storage, loc: storage.cuda()) + pretrained_dict = torch.load(self.pretrained_path, map_location=torch.device('cpu')) + if "state_dict" in pretrained_dict.keys(): + pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'], 'module.') + else: + pretrained_dict = self.remove_prefix(pretrained_dict, 'module.') + self.check_keys(pretrained_dict) + self.net.load_state_dict(pretrained_dict, strict=False) + self.net.eval() + + def detect(self, img_raw, resize=1, confidence_threshold=0.9, nms_threshold=0.4, top_k=5000, keep_top_k=750, save_image=False): + img = np.float32(img_raw) + + im_height, im_width = img.shape[:2] + ss = 1.0 + # tricky + if max(im_height, im_width) > 1500: + ss = 1000.0/max(im_height, im_width) + img = cv2.resize(img, (0,0), fx=ss, fy=ss) + im_height, im_width = img.shape[:2] + + scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) + img -= (104, 117, 123) + img = img.transpose(2, 0, 1) + img = torch.from_numpy(img).unsqueeze(0) + img = img.to(self.device) + scale = scale.to(self.device) + + with torch.no_grad(): + loc, conf, landms = self.net(img) # forward pass + + priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) + priors = priorbox.forward() + priors = priors.to(self.device) + prior_data = priors.data + boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) + boxes = boxes * scale / resize + boxes = boxes.cpu().numpy() + scores = conf.squeeze(0).data.cpu().numpy()[:, 1] + landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) + scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2]]) + scale1 = scale1.to(self.device) + landms = landms * scale1 / resize + landms = landms.cpu().numpy() + + # ignore low scores + inds = np.where(scores > confidence_threshold)[0] + boxes = boxes[inds] + landms = landms[inds] + scores = scores[inds] + + # keep top-K before NMS + order = scores.argsort()[::-1][:top_k] + boxes = boxes[order] + landms = landms[order] + scores = scores[order] + + # do NMS + dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) + keep = py_cpu_nms(dets, nms_threshold) + # keep = nms(dets, nms_threshold,force_cpu=args.cpu) + dets = dets[keep, :] + landms 
= landms[keep] + + # keep top-K faster NMS + dets = dets[:keep_top_k, :] + landms = landms[:keep_top_k, :] + + # sort faces(delete) + ''' + fscores = [det[4] for det in dets] + sorted_idx = sorted(range(len(fscores)), key=lambda k:fscores[k], reverse=False) # sort index + tmp = [landms[idx] for idx in sorted_idx] + landms = np.asarray(tmp) + ''' + + landms = landms.reshape((-1, 5, 2)) + landms = landms.transpose((0, 2, 1)) + landms = landms.reshape(-1, 10, ) + return dets/ss, landms/ss + + def detect_tensor(self, img, resize=1, confidence_threshold=0.9, nms_threshold=0.4, top_k=5000, keep_top_k=750, save_image=False): + im_height, im_width = img.shape[-2:] + ss = 1000/max(im_height, im_width) + img = F.interpolate(img, scale_factor=ss) + im_height, im_width = img.shape[-2:] + scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(self.device) + img -= self.mean + + loc, conf, landms = self.net(img) # forward pass + + priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) + priors = priorbox.forward() + priors = priors.to(self.device) + prior_data = priors.data + boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) + boxes = boxes * scale / resize + boxes = boxes.cpu().numpy() + scores = conf.squeeze(0).data.cpu().numpy()[:, 1] + landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) + scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2]]) + scale1 = scale1.to(self.device) + landms = landms * scale1 / resize + landms = landms.cpu().numpy() + + # ignore low scores + inds = np.where(scores > confidence_threshold)[0] + boxes = boxes[inds] + landms = landms[inds] + scores = scores[inds] + + # keep top-K before NMS + order = scores.argsort()[::-1][:top_k] + boxes = boxes[order] + landms = landms[order] + scores = scores[order] + + # do NMS + dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) + keep = py_cpu_nms(dets, nms_threshold) + # keep = nms(dets, nms_threshold,force_cpu=args.cpu) + dets = dets[keep, :] + landms = landms[keep] + + # keep top-K faster NMS + dets = dets[:keep_top_k, :] + landms = landms[:keep_top_k, :] + + # sort faces(delete) + ''' + fscores = [det[4] for det in dets] + sorted_idx = sorted(range(len(fscores)), key=lambda k:fscores[k], reverse=False) # sort index + tmp = [landms[idx] for idx in sorted_idx] + landms = np.asarray(tmp) + ''' + + landms = landms.reshape((-1, 5, 2)) + landms = landms.transpose((0, 2, 1)) + landms = landms.reshape(-1, 10, ) + return dets/ss, landms/ss diff --git a/third_part/GPEN/face_detect/utils/__init__.py b/third_part/GPEN/face_detect/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/GPEN/face_detect/utils/box_utils.py b/third_part/GPEN/face_detect/utils/box_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c1d12bc612ae3ba3ea9d138bfc5997a2b15d8dd9 --- /dev/null +++ b/third_part/GPEN/face_detect/utils/box_utils.py @@ -0,0 +1,330 @@ +import torch +import numpy as np + + +def point_form(boxes): + """ Convert prior_boxes to (xmin, ymin, xmax, ymax) + representation for comparison to point form ground truth data. + Args: + boxes: (tensor) center-size default boxes from priorbox layers. + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
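The post-processing in RetinaFaceDetection.detect / detect_tensor above follows the usual RetinaFace recipe: decode box offsets against the priors, drop low-confidence detections, keep the top-k by score, run greedy NMS, and finally truncate to keep_top_k. A condensed sketch of that flow is below; it is illustrative only, nms_numpy and postprocess are hypothetical names, the NMS logic mirrors utils/nms/py_cpu_nms.py, and the default thresholds match detect()'s defaults.

import numpy as np

def nms_numpy(dets, thresh):
    # Greedy IoU-based suppression, same logic as face_detect/utils/nms/py_cpu_nms.py.
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = (w * h) / (areas[i] + areas[order[1:]] - w * h)
        order = order[np.where(iou <= thresh)[0] + 1]   # keep only boxes below the overlap threshold
    return keep

def postprocess(boxes, scores, conf_thresh=0.9, top_k=5000, nms_thresh=0.4, keep_top_k=750):
    # boxes: (N, 4) decoded x1,y1,x2,y2 in pixels; scores: (N,) face confidence.
    inds = np.where(scores > conf_thresh)[0]            # 1) drop low-confidence priors
    boxes, scores = boxes[inds], scores[inds]
    order = scores.argsort()[::-1][:top_k]              # 2) keep top-k before NMS
    dets = np.hstack((boxes[order], scores[order][:, None])).astype(np.float32)
    keep = nms_numpy(dets, nms_thresh)                  # 3) greedy NMS
    return dets[keep][:keep_top_k]                      # 4) final keep_top_k detections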
+ """ + return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin + boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax + + +def center_size(boxes): + """ Convert prior_boxes to (cx, cy, w, h) + representation for comparison to center-size form ground truth data. + Args: + boxes: (tensor) point_form boxes + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy + boxes[:, 2:] - boxes[:, :2], 1) # w, h + + +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [A,4]. + box_b: (tensor) bounding boxes, Shape: [B,4]. + Return: + (tensor) intersection area, Shape: [A,B]. + """ + A = box_a.size(0) + B = box_b.size(0) + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), + box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), + box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + return inter[:, :, 0] * inter[:, :, 1] + + +def jaccard(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2]-box_b[:, 0]) * + (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +def matrix_iou(a, b): + """ + return iou of a and b, numpy version for data augenmentation + """ + lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) + rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) + + area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) + area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) + area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) + return area_i / (area_a[:, np.newaxis] + area_b - area_i) + + +def matrix_iof(a, b): + """ + return iof of a and b, numpy version for data augenmentation + """ + lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) + rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) + + area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) + area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) + return area_i / np.maximum(area_a[:, np.newaxis], 1) + + +def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx): + """Match each prior box with the ground truth box of the highest jaccard + overlap, encode the bounding boxes, then return the matched indices + corresponding to both confidence and location preds. + Args: + threshold: (float) The overlap threshold used when mathing boxes. + truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. + priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. + variances: (tensor) Variances corresponding to each prior coord, + Shape: [num_priors, 4]. + labels: (tensor) All the class labels for the image, Shape: [num_obj]. 
+ landms: (tensor) Ground truth landms, Shape [num_obj, 10]. + loc_t: (tensor) Tensor to be filled w/ endcoded location targets. + conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. + landm_t: (tensor) Tensor to be filled w/ endcoded landm targets. + idx: (int) current batch index + Return: + The matched indices corresponding to 1)location 2)confidence 3)landm preds. + """ + # jaccard index + overlaps = jaccard( + truths, + point_form(priors) + ) + # (Bipartite Matching) + # [1,num_objects] best prior for each ground truth + best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) + + # ignore hard gt + valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 + best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] + if best_prior_idx_filter.shape[0] <= 0: + loc_t[idx] = 0 + conf_t[idx] = 0 + return + + # [1,num_priors] best ground truth for each prior + best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) + best_truth_idx.squeeze_(0) + best_truth_overlap.squeeze_(0) + best_prior_idx.squeeze_(1) + best_prior_idx_filter.squeeze_(1) + best_prior_overlap.squeeze_(1) + best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior + # TODO refactor: index best_prior_idx with long tensor + # ensure every gt matches with its prior of max overlap + for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes + best_truth_idx[best_prior_idx[j]] = j + matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来 + conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来 + conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本 + loc = encode(matches, priors, variances) + + matches_landm = landms[best_truth_idx] + landm = encode_landm(matches_landm, priors, variances) + loc_t[idx] = loc # [num_priors,4] encoded offsets to learn + conf_t[idx] = conf # [num_priors] top class label for each prior + landm_t[idx] = landm + + +def encode(matched, priors, variances): + """Encode the variances from the priorbox layers into the ground truth boxes + we have matched (based on jaccard overlap) with the prior boxes. + Args: + matched: (tensor) Coords of ground truth for each prior in point-form + Shape: [num_priors, 4]. + priors: (tensor) Prior boxes in center-offset form + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + encoded boxes (tensor), Shape: [num_priors, 4] + """ + + # dist b/t match center and prior's center + g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, 2:]) + # match wh / prior wh + g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] + g_wh = torch.log(g_wh) / variances[1] + # return target for smooth_l1_loss + return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] + +def encode_landm(matched, priors, variances): + """Encode the variances from the priorbox layers into the ground truth boxes + we have matched (based on jaccard overlap) with the prior boxes. + Args: + matched: (tensor) Coords of ground truth for each prior in point-form + Shape: [num_priors, 10]. + priors: (tensor) Prior boxes in center-offset form + Shape: [num_priors,4]. 
+ variances: (list[float]) Variances of priorboxes + Return: + encoded landm (tensor), Shape: [num_priors, 10] + """ + + # dist b/t match center and prior's center + matched = torch.reshape(matched, (matched.size(0), 5, 2)) + priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) + priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) + priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) + priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) + priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2) + g_cxcy = matched[:, :, :2] - priors[:, :, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, :, 2:]) + # g_cxcy /= priors[:, :, 2:] + g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1) + # return target for smooth_l1_loss + return g_cxcy + + +# Adapted from https://github.com/Hakuyume/chainer-ssd +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat(( + priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes + +def decode_landm(pre, priors, variances): + """Decode landm from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + pre (tensor): landm predictions for loc layers, + Shape: [num_priors,10] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded landm predictions + """ + landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:], + priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:], + priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:], + priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:], + priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:], + ), dim=1) + return landms + + +def log_sum_exp(x): + """Utility function for computing log_sum_exp while determining + This will be used to determine unaveraged confidence loss across + all examples in a batch. + Args: + x (Variable(tensor)): conf_preds from conf layers + """ + x_max = x.data.max() + return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max + + +# Original author: Francisco Massa: +# https://github.com/fmassa/object-detection.torch +# Ported to PyTorch by Max deGroot (02/01/2017) +def nms(boxes, scores, overlap=0.5, top_k=200): + """Apply non-maximum suppression at test time to avoid detecting too many + overlapping bounding boxes for a given object. + Args: + boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. + scores: (tensor) The class predscores for the img, Shape:[num_priors]. + overlap: (float) The overlap thresh for suppressing unnecessary boxes. + top_k: (int) The Maximum number of box preds to consider. + Return: + The indices of the kept boxes with respect to num_priors. 
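encode() and decode() above are inverses of each other for a fixed prior and the same variances. The short round-trip check below restates both formulas inline so it runs on its own; it is an illustrative sketch, not repository code, and the prior/ground-truth values are made up.

import torch

variances = [0.1, 0.2]
prior = torch.tensor([[0.5, 0.5, 0.2, 0.2]])       # (cx, cy, w, h), center-offset form
gt    = torch.tensor([[0.40, 0.42, 0.60, 0.58]])   # (x1, y1, x2, y2), point form

# encode(): the offsets the regression head is trained to predict
g_cxcy = ((gt[:, :2] + gt[:, 2:]) / 2 - prior[:, :2]) / (variances[0] * prior[:, 2:])
g_wh   = torch.log((gt[:, 2:] - gt[:, :2]) / prior[:, 2:]) / variances[1]
loc    = torch.cat([g_cxcy, g_wh], dim=1)

# decode(): undo the encoding, then convert center-size back to corner form
boxes = torch.cat((prior[:, :2] + loc[:, :2] * variances[0] * prior[:, 2:],
                   prior[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), dim=1)
boxes[:, :2] -= boxes[:, 2:] / 2
boxes[:, 2:] += boxes[:, :2]

print(torch.allclose(boxes, gt, atol=1e-5))  # True: decode(encode(gt)) recovers gt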
+ """ + + keep = torch.Tensor(scores.size(0)).fill_(0).long() + if boxes.numel() == 0: + return keep + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + area = torch.mul(x2 - x1, y2 - y1) + v, idx = scores.sort(0) # sort in ascending order + # I = I[v >= 0.01] + idx = idx[-top_k:] # indices of the top-k largest vals + xx1 = boxes.new() + yy1 = boxes.new() + xx2 = boxes.new() + yy2 = boxes.new() + w = boxes.new() + h = boxes.new() + + # keep = torch.Tensor() + count = 0 + while idx.numel() > 0: + i = idx[-1] # index of current largest val + # keep.append(i) + keep[count] = i + count += 1 + if idx.size(0) == 1: + break + idx = idx[:-1] # remove kept element from view + # load bboxes of next highest vals + torch.index_select(x1, 0, idx, out=xx1) + torch.index_select(y1, 0, idx, out=yy1) + torch.index_select(x2, 0, idx, out=xx2) + torch.index_select(y2, 0, idx, out=yy2) + # store element-wise max with next highest score + xx1 = torch.clamp(xx1, min=x1[i]) + yy1 = torch.clamp(yy1, min=y1[i]) + xx2 = torch.clamp(xx2, max=x2[i]) + yy2 = torch.clamp(yy2, max=y2[i]) + w.resize_as_(xx2) + h.resize_as_(yy2) + w = xx2 - xx1 + h = yy2 - yy1 + # check sizes of xx1 and xx2.. after each iteration + w = torch.clamp(w, min=0.0) + h = torch.clamp(h, min=0.0) + inter = w*h + # IoU = i / (area(a) + area(b) - i) + rem_areas = torch.index_select(area, 0, idx) # load remaining areas) + union = (rem_areas - inter) + area[i] + IoU = inter/union # store result in iou + # keep only elements with an IoU <= overlap + idx = idx[IoU.le(overlap)] + return keep, count + + diff --git a/third_part/GPEN/face_detect/utils/nms/__init__.py b/third_part/GPEN/face_detect/utils/nms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/GPEN/face_detect/utils/nms/py_cpu_nms.py b/third_part/GPEN/face_detect/utils/nms/py_cpu_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..54e7b25fef72b518df6dcf8d6fb78b986796c6e3 --- /dev/null +++ b/third_part/GPEN/face_detect/utils/nms/py_cpu_nms.py @@ -0,0 +1,38 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np + +def py_cpu_nms(dets, thresh): + """Pure Python NMS baseline.""" + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/third_part/GPEN/face_detect/utils/timer.py b/third_part/GPEN/face_detect/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..e4b3b8098a5ad41f8d18d42b6b2fedb694aa5508 --- /dev/null +++ b/third_part/GPEN/face_detect/utils/timer.py @@ -0,0 +1,40 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for 
details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import time + + +class Timer(object): + """A simple timer.""" + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + return self.average_time + else: + return self.diff + + def clear(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. diff --git a/third_part/GPEN/face_model/face_gan.py b/third_part/GPEN/face_model/face_gan.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1f73870860c9f90e480d4fe8290591ef5a01c6 --- /dev/null +++ b/third_part/GPEN/face_model/face_gan.py @@ -0,0 +1,55 @@ +''' +@paper: GAN Prior Embedded Network for Blind Face Restoration in the Wild (CVPR2021) +@author: yangxy (yangtao9009@gmail.com) +''' +import torch +import os +import cv2 +import glob +import numpy as np +from torch import nn +import torch.nn.functional as F +from torchvision import transforms, utils +from face_model.gpen_model import FullGenerator + +class FaceGAN(object): + def __init__(self, base_dir='./', size=512, model=None, channel_multiplier=2, narrow=1, is_norm=True, device='cuda'): + self.mfile = os.path.join(base_dir, model+'.pth') + self.n_mlp = 8 + self.device = device + self.is_norm = is_norm + self.resolution = size + self.load_model(channel_multiplier, narrow) + + def load_model(self, channel_multiplier=2, narrow=1): + self.model = FullGenerator(self.resolution, 512, self.n_mlp, channel_multiplier, narrow=narrow, device=self.device) + pretrained_dict = torch.load(self.mfile, map_location=torch.device('cpu')) + self.model.load_state_dict(pretrained_dict) + self.model.to(self.device) + self.model.eval() + + def process(self, img): + img = cv2.resize(img, (self.resolution, self.resolution)) + img_t = self.img2tensor(img) + + with torch.no_grad(): + out, __ = self.model(img_t) + + out = self.tensor2img(out) + + return out + + def img2tensor(self, img): + img_t = torch.from_numpy(img).to(self.device)/255. 
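FaceGAN.process above resizes the face crop, converts it to a normalized tensor, runs the generator, and converts the output back to an image. The img2tensor/tensor2img pair it relies on is a simple (x/255 - 0.5)/0.5 normalization with a BGR<->RGB channel flip; the sketch below shows that round trip in isolation. It is illustrative only: to_tensor and to_image are hypothetical stand-ins for the repository's img2tensor/tensor2img methods.

import numpy as np
import torch

def to_tensor(img_bgr: np.ndarray) -> torch.Tensor:
    # HWC uint8 BGR -> 1x3xHxW float RGB in [-1, 1], mirroring FaceGAN.img2tensor
    t = torch.from_numpy(img_bgr).float() / 255.0
    t = (t - 0.5) / 0.5
    return t.permute(2, 0, 1).unsqueeze(0).flip(1)   # flip(1) swaps the channel order

def to_image(t: torch.Tensor) -> np.ndarray:
    # inverse mapping, mirroring FaceGAN.tensor2img
    t = t * 0.5 + 0.5
    t = t.squeeze(0).permute(1, 2, 0).flip(2)
    return (np.clip(t.cpu().numpy(), 0, 1) * 255.0).astype(np.uint8)

img = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
back = to_image(to_tensor(img))
# round trip is exact up to one grey level of float rounding
assert np.max(np.abs(back.astype(int) - img.astype(int))) <= 1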
+ if self.is_norm: + img_t = (img_t - 0.5) / 0.5 + img_t = img_t.permute(2, 0, 1).unsqueeze(0).flip(1) # BGR->RGB + return img_t + + def tensor2img(self, img_t, pmax=255.0, imtype=np.uint8): + if self.is_norm: + img_t = img_t * 0.5 + 0.5 + img_t = img_t.squeeze(0).permute(1, 2, 0).flip(2) # RGB->BGR + img_np = np.clip(img_t.float().cpu().numpy(), 0, 1) * pmax + + return img_np.astype(imtype) diff --git a/third_part/GPEN/face_model/gpen_model.py b/third_part/GPEN/face_model/gpen_model.py new file mode 100644 index 0000000000000000000000000000000000000000..fb5cd6fb7947b5c190a6d867998d7bd841ee3801 --- /dev/null +++ b/third_part/GPEN/face_model/gpen_model.py @@ -0,0 +1,746 @@ +''' +@paper: GAN Prior Embedded Network for Blind Face Restoration in the Wild (CVPR2021) +@author: yangxy (yangtao9009@gmail.com) +''' +import math +import random +import functools +import operator +import itertools + +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Function + +from face_model.op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d + +class PixelNorm(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, input): + return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8) + + +def make_kernel(k): + k = torch.tensor(k, dtype=torch.float32) + + if k.ndim == 1: + k = k[None, :] * k[:, None] + + k /= k.sum() + + return k + + +class Upsample(nn.Module): + def __init__(self, kernel, factor=2, device='cpu'): + super().__init__() + + self.factor = factor + kernel = make_kernel(kernel) * (factor ** 2) + self.register_buffer('kernel', kernel) + + p = kernel.shape[0] - factor + + pad0 = (p + 1) // 2 + factor - 1 + pad1 = p // 2 + + self.pad = (pad0, pad1) + self.device = device + + def forward(self, input): + out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad, device=self.device) + + return out + + +class Downsample(nn.Module): + def __init__(self, kernel, factor=2, device='cpu'): + super().__init__() + + self.factor = factor + kernel = make_kernel(kernel) + self.register_buffer('kernel', kernel) + + p = kernel.shape[0] - factor + + pad0 = (p + 1) // 2 + pad1 = p // 2 + + self.pad = (pad0, pad1) + self.device = device + + def forward(self, input): + out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad, device=self.device) + + return out + + +class Blur(nn.Module): + def __init__(self, kernel, pad, upsample_factor=1, device='cpu'): + super().__init__() + + kernel = make_kernel(kernel) + + if upsample_factor > 1: + kernel = kernel * (upsample_factor ** 2) + + self.register_buffer('kernel', kernel) + + self.pad = pad + self.device = device + + def forward(self, input): + out = upfirdn2d(input, self.kernel, pad=self.pad, device=self.device) + + return out + + +class EqualConv2d(nn.Module): + def __init__( + self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True + ): + super().__init__() + + self.weight = nn.Parameter( + torch.randn(out_channel, in_channel, kernel_size, kernel_size) + ) + self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2) + + self.stride = stride + self.padding = padding + + if bias: + self.bias = nn.Parameter(torch.zeros(out_channel)) + + else: + self.bias = None + + def forward(self, input): + out = F.conv2d( + input, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + + return out + + def __repr__(self): + return ( + f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},' + 
f' {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})' + ) + + +class EqualLinear(nn.Module): + def __init__( + self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None, device='cpu' + ): + super().__init__() + + self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul)) + + if bias: + self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init)) + + else: + self.bias = None + + self.activation = activation + self.device = device + + self.scale = (1 / math.sqrt(in_dim)) * lr_mul + self.lr_mul = lr_mul + + def forward(self, input): + if self.activation: + out = F.linear(input, self.weight * self.scale) + out = fused_leaky_relu(out, self.bias * self.lr_mul, device=self.device) + + else: + out = F.linear(input, self.weight * self.scale, bias=self.bias * self.lr_mul) + + return out + + def __repr__(self): + return ( + f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})' + ) + + +class ScaledLeakyReLU(nn.Module): + def __init__(self, negative_slope=0.2): + super().__init__() + + self.negative_slope = negative_slope + + def forward(self, input): + out = F.leaky_relu(input, negative_slope=self.negative_slope) + + return out * math.sqrt(2) + + +class ModulatedConv2d(nn.Module): + def __init__( + self, + in_channel, + out_channel, + kernel_size, + style_dim, + demodulate=True, + upsample=False, + downsample=False, + blur_kernel=[1, 3, 3, 1], + device='cpu' + ): + super().__init__() + + self.eps = 1e-8 + self.kernel_size = kernel_size + self.in_channel = in_channel + self.out_channel = out_channel + self.upsample = upsample + self.downsample = downsample + + if upsample: + factor = 2 + p = (len(blur_kernel) - factor) - (kernel_size - 1) + pad0 = (p + 1) // 2 + factor - 1 + pad1 = p // 2 + 1 + + self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor, device=device) + + if downsample: + factor = 2 + p = (len(blur_kernel) - factor) + (kernel_size - 1) + pad0 = (p + 1) // 2 + pad1 = p // 2 + + self.blur = Blur(blur_kernel, pad=(pad0, pad1), device=device) + + fan_in = in_channel * kernel_size ** 2 + self.scale = 1 / math.sqrt(fan_in) + self.padding = kernel_size // 2 + + self.weight = nn.Parameter( + torch.randn(1, out_channel, in_channel, kernel_size, kernel_size) + ) + + self.modulation = EqualLinear(style_dim, in_channel, bias_init=1) + + self.demodulate = demodulate + + def __repr__(self): + return ( + f'{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, ' + f'upsample={self.upsample}, downsample={self.downsample})' + ) + + def forward(self, input, style): + batch, in_channel, height, width = input.shape + + style = self.modulation(style).view(batch, 1, in_channel, 1, 1) + weight = self.scale * self.weight * style + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8) + weight = weight * demod.view(batch, self.out_channel, 1, 1, 1) + + weight = weight.view( + batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size + ) + + if self.upsample: + input = input.view(1, batch * in_channel, height, width) + weight = weight.view( + batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size + ) + weight = weight.transpose(1, 2).reshape( + batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size + ) + out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch) + _, _, height, width = out.shape + out = out.view(batch, self.out_channel, height, width) + out = self.blur(out) + + elif self.downsample: + 
input = self.blur(input) + _, _, height, width = input.shape + input = input.view(1, batch * in_channel, height, width) + out = F.conv2d(input, weight, padding=0, stride=2, groups=batch) + _, _, height, width = out.shape + out = out.view(batch, self.out_channel, height, width) + + else: + input = input.view(1, batch * in_channel, height, width) + out = F.conv2d(input, weight, padding=self.padding, groups=batch) + _, _, height, width = out.shape + out = out.view(batch, self.out_channel, height, width) + + return out + + +class NoiseInjection(nn.Module): + def __init__(self, isconcat=True): + super().__init__() + + self.isconcat = isconcat + self.weight = nn.Parameter(torch.zeros(1)) + + def forward(self, image, noise=None): + if noise is None: + batch, _, height, width = image.shape + noise = image.new_empty(batch, 1, height, width).normal_() + + if self.isconcat: + return torch.cat((image, self.weight * noise), dim=1) + else: + return image + self.weight * noise + + +class ConstantInput(nn.Module): + def __init__(self, channel, size=4): + super().__init__() + + self.input = nn.Parameter(torch.randn(1, channel, size, size)) + + def forward(self, input): + batch = input.shape[0] + out = self.input.repeat(batch, 1, 1, 1) + + return out + + +class StyledConv(nn.Module): + def __init__( + self, + in_channel, + out_channel, + kernel_size, + style_dim, + upsample=False, + blur_kernel=[1, 3, 3, 1], + demodulate=True, + isconcat=True, + device='cpu' + ): + super().__init__() + + self.conv = ModulatedConv2d( + in_channel, + out_channel, + kernel_size, + style_dim, + upsample=upsample, + blur_kernel=blur_kernel, + demodulate=demodulate, + device=device + ) + + self.noise = NoiseInjection(isconcat) + #self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1)) + #self.activate = ScaledLeakyReLU(0.2) + feat_multiplier = 2 if isconcat else 1 + self.activate = FusedLeakyReLU(out_channel*feat_multiplier, device=device) + + def forward(self, input, style, noise=None): + out = self.conv(input, style) + out = self.noise(out, noise=noise) + # out = out + self.bias + out = self.activate(out) + + return out + + +class ToRGB(nn.Module): + def __init__(self, in_channel, style_dim, upsample=True, blur_kernel=[1, 3, 3, 1], device='cpu'): + super().__init__() + + if upsample: + self.upsample = Upsample(blur_kernel, device=device) + + self.conv = ModulatedConv2d(in_channel, 3, 1, style_dim, demodulate=False, device=device) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, input, style, skip=None): + out = self.conv(input, style) + out = out + self.bias + + if skip is not None: + skip = self.upsample(skip) + + out = out + skip + + return out + +class Generator(nn.Module): + def __init__( + self, + size, + style_dim, + n_mlp, + channel_multiplier=2, + blur_kernel=[1, 3, 3, 1], + lr_mlp=0.01, + isconcat=True, + narrow=1, + device='cpu' + ): + super().__init__() + + self.size = size + self.n_mlp = n_mlp + self.style_dim = style_dim + self.feat_multiplier = 2 if isconcat else 1 + + layers = [PixelNorm()] + + for i in range(n_mlp): + layers.append( + EqualLinear( + style_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu', device=device + ) + ) + + self.style = nn.Sequential(*layers) + + self.channels = { + 4: int(512 * narrow), + 8: int(512 * narrow), + 16: int(512 * narrow), + 32: int(512 * narrow), + 64: int(256 * channel_multiplier * narrow), + 128: int(128 * channel_multiplier * narrow), + 256: int(64 * channel_multiplier * narrow), + 512: int(32 * channel_multiplier * narrow), + 1024: 
int(16 * channel_multiplier * narrow) + } + + self.input = ConstantInput(self.channels[4]) + self.conv1 = StyledConv( + self.channels[4], self.channels[4], 3, style_dim, blur_kernel=blur_kernel, isconcat=isconcat, device=device + ) + self.to_rgb1 = ToRGB(self.channels[4]*self.feat_multiplier, style_dim, upsample=False, device=device) + + self.log_size = int(math.log(size, 2)) + + self.convs = nn.ModuleList() + self.upsamples = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + + in_channel = self.channels[4] + + for i in range(3, self.log_size + 1): + out_channel = self.channels[2 ** i] + + self.convs.append( + StyledConv( + in_channel*self.feat_multiplier, + out_channel, + 3, + style_dim, + upsample=True, + blur_kernel=blur_kernel, + isconcat=isconcat, + device=device + ) + ) + + self.convs.append( + StyledConv( + out_channel*self.feat_multiplier, out_channel, 3, style_dim, blur_kernel=blur_kernel, isconcat=isconcat, device=device + ) + ) + + self.to_rgbs.append(ToRGB(out_channel*self.feat_multiplier, style_dim, device=device)) + + in_channel = out_channel + + self.n_latent = self.log_size * 2 - 2 + + def make_noise(self): + device = self.input.input.device + + noises = [torch.randn(1, 1, 2 ** 2, 2 ** 2, device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2 ** i, 2 ** i, device=device)) + + return noises + + def mean_latent(self, n_latent): + latent_in = torch.randn( + n_latent, self.style_dim, device=self.input.input.device + ) + latent = self.style(latent_in).mean(0, keepdim=True) + + return latent + + def get_latent(self, input): + return self.style(input) + + def forward( + self, + styles, + return_latents=False, + inject_index=None, + truncation=1, + truncation_latent=None, + input_is_latent=False, + noise=None, + ): + if not input_is_latent: + styles = [self.style(s) for s in styles] + + if noise is None: + ''' + noise = [None] * (2 * (self.log_size - 2) + 1) + ''' + noise = [] + batch = styles[0].shape[0] + for i in range(self.n_mlp + 1): + size = 2 ** (i+2) + noise.append(torch.randn(batch, self.channels[size], size, size, device=styles[0].device)) + + if truncation < 1: + style_t = [] + + for style in styles: + style_t.append( + truncation_latent + truncation * (style - truncation_latent) + ) + + styles = style_t + + if len(styles) < 2: + inject_index = self.n_latent + + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + + else: + if inject_index is None: + inject_index = random.randint(1, self.n_latent - 1) + + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = styles[1].unsqueeze(1).repeat(1, self.n_latent - inject_index, 1) + + latent = torch.cat([latent, latent2], 1) + + out = self.input(latent) + out = self.conv1(out, latent[:, 0], noise=noise[0]) + + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs + ): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) + + i += 2 + + image = skip + + if return_latents: + return image, latent + + else: + return image, None + +class ConvLayer(nn.Sequential): + def __init__( + self, + in_channel, + out_channel, + kernel_size, + downsample=False, + blur_kernel=[1, 3, 3, 1], + bias=True, + activate=True, + device='cpu' + ): + layers = [] + + if downsample: + factor = 2 + p = (len(blur_kernel) - factor) + (kernel_size - 1) + pad0 = (p + 1) 
// 2 + pad1 = p // 2 + + layers.append(Blur(blur_kernel, pad=(pad0, pad1), device=device)) + + stride = 2 + self.padding = 0 + + else: + stride = 1 + self.padding = kernel_size // 2 + + layers.append( + EqualConv2d( + in_channel, + out_channel, + kernel_size, + padding=self.padding, + stride=stride, + bias=bias and not activate, + ) + ) + + if activate: + if bias: + layers.append(FusedLeakyReLU(out_channel, device=device)) + + else: + layers.append(ScaledLeakyReLU(0.2)) + + super().__init__(*layers) + + +class ResBlock(nn.Module): + def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1], device='cpu'): + super().__init__() + + self.conv1 = ConvLayer(in_channel, in_channel, 3, device=device) + self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True, device=device) + + self.skip = ConvLayer( + in_channel, out_channel, 1, downsample=True, activate=False, bias=False + ) + + def forward(self, input): + out = self.conv1(input) + out = self.conv2(out) + + skip = self.skip(input) + out = (out + skip) / math.sqrt(2) + + return out + +class FullGenerator(nn.Module): + def __init__( + self, + size, + style_dim, + n_mlp, + channel_multiplier=2, + blur_kernel=[1, 3, 3, 1], + lr_mlp=0.01, + isconcat=True, + narrow=1, + device='cpu' + ): + super().__init__() + channels = { + 4: int(512 * narrow), + 8: int(512 * narrow), + 16: int(512 * narrow), + 32: int(512 * narrow), + 64: int(256 * channel_multiplier * narrow), + 128: int(128 * channel_multiplier * narrow), + 256: int(64 * channel_multiplier * narrow), + 512: int(32 * channel_multiplier * narrow), + 1024: int(16 * channel_multiplier * narrow) + } + + self.log_size = int(math.log(size, 2)) + self.generator = Generator(size, style_dim, n_mlp, channel_multiplier=channel_multiplier, blur_kernel=blur_kernel, lr_mlp=lr_mlp, isconcat=isconcat, narrow=narrow, device=device) + + conv = [ConvLayer(3, channels[size], 1, device=device)] + self.ecd0 = nn.Sequential(*conv) + in_channel = channels[size] + + self.names = ['ecd%d'%i for i in range(self.log_size-1)] + for i in range(self.log_size, 2, -1): + out_channel = channels[2 ** (i - 1)] + #conv = [ResBlock(in_channel, out_channel, blur_kernel)] + conv = [ConvLayer(in_channel, out_channel, 3, downsample=True, device=device)] + setattr(self, self.names[self.log_size-i+1], nn.Sequential(*conv)) + in_channel = out_channel + self.final_linear = nn.Sequential(EqualLinear(channels[4] * 4 * 4, style_dim, activation='fused_lrelu', device=device)) + + def forward(self, + inputs, + return_latents=False, + inject_index=None, + truncation=1, + truncation_latent=None, + input_is_latent=False, + ): + noise = [] + for i in range(self.log_size-1): + ecd = getattr(self, self.names[i]) + inputs = ecd(inputs) + noise.append(inputs) + + inputs = inputs.view(inputs.shape[0], -1) + outs = self.final_linear(inputs) + noise = list(itertools.chain.from_iterable(itertools.repeat(x, 2) for x in noise))[::-1] + outs = self.generator([outs], return_latents, inject_index, truncation, truncation_latent, input_is_latent, noise=noise[1:]) + return outs + +class Discriminator(nn.Module): + def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1], narrow=1, device='cpu'): + super().__init__() + + channels = { + 4: int(512 * narrow), + 8: int(512 * narrow), + 16: int(512 * narrow), + 32: int(512 * narrow), + 64: int(256 * channel_multiplier * narrow), + 128: int(128 * channel_multiplier * narrow), + 256: int(64 * channel_multiplier * narrow), + 512: int(32 * channel_multiplier * narrow), + 1024: int(16 * 
channel_multiplier * narrow) + } + + convs = [ConvLayer(3, channels[size], 1, device=device)] + + log_size = int(math.log(size, 2)) + + in_channel = channels[size] + + for i in range(log_size, 2, -1): + out_channel = channels[2 ** (i - 1)] + + convs.append(ResBlock(in_channel, out_channel, blur_kernel, device=device)) + + in_channel = out_channel + + self.convs = nn.Sequential(*convs) + + self.stddev_group = 4 + self.stddev_feat = 1 + + self.final_conv = ConvLayer(in_channel + 1, channels[4], 3, device=device) + self.final_linear = nn.Sequential( + EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu', device=device), + EqualLinear(channels[4], 1), + ) + + def forward(self, input): + out = self.convs(input) + + batch, channel, height, width = out.shape + group = min(batch, self.stddev_group) + stddev = out.view( + group, -1, self.stddev_feat, channel // self.stddev_feat, height, width + ) + stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8) + stddev = stddev.mean([2, 3, 4], keepdims=True).squeeze(2) + stddev = stddev.repeat(group, 1, height, width) + out = torch.cat([out, stddev], 1) + + out = self.final_conv(out) + + out = out.view(batch, -1) + out = self.final_linear(out) + return out diff --git a/third_part/GPEN/face_model/op/__init__.py b/third_part/GPEN/face_model/op/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0918d92285955855be89f00096b888ee5597ce3 --- /dev/null +++ b/third_part/GPEN/face_model/op/__init__.py @@ -0,0 +1,2 @@ +from .fused_act import FusedLeakyReLU, fused_leaky_relu +from .upfirdn2d import upfirdn2d diff --git a/third_part/GPEN/face_model/op/fused_act.py b/third_part/GPEN/face_model/op/fused_act.py new file mode 100644 index 0000000000000000000000000000000000000000..59db126ebcb59423cadd12baa830cbadce8b0292 --- /dev/null +++ b/third_part/GPEN/face_model/op/fused_act.py @@ -0,0 +1,96 @@ +import os +import platform + +import torch +from torch import nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.utils.cpp_extension import load, _import_module_from_library + +# if running GPEN without cuda, please comment line 11-19 +if platform.system() == 'Linux' and torch.cuda.is_available(): + module_path = os.path.dirname(__file__) + fused = load( + 'fused', + sources=[ + os.path.join(module_path, 'fused_bias_act.cpp'), + os.path.join(module_path, 'fused_bias_act_kernel.cu'), + ], + ) + + +#fused = _import_module_from_library('fused', '/tmp/torch_extensions/fused', True) + + +class FusedLeakyReLUFunctionBackward(Function): + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = fused.fused_bias_act( + grad_output, empty, out, 3, 1, negative_slope, scale + ) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + out, = ctx.saved_tensors + gradgrad_out = fused.fused_bias_act( + gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale + ) + + return gradgrad_out, None, None, None + + +class FusedLeakyReLUFunction(Function): + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale) + ctx.save_for_backward(out) + 
ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + out, = ctx.saved_tensors + + grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale + ) + + return grad_input, grad_bias, None, None + + +class FusedLeakyReLU(nn.Module): + def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5, device='cpu'): + super().__init__() + + self.bias = nn.Parameter(torch.zeros(channel)) + self.negative_slope = negative_slope + self.scale = scale + self.device = device + + def forward(self, input): + return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale, self.device) + + +def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5, device='cpu'): + if platform.system() == 'Linux' and torch.cuda.is_available() and device != 'cpu': + return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) + else: + return scale * F.leaky_relu(input + bias.view((1, -1)+(1,)*(len(input.shape)-2)), negative_slope=negative_slope) diff --git a/third_part/GPEN/face_model/op/fused_bias_act.cpp b/third_part/GPEN/face_model/op/fused_bias_act.cpp new file mode 100644 index 0000000000000000000000000000000000000000..02be898f970bcc8ea297867fcaa4e71b24b3d949 --- /dev/null +++ b/third_part/GPEN/face_model/op/fused_bias_act.cpp @@ -0,0 +1,21 @@ +#include + + +torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, + int act, int grad, float alpha, float scale); + +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) + +torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, + int act, int grad, float alpha, float scale) { + CHECK_CUDA(input); + CHECK_CUDA(bias); + + return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)"); +} \ No newline at end of file diff --git a/third_part/GPEN/face_model/op/fused_bias_act_kernel.cu b/third_part/GPEN/face_model/op/fused_bias_act_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..c9fa56fea7ede7072dc8925cfb0148f136eb85b8 --- /dev/null +++ b/third_part/GPEN/face_model/op/fused_bias_act_kernel.cu @@ -0,0 +1,99 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#include + +#include +#include +#include +#include + +#include +#include + + +template +static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref, + int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) { + int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x; + + scalar_t zero = 0.0; + + for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) { + scalar_t x = p_x[xi]; + + if (use_bias) { + x += p_b[(xi / step_b) % size_b]; + } + + scalar_t ref = use_ref ? 
p_ref[xi] : zero; + + scalar_t y; + + switch (act * 10 + grad) { + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0; break; + + case 30: y = (x > 0.0) ? x : x * alpha; break; + case 31: y = (ref > 0.0) ? x : x * alpha; break; + case 32: y = 0.0; break; + } + + out[xi] = y * scale; + } +} + + +torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, + int act, int grad, float alpha, float scale) { + int curDevice = -1; + cudaGetDevice(&curDevice); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); + + auto x = input.contiguous(); + auto b = bias.contiguous(); + auto ref = refer.contiguous(); + + int use_bias = b.numel() ? 1 : 0; + int use_ref = ref.numel() ? 1 : 0; + + int size_x = x.numel(); + int size_b = b.numel(); + int step_b = 1; + + for (int i = 1 + 1; i < x.dim(); i++) { + step_b *= x.size(i); + } + + int loop_x = 4; + int block_size = 4 * 32; + int grid_size = (size_x - 1) / (loop_x * block_size) + 1; + + auto y = torch::empty_like(x); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] { + fused_bias_act_kernel<<>>( + y.data_ptr(), + x.data_ptr(), + b.data_ptr(), + ref.data_ptr(), + act, + grad, + alpha, + scale, + loop_x, + size_x, + step_b, + size_b, + use_bias, + use_ref + ); + }); + + return y; +} \ No newline at end of file diff --git a/third_part/GPEN/face_model/op/upfirdn2d.cpp b/third_part/GPEN/face_model/op/upfirdn2d.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d2e633dc896433c205e18bc3e455539192ff968e --- /dev/null +++ b/third_part/GPEN/face_model/op/upfirdn2d.cpp @@ -0,0 +1,23 @@ +#include + + +torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel, + int up_x, int up_y, int down_x, int down_y, + int pad_x0, int pad_x1, int pad_y0, int pad_y1); + +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) + +torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, + int up_x, int up_y, int down_x, int down_y, + int pad_x0, int pad_x1, int pad_y0, int pad_y1) { + CHECK_CUDA(input); + CHECK_CUDA(kernel); + + return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)"); +} \ No newline at end of file diff --git a/third_part/GPEN/face_model/op/upfirdn2d.py b/third_part/GPEN/face_model/op/upfirdn2d.py new file mode 100644 index 0000000000000000000000000000000000000000..2e3844749dea0a79fed49f161d9760ee6b4c07fd --- /dev/null +++ b/third_part/GPEN/face_model/op/upfirdn2d.py @@ -0,0 +1,194 @@ +import os +import platform + +import torch +import torch.nn.functional as F +from torch.autograd import Function +from torch.utils.cpp_extension import load, _import_module_from_library + +# if running GPEN without cuda, please comment line 10-18 +if platform.system() == 'Linux' and torch.cuda.is_available(): + module_path = os.path.dirname(__file__) + upfirdn2d_op = load( + 'upfirdn2d', + sources=[ + os.path.join(module_path, 'upfirdn2d.cpp'), + os.path.join(module_path, 'upfirdn2d_kernel.cu'), + ], + ) + + +#upfirdn2d_op = _import_module_from_library('upfirdn2d', '/tmp/torch_extensions/upfirdn2d', True) + +class UpFirDn2dBackward(Function): + @staticmethod + def forward( + 
ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size + ): + + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_op.upfirdn2d( + grad_output, + grad_kernel, + down_x, + down_y, + up_x, + up_y, + g_pad_x0, + g_pad_x1, + g_pad_y0, + g_pad_y1, + ) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + kernel, = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_op.upfirdn2d( + gradgrad_input, + kernel, + ctx.up_x, + ctx.up_y, + ctx.down_x, + ctx.down_y, + ctx.pad_x0, + ctx.pad_x1, + ctx.pad_y0, + ctx.pad_y1, + ) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view( + ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1] + ) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + batch, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_op.upfirdn2d( + input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1 + ) + # out = out.view(major, out_h, out_w, minor) + out = out.view(-1, channel, out_h, out_w) + + return out + + @staticmethod + def backward(ctx, grad_output): + kernel, grad_kernel = ctx.saved_tensors + + grad_input = UpFirDn2dBackward.apply( + grad_output, + kernel, + grad_kernel, + ctx.up, + ctx.down, + ctx.pad, + ctx.g_pad, + ctx.in_size, + ctx.out_size, + ) + + return grad_input, None, None, None, None + + +def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0), device='cpu'): + if platform.system() == 'Linux' and torch.cuda.is_available() and device != 'cpu': + out = UpFirDn2d.apply( + input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1]) + ) + else: + out = upfirdn2d_native(input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1]) + + return out + + +def upfirdn2d_native( + input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1 +): + input = input.permute(0, 2, 3, 1) + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + out = input.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 
1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad( + out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)] + ) + out = out[ + :, + max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), + max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), + :, + ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape( + [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1] + ) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, + minor, + in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, + in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + # out = out.permute(0, 2, 3, 1) + return out[:, :, ::down_y, ::down_x] + diff --git a/third_part/GPEN/face_model/op/upfirdn2d_kernel.cu b/third_part/GPEN/face_model/op/upfirdn2d_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..2a710aa6adc3d43ac93136a1814e3c39970e1c7e --- /dev/null +++ b/third_part/GPEN/face_model/op/upfirdn2d_kernel.cu @@ -0,0 +1,272 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#include + +#include +#include +#include +#include + +#include +#include + + +static __host__ __device__ __forceinline__ int floor_div(int a, int b) { + int c = a / b; + + if (c * b > a) { + c--; + } + + return c; +} + + +struct UpFirDn2DKernelParams { + int up_x; + int up_y; + int down_x; + int down_y; + int pad_x0; + int pad_x1; + int pad_y0; + int pad_y1; + + int major_dim; + int in_h; + int in_w; + int minor_dim; + int kernel_h; + int kernel_w; + int out_h; + int out_w; + int loop_major; + int loop_x; +}; + + +template +__global__ void upfirdn2d_kernel(scalar_t* out, const scalar_t* input, const scalar_t* kernel, const UpFirDn2DKernelParams p) { + const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1; + const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1; + + __shared__ volatile float sk[kernel_h][kernel_w]; + __shared__ volatile float sx[tile_in_h][tile_in_w]; + + int minor_idx = blockIdx.x; + int tile_out_y = minor_idx / p.minor_dim; + minor_idx -= tile_out_y * p.minor_dim; + tile_out_y *= tile_out_h; + int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w; + int major_idx_base = blockIdx.z * p.loop_major; + + if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h | major_idx_base >= p.major_dim) { + return; + } + + for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w; tap_idx += blockDim.x) { + int ky = tap_idx / kernel_w; + int kx = tap_idx - ky * kernel_w; + scalar_t v = 0.0; + + if (kx < p.kernel_w & ky < p.kernel_h) { + v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)]; + } + + sk[ky][kx] = v; + } + + for (int loop_major = 0, major_idx = major_idx_base; loop_major < p.loop_major & major_idx < p.major_dim; loop_major++, major_idx++) { + for (int loop_x = 0, tile_out_x = tile_out_x_base; loop_x < p.loop_x & tile_out_x < p.out_w; loop_x++, tile_out_x += tile_out_w) { + int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0; + int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0; + int tile_in_x = floor_div(tile_mid_x, up_x); + int tile_in_y = floor_div(tile_mid_y, up_y); + + __syncthreads(); + + for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w; in_idx += blockDim.x) { + int rel_in_y = in_idx / tile_in_w; + int 
rel_in_x = in_idx - rel_in_y * tile_in_w; + int in_x = rel_in_x + tile_in_x; + int in_y = rel_in_y + tile_in_y; + + scalar_t v = 0.0; + + if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) { + v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim + minor_idx]; + } + + sx[rel_in_y][rel_in_x] = v; + } + + __syncthreads(); + for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w; out_idx += blockDim.x) { + int rel_out_y = out_idx / tile_out_w; + int rel_out_x = out_idx - rel_out_y * tile_out_w; + int out_x = rel_out_x + tile_out_x; + int out_y = rel_out_y + tile_out_y; + + int mid_x = tile_mid_x + rel_out_x * down_x; + int mid_y = tile_mid_y + rel_out_y * down_y; + int in_x = floor_div(mid_x, up_x); + int in_y = floor_div(mid_y, up_y); + int rel_in_x = in_x - tile_in_x; + int rel_in_y = in_y - tile_in_y; + int kernel_x = (in_x + 1) * up_x - mid_x - 1; + int kernel_y = (in_y + 1) * up_y - mid_y - 1; + + scalar_t v = 0.0; + + #pragma unroll + for (int y = 0; y < kernel_h / up_y; y++) + #pragma unroll + for (int x = 0; x < kernel_w / up_x; x++) + v += sx[rel_in_y + y][rel_in_x + x] * sk[kernel_y + y * up_y][kernel_x + x * up_x]; + + if (out_x < p.out_w & out_y < p.out_h) { + out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim + minor_idx] = v; + } + } + } + } +} + + +torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel, + int up_x, int up_y, int down_x, int down_y, + int pad_x0, int pad_x1, int pad_y0, int pad_y1) { + int curDevice = -1; + cudaGetDevice(&curDevice); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); + + UpFirDn2DKernelParams p; + + auto x = input.contiguous(); + auto k = kernel.contiguous(); + + p.major_dim = x.size(0); + p.in_h = x.size(1); + p.in_w = x.size(2); + p.minor_dim = x.size(3); + p.kernel_h = k.size(0); + p.kernel_w = k.size(1); + p.up_x = up_x; + p.up_y = up_y; + p.down_x = down_x; + p.down_y = down_y; + p.pad_x0 = pad_x0; + p.pad_x1 = pad_x1; + p.pad_y0 = pad_y0; + p.pad_y1 = pad_y1; + + p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) / p.down_y; + p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) / p.down_x; + + auto out = at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options()); + + int mode = -1; + + int tile_out_h; + int tile_out_w; + + if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) { + mode = 1; + tile_out_h = 16; + tile_out_w = 64; + } + + if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 3 && p.kernel_w <= 3) { + mode = 2; + tile_out_h = 16; + tile_out_w = 64; + } + + if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) { + mode = 3; + tile_out_h = 16; + tile_out_w = 64; + } + + if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 2 && p.kernel_w <= 2) { + mode = 4; + tile_out_h = 16; + tile_out_w = 64; + } + + if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 4 && p.kernel_w <= 4) { + mode = 5; + tile_out_h = 8; + tile_out_w = 32; + } + + if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 2 && p.kernel_w <= 2) { + mode = 6; + tile_out_h = 8; + tile_out_w = 32; + } + + dim3 block_size; + dim3 grid_size; + + if (tile_out_h > 0 && tile_out_w) { + p.loop_major = (p.major_dim - 1) / 16384 + 1; + p.loop_x = 1; + block_size = dim3(32 * 8, 1, 1); + grid_size = dim3(((p.out_h - 1) / 
tile_out_h + 1) * p.minor_dim, + (p.out_w - 1) / (p.loop_x * tile_out_w) + 1, + (p.major_dim - 1) / p.loop_major + 1); + } + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] { + switch (mode) { + case 1: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + + case 2: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + + case 3: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + + case 4: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + + case 5: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + + case 6: + upfirdn2d_kernel<<>>( + out.data_ptr(), x.data_ptr(), k.data_ptr(), p + ); + + break; + } + }); + + return out; +} \ No newline at end of file diff --git a/third_part/GPEN/face_morpher/.gitignore b/third_part/GPEN/face_morpher/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cbe661a4706b7961526ea417db0004e6717e7606 --- /dev/null +++ b/third_part/GPEN/face_morpher/.gitignore @@ -0,0 +1,3 @@ +*.pyc +*.swp +MANIFEST diff --git a/third_part/GPEN/face_morpher/README.rst b/third_part/GPEN/face_morpher/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..a864e85a5552868a66860c96ea2f01df95fb5448 --- /dev/null +++ b/third_part/GPEN/face_morpher/README.rst @@ -0,0 +1,260 @@ +Face Morpher +============ + +| Warp, average and morph human faces! +| Scripts will automatically detect frontal faces and skip images if + none is detected. + +Built with Python, `dlib`_, Numpy, Scipy, dlib. + +| Supported on Python 2.7, Python 3.6+ +| Tested on macOS Mojave and 64bit Linux (dockerized). + +Requirements +-------------- +- ``pip install -r requirements.txt`` +- Download `http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2` and extract file. +- Export environment variable ``DLIB_DATA_DIR`` to the folder where ``shape_predictor_68_face_landmarks.dat`` is located. Default ``data``. E.g ``export DLIB_DATA_DIR=/Downloads/data`` + +Either: + +- `Use as local command-line utility`_ +- `Use as pip library`_ +- `Try out in a docker container`_ + +.. _`Use as local command-line utility`: + +Use as local command-line utility +--------------------------------- +:: + + $ git clone https://github.com/alyssaq/face_morpher + +Morphing Faces +-------------- + +Morph from a source to destination image: + +:: + + python facemorpher/morpher.py --src= --dest= --plot + +Morph through a series of images in a folder: + +:: + + python facemorpher/morpher.py --images= --out_video=out.avi + +All options listed in ``morpher.py`` (pasted below): + +:: + + Morph from source to destination face or + Morph through all images in a folder + + Usage: + morpher.py (--src= --dest= | --images=) + [--width=] [--height=] + [--num=] [--fps=] + [--out_frames=] [--out_video=] + [--plot] [--background=(black|transparent|average)] + + Options: + -h, --help Show this screen. 
+ --src= Filepath to source image (.jpg, .jpeg, .png) + --dest= Filepath to destination image (.jpg, .jpeg, .png) + --images= Folderpath to images + --width= Custom width of the images/video [default: 500] + --height= Custom height of the images/video [default: 600] + --num= Number of morph frames [default: 20] + --fps= Number frames per second for the video [default: 10] + --out_frames= Folder path to save all image frames + --out_video= Filename to save a video + --plot Flag to plot images to result.png [default: False] + --background= Background of images to be one of (black|transparent|average) [default: black] + --version Show version. + +Averaging Faces +--------------- + +Average faces from all images in a folder: + +:: + + python facemorpher/averager.py --images= --out=average.png + +All options listed in ``averager.py`` (pasted below): + +:: + + Face averager + + Usage: + averager.py --images= [--blur] [--plot] + [--background=(black|transparent|average)] + [--width=] [--height=] + [--out=] [--destimg=] + + Options: + -h, --help Show this screen. + --images= Folder to images (.jpg, .jpeg, .png) + --blur Flag to blur edges of image [default: False] + --width= Custom width of the images/video [default: 500] + --height= Custom height of the images/video [default: 600] + --out= Filename to save the average face [default: result.png] + --destimg= Destination face image to overlay average face + --plot Flag to display the average face [default: False] + --background= Background of image to be one of (black|transparent|average) [default: black] + --version Show version. + +Steps (facemorpher folder) +-------------------------- + +1. Locator +^^^^^^^^^^ + +- Locates face points +- For a different locator, return an array of (x, y) control face + points + +2. Aligner +^^^^^^^^^^ + +- Align faces by resizing, centering and cropping to given size + +3. Warper +^^^^^^^^^ + +- Given 2 images and its face points, warp one image to the other +- Triangulates face points +- Affine transforms each triangle with bilinear interpolation + +4a. Morpher +^^^^^^^^^^^ + +- Morph between 2 or more images + +4b. Averager +^^^^^^^^^^^^ + +- Average faces from 2 or more images + +Blender +^^^^^^^ + +Optional blending of warped image: + +- Weighted average +- Alpha feathering +- Poisson blend + +Examples - `Being John Malkovich`_ +---------------------------------- + +Create a morphing video between the 2 images: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +| ``> python facemorpher/morpher.py --src=alyssa.jpg --dest=john_malkovich.jpg`` +| ``--out_video=out.avi`` + +(out.avi played and recorded as gif) + +.. figure:: https://raw.github.com/alyssaq/face_morpher/master/examples/being_john_malvokich.gif + :alt: gif + +Save the frames to a folder: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +| ``> python facemorpher/morpher.py --src=alyssa.jpg --dest=john_malkovich.jpg`` +| ``--out_frames=out_folder --num=30`` + +Plot the frames: +^^^^^^^^^^^^^^^^ + +| ``> python facemorpher/morpher.py --src=alyssa.jpg --dest=john_malkovich.jpg`` +| ``--num=12 --plot`` + +.. figure:: https://raw.github.com/alyssaq/face_morpher/master/examples/plot.png + :alt: plot + +Average all face images in a folder: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +85 images used + +| ``> python facemorpher/averager.py --images=images --blur --background=transparent`` +| ``--width=220 --height=250`` + +.. figure:: https://raw.github.com/alyssaq/face_morpher/master/examples/average_faces.png + :alt: average\_faces + +.. 
_`Use as pip library`: + +Use as pip library +--------------------------------- +:: + + $ pip install facemorpher + +Examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Additional options are exactly the same as the command line + +:: + + import facemorpher + + # Get a list of image paths in a folder + imgpaths = facemorpher.list_imgpaths('imagefolder') + + # To morph, supply an array of face images: + facemorpher.morpher(imgpaths, plot=True) + + # To average, supply an array of face images: + facemorpher.averager(['image1.png', 'image2.png'], plot=True) + + +Once pip installed, 2 binaries are also available as a command line utility: + +:: + + $ facemorpher --src= --dest= --plot + $ faceaverager --images= --plot + +Try out in a docker container +--------------------------------- +Mount local folder to `/images` in docker container, run it and enter a bash session. +--rm removes the container when you close it. +:: + + $ docker run -v /Users/alyssa/Desktop/images:/images --name py3 --rm -it jjanzic/docker-python3-opencv bash + +Once you're in the container, install ``facemorpher`` and try the examples listed above +:: + + root@0dad0912ebbe:/# pip install facemorpher + root@0dad0912ebbe:/# facemorpher --src= --dest= --plot + +Documentation +------------- + +http://alyssaq.github.io/face_morpher + +Build & publish Docs +^^^^^^^^^^^^^^^^^^^^ + +:: + + ./scripts/publish_ghpages.sh + +License +------- +`MIT`_ + +.. _Being John Malkovich: http://www.rottentomatoes.com/m/being_john_malkovich +.. _Mac installation steps: https://gist.github.com/alyssaq/f60393545173379e0f3f#file-4-opencv3-with-python3-md +.. _MIT: http://alyssaq.github.io/mit-license +.. _OpenCV: http://opencv.org +.. _Homebrew: https://brew.sh +.. _source: https://github.com/opencv/opencv +.. _dlib: http://dlib.net diff --git a/third_part/GPEN/face_morpher/facemorpher/__init__.py b/third_part/GPEN/face_morpher/facemorpher/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb9074b9e5b2a13ad848569a86acb851e073b289 --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/__init__.py @@ -0,0 +1,9 @@ +""" +Face Morpher module init code +""" +from .morpher import morpher, list_imgpaths +from .averager import averager + +__all__ = ['list_imgpaths', + 'morpher', + 'averager'] diff --git a/third_part/GPEN/face_morpher/facemorpher/aligner.py b/third_part/GPEN/face_morpher/facemorpher/aligner.py new file mode 100644 index 0000000000000000000000000000000000000000..04a4072a8ff443fc34d1f9d27363ef3d28dc8283 --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/aligner.py @@ -0,0 +1,102 @@ +""" +Align face and image sizes +""" +import cv2 +import numpy as np + +def positive_cap(num): + """ Cap a number to ensure positivity + + :param num: positive or negative number + :returns: (overflow, capped_number) + """ + if num < 0: + return 0, abs(num) + else: + return num, 0 + +def roi_coordinates(rect, size, scale): + """ Align the rectangle into the center and return the top-left coordinates + within the new size. If rect is smaller, we add borders. + + :param rect: (x, y, w, h) bounding rectangle of the face + :param size: (width, height) are the desired dimensions + :param scale: scaling factor of the rectangle to be resized + :returns: 4 numbers. Top-left coordinates of the aligned ROI. + (x, y, border_x, border_y). All values are > 0. 
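+    Note: ``size`` is unpacked as (height, width) here, matching ``resize_align``
+    and the (height, width) tuples built by ``averager.py``.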
+ """ + rectx, recty, rectw, recth = rect + new_height, new_width = size + mid_x = int((rectx + rectw/2) * scale) + mid_y = int((recty + recth/2) * scale) + roi_x = mid_x - int(new_width/2) + roi_y = mid_y - int(new_height/2) + + roi_x, border_x = positive_cap(roi_x) + roi_y, border_y = positive_cap(roi_y) + return roi_x, roi_y, border_x, border_y + +def scaling_factor(rect, size): + """ Calculate the scaling factor for the current image to be + resized to the new dimensions + + :param rect: (x, y, w, h) bounding rectangle of the face + :param size: (width, height) are the desired dimensions + :returns: floating point scaling factor + """ + new_height, new_width = size + rect_h, rect_w = rect[2:] + height_ratio = rect_h / new_height + width_ratio = rect_w / new_width + scale = 1 + if height_ratio > width_ratio: + new_recth = 0.8 * new_height + scale = new_recth / rect_h + else: + new_rectw = 0.8 * new_width + scale = new_rectw / rect_w + return scale + +def resize_image(img, scale): + """ Resize image with the provided scaling factor + + :param img: image to be resized + :param scale: scaling factor for resizing the image + """ + cur_height, cur_width = img.shape[:2] + new_scaled_height = int(scale * cur_height) + new_scaled_width = int(scale * cur_width) + + return cv2.resize(img, (new_scaled_width, new_scaled_height)) + +def resize_align(img, points, size): + """ Resize image and associated points, align face to the center + and crop to the desired size + + :param img: image to be resized + :param points: *m* x 2 array of points + :param size: (height, width) tuple of new desired size + """ + new_height, new_width = size + + # Resize image based on bounding rectangle + rect = cv2.boundingRect(np.array([points], np.int32)) + scale = scaling_factor(rect, size) + img = resize_image(img, scale) + + # Align bounding rect to center + cur_height, cur_width = img.shape[:2] + roi_x, roi_y, border_x, border_y = roi_coordinates(rect, size, scale) + roi_h = np.min([new_height-border_y, cur_height-roi_y]) + roi_w = np.min([new_width-border_x, cur_width-roi_x]) + + # Crop to supplied size + crop = np.zeros((new_height, new_width, 3), img.dtype) + crop[border_y:border_y+roi_h, border_x:border_x+roi_w] = ( + img[roi_y:roi_y+roi_h, roi_x:roi_x+roi_w]) + + # Scale and align face points to the crop + points[:, 0] = (points[:, 0] * scale) + (border_x - roi_x) + points[:, 1] = (points[:, 1] * scale) + (border_y - roi_y) + + return (crop, points) diff --git a/third_part/GPEN/face_morpher/facemorpher/averager.py b/third_part/GPEN/face_morpher/facemorpher/averager.py new file mode 100644 index 0000000000000000000000000000000000000000..0197eb4298038de05d1410d8b4d8077ab91f9dfc --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/averager.py @@ -0,0 +1,123 @@ +""" +:: + + Face averager + + Usage: + averager.py --images= [--blur] [--plot] + [--background=(black|transparent|average)] + [--width=] [--height=] + [--out=] [--destimg=] + + Options: + -h, --help Show this screen. + --images= Folder to images (.jpg, .jpeg, .png) + --blur Flag to blur edges of image [default: False] + --width= Custom width of the images/video [default: 500] + --height= Custom height of the images/video [default: 600] + --out= Filename to save the average face [default: result.png] + --destimg= Destination face image to overlay average face + --plot Flag to display the average face [default: False] + --background= Background of image to be one of (black|transparent|average) [default: black] + --version Show version. 
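+
+  Example (illustrative paths):
+    python facemorpher/averager.py --images=./faces --blur --background=transparent --out=average.png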
+""" + +from docopt import docopt +import os +import cv2 +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.image as mpimg + +from facemorpher import locator +from facemorpher import aligner +from facemorpher import warper +from facemorpher import blender +from facemorpher import plotter + +def list_imgpaths(imgfolder): + for fname in os.listdir(imgfolder): + if (fname.lower().endswith('.jpg') or + fname.lower().endswith('.png') or + fname.lower().endswith('.jpeg')): + yield os.path.join(imgfolder, fname) + +def sharpen(img): + blured = cv2.GaussianBlur(img, (0, 0), 2.5) + return cv2.addWeighted(img, 1.4, blured, -0.4, 0) + +def load_image_points(path, size): + img = cv2.imread(path) + points = locator.face_points(img) + + if len(points) == 0: + print('No face in %s' % path) + return None, None + else: + return aligner.resize_align(img, points, size) + +def averager(imgpaths, dest_filename=None, width=500, height=600, background='black', + blur_edges=False, out_filename='result.png', plot=False): + + size = (height, width) + + images = [] + point_set = [] + for path in imgpaths: + img, points = load_image_points(path, size) + if img is not None: + images.append(img) + point_set.append(points) + + if len(images) == 0: + raise FileNotFoundError('Could not find any valid images.' + + ' Supported formats are .jpg, .png, .jpeg') + + if dest_filename is not None: + dest_img, dest_points = load_image_points(dest_filename, size) + if dest_img is None or dest_points is None: + raise Exception('No face or detected face points in dest img: ' + dest_filename) + else: + dest_img = np.zeros(images[0].shape, np.uint8) + dest_points = locator.average_points(point_set) + + num_images = len(images) + result_images = np.zeros(images[0].shape, np.float32) + for i in range(num_images): + result_images += warper.warp_image(images[i], point_set[i], + dest_points, size, np.float32) + + result_image = np.uint8(result_images / num_images) + face_indexes = np.nonzero(result_image) + dest_img[face_indexes] = result_image[face_indexes] + + mask = blender.mask_from_points(size, dest_points) + if blur_edges: + blur_radius = 10 + mask = cv2.blur(mask, (blur_radius, blur_radius)) + + if background in ('transparent', 'average'): + dest_img = np.dstack((dest_img, mask)) + + if background == 'average': + average_background = locator.average_points(images) + dest_img = blender.overlay_image(dest_img, mask, average_background) + + print('Averaged {} images'.format(num_images)) + plt = plotter.Plotter(plot, num_images=1, out_filename=out_filename) + plt.save(dest_img) + plt.plot_one(dest_img) + plt.show() + +def main(): + args = docopt(__doc__, version='Face Averager 1.0') + try: + averager(list_imgpaths(args['--images']), args['--destimg'], + int(args['--width']), int(args['--height']), + args['--background'], args['--blur'], args['--out'], args['--plot']) + except Exception as e: + print(e) + + +if __name__ == "__main__": + main() diff --git a/third_part/GPEN/face_morpher/facemorpher/blender.py b/third_part/GPEN/face_morpher/facemorpher/blender.py new file mode 100644 index 0000000000000000000000000000000000000000..d05c77e380627a240b7db15a565b3a8dc0b681d1 --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/blender.py @@ -0,0 +1,133 @@ +import cv2 +import numpy as np +import scipy.sparse + +def mask_from_points(size, points): + """ Create a mask of supplied size from supplied points + :param size: tuple of output mask size + :param points: array of [x, y] points + :returns: mask of values 0 and 
255 where + 255 indicates the convex hull containing the points + """ + radius = 10 # kernel size + kernel = np.ones((radius, radius), np.uint8) + + mask = np.zeros(size, np.uint8) + cv2.fillConvexPoly(mask, cv2.convexHull(points), 255) + mask = cv2.erode(mask, kernel) + + return mask + +def overlay_image(foreground_image, mask, background_image): + """ Overlay foreground image onto the background given a mask + :param foreground_image: foreground image points + :param mask: [0-255] values in mask + :param background_image: background image points + :returns: image with foreground where mask > 0 overlaid on background image + """ + foreground_pixels = mask > 0 + background_image[..., :3][foreground_pixels] = foreground_image[..., :3][foreground_pixels] + return background_image + +def apply_mask(img, mask): + """ Apply mask to supplied image + :param img: max 3 channel image + :param mask: [0-255] values in mask + :returns: new image with mask applied + """ + masked_img = np.copy(img) + num_channels = 3 + for c in range(num_channels): + masked_img[..., c] = img[..., c] * (mask / 255) + + return masked_img + +def weighted_average(img1, img2, percent=0.5): + if percent <= 0: + return img2 + elif percent >= 1: + return img1 + else: + return cv2.addWeighted(img1, percent, img2, 1-percent, 0) + +def alpha_feathering(src_img, dest_img, img_mask, blur_radius=15): + mask = cv2.blur(img_mask, (blur_radius, blur_radius)) + mask = mask / 255.0 + + result_img = np.empty(src_img.shape, np.uint8) + for i in range(3): + result_img[..., i] = src_img[..., i] * mask + dest_img[..., i] * (1-mask) + + return result_img + +def poisson_blend(img_source, dest_img, img_mask, offset=(0, 0)): + # http://opencv.jp/opencv2-x-samples/poisson-blending + img_target = np.copy(dest_img) + import pyamg + # compute regions to be blended + region_source = ( + max(-offset[0], 0), + max(-offset[1], 0), + min(img_target.shape[0] - offset[0], img_source.shape[0]), + min(img_target.shape[1] - offset[1], img_source.shape[1])) + region_target = ( + max(offset[0], 0), + max(offset[1], 0), + min(img_target.shape[0], img_source.shape[0] + offset[0]), + min(img_target.shape[1], img_source.shape[1] + offset[1])) + region_size = (region_source[2] - region_source[0], + region_source[3] - region_source[1]) + + # clip and normalize mask image + img_mask = img_mask[region_source[0]:region_source[2], + region_source[1]:region_source[3]] + + # create coefficient matrix + coff_mat = scipy.sparse.identity(np.prod(region_size), format='lil') + for y in range(region_size[0]): + for x in range(region_size[1]): + if img_mask[y, x]: + index = x + y * region_size[1] + coff_mat[index, index] = 4 + if index + 1 < np.prod(region_size): + coff_mat[index, index + 1] = -1 + if index - 1 >= 0: + coff_mat[index, index - 1] = -1 + if index + region_size[1] < np.prod(region_size): + coff_mat[index, index + region_size[1]] = -1 + if index - region_size[1] >= 0: + coff_mat[index, index - region_size[1]] = -1 + coff_mat = coff_mat.tocsr() + + # create poisson matrix for b + poisson_mat = pyamg.gallery.poisson(img_mask.shape) + # for each layer (ex. 
RGB) + for num_layer in range(img_target.shape[2]): + # get subimages + t = img_target[region_target[0]:region_target[2], + region_target[1]:region_target[3], num_layer] + s = img_source[region_source[0]:region_source[2], + region_source[1]:region_source[3], num_layer] + t = t.flatten() + s = s.flatten() + + # create b + b = poisson_mat * s + for y in range(region_size[0]): + for x in range(region_size[1]): + if not img_mask[y, x]: + index = x + y * region_size[1] + b[index] = t[index] + + # solve Ax = b + x = pyamg.solve(coff_mat, b, verb=False, tol=1e-10) + + # assign x to target image + x = np.reshape(x, region_size) + x[x > 255] = 255 + x[x < 0] = 0 + x = np.array(x, img_target.dtype) + img_target[region_target[0]:region_target[2], + region_target[1]:region_target[3], num_layer] = x + + return img_target diff --git a/third_part/GPEN/face_morpher/facemorpher/locator.py b/third_part/GPEN/face_morpher/facemorpher/locator.py new file mode 100644 index 0000000000000000000000000000000000000000..adea52946124990774a2dd7bea4ef71ec1854edc --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/locator.py @@ -0,0 +1,112 @@ +""" +Locate face points +""" + +import cv2 +import numpy as np +import os.path as path +import dlib +import os + + +DATA_DIR = os.environ.get( + 'DLIB_DATA_DIR', + path.join(path.dirname(path.dirname(path.realpath(__file__))), 'data') +) +dlib_detector = dlib.get_frontal_face_detector() +dlib_predictor = dlib.shape_predictor(path.join(DATA_DIR, 'shape_predictor_68_face_landmarks.dat')) + +def boundary_points(points, width_percent=0.1, height_percent=0.1): + """ Produce additional boundary points + :param points: *m* x 2 array of x,y points + :param width_percent: [-1, 1] percentage of width to taper inwards. Negative for opposite direction + :param height_percent: [-1, 1] percentage of height to taper downwards. Negative for opposite direction + :returns: 2 additional points at the top corners + """ + x, y, w, h = cv2.boundingRect(np.array([points], np.int32)) + spacerw = int(w * width_percent) + spacerh = int(h * height_percent) + return [[x+spacerw, y+spacerh], + [x+w-spacerw, y+spacerh]] + + +def face_points(img, add_boundary_points=True): + return face_points_dlib(img, add_boundary_points) + +def face_points_dlib(img, add_boundary_points=True): + """ Locates 68 face points using dlib (http://dlib.net) + Requires shape_predictor_68_face_landmarks.dat to be in face_morpher/data + Download at: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 + :param img: an image array + :param add_boundary_points: bool to add additional boundary points + :returns: Array of x,y face points. 
Empty array if no face found + """ + try: + points = [] + rgbimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + rects = dlib_detector(rgbimg, 1) + + if rects and len(rects) > 0: + # We only take the first found face + shapes = dlib_predictor(rgbimg, rects[0]) + points = np.array([(shapes.part(i).x, shapes.part(i).y) for i in range(68)], np.int32) + + if add_boundary_points: + # Add more points inwards and upwards as dlib only detects up to eyebrows + points = np.vstack([ + points, + boundary_points(points, 0.1, -0.03), + boundary_points(points, 0.13, -0.05), + boundary_points(points, 0.15, -0.08), + boundary_points(points, 0.33, -0.12)]) + + return points + except Exception as e: + print(e) + return [] + +def face_points_stasm(img, add_boundary_points=True): + import stasm + """ Locates 77 face points using stasm (http://www.milbo.users.sonic.net/stasm) + + :param img: an image array + :param add_boundary_points: bool to add 2 additional points + :returns: Array of x,y face points. Empty array if no face found + """ + try: + points = stasm.search_single(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)) + except Exception as e: + print('Failed finding face points: ', e) + return [] + + points = points.astype(np.int32) + if len(points) == 0: + return points + + if add_boundary_points: + return np.vstack([points, boundary_points(points)]) + + return points + +def average_points(point_set): + """ Averages a set of face points from images + + :param point_set: *n* x *m* x 2 array of face points. \\ + *n* = number of images. *m* = number of face points per image + """ + return np.mean(point_set, 0).astype(np.int32) + +def weighted_average_points(start_points, end_points, percent=0.5): + """ Weighted average of two sets of supplied points + + :param start_points: *m* x 2 array of start face points. + :param end_points: *m* x 2 array of end face points. + :param percent: [0, 1] percentage weight on start_points + :returns: *m* x 2 array of weighted average points + """ + if percent <= 0: + return end_points + elif percent >= 1: + return start_points + else: + return np.asarray(start_points*percent + end_points*(1-percent), np.int32) diff --git a/third_part/GPEN/face_morpher/facemorpher/morpher.py b/third_part/GPEN/face_morpher/facemorpher/morpher.py new file mode 100644 index 0000000000000000000000000000000000000000..d2b11bed82c9a4c5270f43d0cd563d2541d8c745 --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/morpher.py @@ -0,0 +1,153 @@ +""" +:: + + Morph from source to destination face or + Morph through all images in a folder + + Usage: + morpher.py (--src= --dest= | --images=) + [--width=] [--height=] + [--num=] [--fps=] + [--out_frames=] [--out_video=] + [--plot] [--background=(black|transparent|average)] + + Options: + -h, --help Show this screen. + --src= Filepath to source image (.jpg, .jpeg, .png) + --dest= Filepath to destination image (.jpg, .jpeg, .png) + --images= Folderpath to images + --width= Custom width of the images/video [default: 500] + --height= Custom height of the images/video [default: 600] + --num= Number of morph frames [default: 20] + --fps= Number frames per second for the video [default: 10] + --out_frames= Folder path to save all image frames + --out_video= Filename to save a video + --plot Flag to plot images to result.png [default: False] + --background= Background of images to be one of (black|transparent|average) [default: black] + --version Show version. 
+""" +from docopt import docopt +import os +import numpy as np +import cv2 + +from facemorpher import locator +from facemorpher import aligner +from facemorpher import warper +from facemorpher import blender +from facemorpher import plotter +from facemorpher import videoer + +def verify_args(args): + if args['--images'] is None: + valid = os.path.isfile(args['--src']) & os.path.isfile(args['--dest']) + if not valid: + print('--src=%s or --dest=%s file does not exist. Double check the supplied paths' % ( + args['--src'], args['--dest'])) + exit(1) + else: + valid = os.path.isdir(args['--images']) + if not valid: + print('--images=%s is not a valid directory' % args['--images']) + exit(1) + +def load_image_points(path, size): + img = cv2.imread(path) + points = locator.face_points(img) + + if len(points) == 0: + print('No face in %s' % path) + return None, None + else: + return aligner.resize_align(img, points, size) + +def load_valid_image_points(imgpaths, size): + for path in imgpaths: + img, points = load_image_points(path, size) + if img is not None: + print(path) + yield (img, points) + +def list_imgpaths(images_folder=None, src_image=None, dest_image=None): + if images_folder is None: + yield src_image + yield dest_image + else: + for fname in os.listdir(images_folder): + if (fname.lower().endswith('.jpg') or + fname.lower().endswith('.png') or + fname.lower().endswith('.jpeg')): + yield os.path.join(images_folder, fname) + +def morph(src_img, src_points, dest_img, dest_points, + video, width=500, height=600, num_frames=20, fps=10, + out_frames=None, out_video=None, plot=False, background='black'): + """ + Create a morph sequence from source to destination image + + :param src_img: ndarray source image + :param src_points: source image array of x,y face points + :param dest_img: ndarray destination image + :param dest_points: destination image array of x,y face points + :param video: facemorpher.videoer.Video object + """ + size = (height, width) + stall_frames = np.clip(int(fps*0.15), 1, fps) # Show first & last longer + plt = plotter.Plotter(plot, num_images=num_frames, out_folder=out_frames) + num_frames -= (stall_frames * 2) # No need to process src and dest image + + plt.plot_one(src_img) + video.write(src_img, 1) + + # Produce morph frames! 
+ for percent in np.linspace(1, 0, num=num_frames): + points = locator.weighted_average_points(src_points, dest_points, percent) + src_face = warper.warp_image(src_img, src_points, points, size) + end_face = warper.warp_image(dest_img, dest_points, points, size) + average_face = blender.weighted_average(src_face, end_face, percent) + + if background in ('transparent', 'average'): + mask = blender.mask_from_points(average_face.shape[:2], points) + average_face = np.dstack((average_face, mask)) + + if background == 'average': + average_background = blender.weighted_average(src_img, dest_img, percent) + average_face = blender.overlay_image(average_face, mask, average_background) + + plt.plot_one(average_face) + plt.save(average_face) + video.write(average_face) + + plt.plot_one(dest_img) + video.write(dest_img, stall_frames) + plt.show() + +def morpher(imgpaths, width=500, height=600, num_frames=20, fps=10, + out_frames=None, out_video=None, plot=False, background='black'): + """ + Create a morph sequence from multiple images in imgpaths + + :param imgpaths: array or generator of image paths + """ + video = videoer.Video(out_video, fps, width, height) + images_points_gen = load_valid_image_points(imgpaths, (height, width)) + src_img, src_points = next(images_points_gen) + for dest_img, dest_points in images_points_gen: + morph(src_img, src_points, dest_img, dest_points, video, + width, height, num_frames, fps, out_frames, out_video, plot, background) + src_img, src_points = dest_img, dest_points + video.end() + +def main(): + args = docopt(__doc__, version='Face Morpher 1.0') + verify_args(args) + + morpher(list_imgpaths(args['--images'], args['--src'], args['--dest']), + int(args['--width']), int(args['--height']), + int(args['--num']), int(args['--fps']), + args['--out_frames'], args['--out_video'], + args['--plot'], args['--background']) + + +if __name__ == "__main__": + main() diff --git a/third_part/GPEN/face_morpher/facemorpher/plotter.py b/third_part/GPEN/face_morpher/facemorpher/plotter.py new file mode 100644 index 0000000000000000000000000000000000000000..cfd7fa403b37f9c166cb5243ef222c19b36930fa --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/plotter.py @@ -0,0 +1,90 @@ +""" +Plot and save images +""" + +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import os.path +import numpy as np +import cv2 + +def bgr2rgb(img): + # OpenCV's BGR to RGB + rgb = np.copy(img) + rgb[..., 0], rgb[..., 2] = img[..., 2], img[..., 0] + return rgb + +def check_do_plot(func): + def inner(self, *args, **kwargs): + if self.do_plot: + func(self, *args, **kwargs) + + return inner + +def check_do_save(func): + def inner(self, *args, **kwargs): + if self.do_save: + func(self, *args, **kwargs) + + return inner + +class Plotter(object): + def __init__(self, plot=True, rows=0, cols=0, num_images=0, out_folder=None, out_filename=None): + self.save_counter = 1 + self.plot_counter = 1 + self.do_plot = plot + self.do_save = out_filename is not None + self.out_filename = out_filename + self.set_filepath(out_folder) + + if (rows + cols) == 0 and num_images > 0: + # Auto-calculate the number of rows and cols for the figure + self.rows = np.ceil(np.sqrt(num_images / 2.0)) + self.cols = np.ceil(num_images / self.rows) + else: + self.rows = rows + self.cols = cols + + def set_filepath(self, folder): + if folder is None: + self.filepath = None + return + + if not os.path.exists(folder): + os.makedirs(folder) + self.filepath = os.path.join(folder, 'frame{0:03d}.png') + self.do_save = True + + 
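+  # save() writes numbered frames (frame001.png, frame002.png, ...) when an
+  # out_folder was supplied via set_filepath; otherwise it falls back to the
+  # single out_filename. Both paths are wrapped by @check_do_save, so nothing
+  # is written unless saving was enabled in the constructor.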
@check_do_save + def save(self, img, filename=None): + if self.filepath: + filename = self.filepath.format(self.save_counter) + self.save_counter += 1 + elif filename is None: + filename = self.out_filename + + mpimg.imsave(filename, bgr2rgb(img)) + print(filename + ' saved') + + @check_do_plot + def plot_one(self, img): + p = plt.subplot(self.rows, self.cols, self.plot_counter) + p.axes.get_xaxis().set_visible(False) + p.axes.get_yaxis().set_visible(False) + plt.imshow(bgr2rgb(img)) + self.plot_counter += 1 + + @check_do_plot + def show(self): + plt.gcf().subplots_adjust(hspace=0.05, wspace=0, + left=0, bottom=0, right=1, top=0.98) + plt.axis('off') + #plt.show() + plt.savefig('result.png') + + @check_do_plot + def plot_mesh(self, points, tri, color='k'): + """ plot triangles """ + for tri_indices in tri.simplices: + t_ext = [tri_indices[0], tri_indices[1], tri_indices[2], tri_indices[0]] + plt.plot(points[t_ext, 0], points[t_ext, 1], color) diff --git a/third_part/GPEN/face_morpher/facemorpher/videoer.py b/third_part/GPEN/face_morpher/facemorpher/videoer.py new file mode 100644 index 0000000000000000000000000000000000000000..6aabff4f958858981589d956de235a32e40efc7f --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/videoer.py @@ -0,0 +1,36 @@ +""" +Create a video with image frames +""" + +import cv2 +import numpy as np + + +def check_write_video(func): + def inner(self, *args, **kwargs): + if self.video: + return func(self, *args, **kwargs) + else: + pass + return inner + + +class Video(object): + def __init__(self, filename, fps, w, h): + self.filename = filename + + if filename is None: + self.video = None + else: + fourcc = cv2.VideoWriter_fourcc(*'MJPG') + self.video = cv2.VideoWriter(filename, fourcc, fps, (w, h), True) + + @check_write_video + def write(self, img, num_times=1): + for i in range(num_times): + self.video.write(img[..., :3]) + + @check_write_video + def end(self): + print(self.filename + ' saved') + self.video.release() diff --git a/third_part/GPEN/face_morpher/facemorpher/warper.py b/third_part/GPEN/face_morpher/facemorpher/warper.py new file mode 100644 index 0000000000000000000000000000000000000000..50b1a3151683891fe29f85b69660417956b904a6 --- /dev/null +++ b/third_part/GPEN/face_morpher/facemorpher/warper.py @@ -0,0 +1,139 @@ +import numpy as np +import scipy.spatial as spatial + +def bilinear_interpolate(img, coords): + """ Interpolates over every image channel + http://en.wikipedia.org/wiki/Bilinear_interpolation + + :param img: max 3 channel image + :param coords: 2 x _m_ array. 
1st row = xcoords, 2nd row = ycoords + :returns: array of interpolated pixels with same shape as coords + """ + int_coords = np.int32(coords) + x0, y0 = int_coords + dx, dy = coords - int_coords + + # 4 Neighour pixels + q11 = img[y0, x0] + q21 = img[y0, x0+1] + q12 = img[y0+1, x0] + q22 = img[y0+1, x0+1] + + btm = q21.T * dx + q11.T * (1 - dx) + top = q22.T * dx + q12.T * (1 - dx) + inter_pixel = top * dy + btm * (1 - dy) + + return inter_pixel.T + +def grid_coordinates(points): + """ x,y grid coordinates within the ROI of supplied points + + :param points: points to generate grid coordinates + :returns: array of (x, y) coordinates + """ + xmin = np.min(points[:, 0]) + xmax = np.max(points[:, 0]) + 1 + ymin = np.min(points[:, 1]) + ymax = np.max(points[:, 1]) + 1 + return np.asarray([(x, y) for y in range(ymin, ymax) + for x in range(xmin, xmax)], np.uint32) + +def process_warp(src_img, result_img, tri_affines, dst_points, delaunay): + """ + Warp each triangle from the src_image only within the + ROI of the destination image (points in dst_points). + """ + roi_coords = grid_coordinates(dst_points) + # indices to vertices. -1 if pixel is not in any triangle + roi_tri_indices = delaunay.find_simplex(roi_coords) + + for simplex_index in range(len(delaunay.simplices)): + coords = roi_coords[roi_tri_indices == simplex_index] + num_coords = len(coords) + out_coords = np.dot(tri_affines[simplex_index], + np.vstack((coords.T, np.ones(num_coords)))) + x, y = coords.T + result_img[y, x] = bilinear_interpolate(src_img, out_coords) + + return None + +def triangular_affine_matrices(vertices, src_points, dest_points): + """ + Calculate the affine transformation matrix for each + triangle (x,y) vertex from dest_points to src_points + + :param vertices: array of triplet indices to corners of triangle + :param src_points: array of [x, y] points to landmarks for source image + :param dest_points: array of [x, y] points to landmarks for destination image + :returns: 2 x 3 affine matrix transformation for a triangle + """ + ones = [1, 1, 1] + for tri_indices in vertices: + src_tri = np.vstack((src_points[tri_indices, :].T, ones)) + dst_tri = np.vstack((dest_points[tri_indices, :].T, ones)) + mat = np.dot(src_tri, np.linalg.inv(dst_tri))[:2, :] + yield mat + +def warp_image(src_img, src_points, dest_points, dest_shape, dtype=np.uint8): + # Resultant image will not have an alpha channel + num_chans = 3 + src_img = src_img[:, :, :3] + + rows, cols = dest_shape[:2] + result_img = np.zeros((rows, cols, num_chans), dtype) + + delaunay = spatial.Delaunay(dest_points) + tri_affines = np.asarray(list(triangular_affine_matrices( + delaunay.simplices, src_points, dest_points))) + + process_warp(src_img, result_img, tri_affines, dest_points, delaunay) + + return result_img + +def test_local(): + from functools import partial + import cv2 + import scipy.misc + import locator + import aligner + from matplotlib import pyplot as plt + + # Load source image + face_points_func = partial(locator.face_points, '../data') + base_path = '../females/Screenshot 2015-03-04 17.11.12.png' + src_path = '../females/BlDmB5QCYAAY8iw.jpg' + src_img = cv2.imread(src_path) + + # Define control points for warps + src_points = face_points_func(src_path) + base_img = cv2.imread(base_path) + base_points = face_points_func(base_path) + + size = (600, 500) + src_img, src_points = aligner.resize_align(src_img, src_points, size) + base_img, base_points = aligner.resize_align(base_img, base_points, size) + result_points = 
locator.weighted_average_points(src_points, base_points, 0.2) + + # Perform transform + dst_img1 = warp_image(src_img, src_points, result_points, size) + dst_img2 = warp_image(base_img, base_points, result_points, size) + + import blender + ave = blender.weighted_average(dst_img1, dst_img2, 0.6) + mask = blender.mask_from_points(size, result_points) + blended_img = blender.poisson_blend(dst_img1, dst_img2, mask) + + plt.subplot(2, 2, 1) + plt.imshow(ave) + plt.subplot(2, 2, 2) + plt.imshow(dst_img1) + plt.subplot(2, 2, 3) + plt.imshow(dst_img2) + plt.subplot(2, 2, 4) + + plt.imshow(blended_img) + plt.show() + + +if __name__ == "__main__": + test_local() diff --git a/third_part/GPEN/face_morpher/requirements.txt b/third_part/GPEN/face_morpher/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0ce5bfe39994c62362323bbf7482a0c55fda517 --- /dev/null +++ b/third_part/GPEN/face_morpher/requirements.txt @@ -0,0 +1,5 @@ +numpy +scipy +matplotlib +docopt +dlib diff --git a/third_part/GPEN/face_morpher/scripts/make_docs.sh b/third_part/GPEN/face_morpher/scripts/make_docs.sh new file mode 100644 index 0000000000000000000000000000000000000000..3b8b22d7fef878db567a97d7c0bcbc7541099151 --- /dev/null +++ b/third_part/GPEN/face_morpher/scripts/make_docs.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +rm -rf docs +# reStructuredText in python files to rst. Documentation in docs folder +sphinx-apidoc -A "Alyssa Quek" -f -F -o docs facemorpher/ + +cd docs + +# Append module path to end of conf file +echo "" >> conf.py +echo "import os" >> conf.py +echo "import sys" >> conf.py +echo "sys.path.insert(0, os.path.abspath('../'))" >> conf.py +echo "sys.path.insert(0, os.path.abspath('../facemorpher'))" >> conf.py + +# Make sphinx documentation +make html +cd .. diff --git a/third_part/GPEN/face_morpher/scripts/publish_ghpages.sh b/third_part/GPEN/face_morpher/scripts/publish_ghpages.sh new file mode 100644 index 0000000000000000000000000000000000000000..3d3d956c2ad1d53ecd16bfa17e73fe8f30597412 --- /dev/null +++ b/third_part/GPEN/face_morpher/scripts/publish_ghpages.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# delete previous gh-pages +git branch -D gh-pages +git push origin :gh-pages + +git checkout -b gh-pages +git rebase master +git reset HEAD + +# make docs +./scripts/make_docs.sh + +# Add docs +mv docs/_build/html/*.html . +git add *.html +mv docs/_build/html/*.js . 
+git add *.js +mv docs/_build/html/_static/ _static +git add _static + +touch .nojekyll +git add .nojekyll + +# Publish to gh-pages +git commit -m "docs" +git push origin gh-pages + +git checkout master diff --git a/third_part/GPEN/face_morpher/setup.cfg b/third_part/GPEN/face_morpher/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..437cb1884786ae6ca9163546d563b4193e15a04e --- /dev/null +++ b/third_part/GPEN/face_morpher/setup.cfg @@ -0,0 +1,7 @@ +[pep8] +ignore = E111,E114,E226,E302,E41,E121,E701 +max-line-length = 100 + +[flake8] +ignore = E111,E114,E226,E302,E41,E121,E701 +max-line-length = 100 \ No newline at end of file diff --git a/third_part/GPEN/face_morpher/setup.py b/third_part/GPEN/face_morpher/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..09fba56665695af2aa58ead076fa2c106ef18cda --- /dev/null +++ b/third_part/GPEN/face_morpher/setup.py @@ -0,0 +1,30 @@ +from setuptools import setup, find_packages + +# To test locally: python setup.py sdist bdist_wheel +# To upload to pypi: twine upload dist/* + +setup( + name='facemorpher', + version='5.2.dev0', + author='Alyssa Quek', + author_email='alyssaquek@gmail.com', + description=('Warp, morph and average human faces!'), + keywords='face morphing, averaging, warping', + url='https://github.com/alyssaq/face_morpher', + license='MIT', + packages=find_packages(), + install_requires=[ + 'docopt', + 'numpy', + 'scipy', + 'matplotlib', + 'dlib' + ], + entry_points={'console_scripts': [ + 'facemorpher=facemorpher.morpher:main', + 'faceaverager=facemorpher.averager:main' + ] + }, + data_files=[('readme', ['README.rst'])], + long_description=open('README.rst').read(), +) diff --git a/third_part/GPEN/face_parse/blocks.py b/third_part/GPEN/face_parse/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..fc7facb9ea3aa57d4897750ea65735473200852c --- /dev/null +++ b/third_part/GPEN/face_parse/blocks.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +import torch +import torch.nn as nn +from torch.nn.parameter import Parameter +from torch.nn import functional as F +import numpy as np + +class NormLayer(nn.Module): + """Normalization Layers. + ------------ + # Arguments + - channels: input channels, for batch norm and instance norm. + - input_size: input shape without batch size, for layer norm. + """ + def __init__(self, channels, normalize_shape=None, norm_type='bn', ref_channels=None): + super(NormLayer, self).__init__() + norm_type = norm_type.lower() + self.norm_type = norm_type + if norm_type == 'bn': + self.norm = nn.BatchNorm2d(channels, affine=True) + elif norm_type == 'in': + self.norm = nn.InstanceNorm2d(channels, affine=False) + elif norm_type == 'gn': + self.norm = nn.GroupNorm(32, channels, affine=True) + elif norm_type == 'pixel': + self.norm = lambda x: F.normalize(x, p=2, dim=1) + elif norm_type == 'layer': + self.norm = nn.LayerNorm(normalize_shape) + elif norm_type == 'none': + self.norm = lambda x: x*1.0 + else: + assert 1==0, 'Norm type {} not support.'.format(norm_type) + + def forward(self, x, ref=None): + if self.norm_type == 'spade': + return self.norm(x, ref) + else: + return self.norm(x) + + +class ReluLayer(nn.Module): + """Relu Layer. 
+ ------------ + # Arguments + - relu type: type of relu layer, candidates are + - ReLU + - LeakyReLU: default relu slope 0.2 + - PRelu + - SELU + - none: direct pass + """ + def __init__(self, channels, relu_type='relu'): + super(ReluLayer, self).__init__() + relu_type = relu_type.lower() + if relu_type == 'relu': + self.func = nn.ReLU(True) + elif relu_type == 'leakyrelu': + self.func = nn.LeakyReLU(0.2, inplace=True) + elif relu_type == 'prelu': + self.func = nn.PReLU(channels) + elif relu_type == 'selu': + self.func = nn.SELU(True) + elif relu_type == 'none': + self.func = lambda x: x*1.0 + else: + assert 1==0, 'Relu type {} not support.'.format(relu_type) + + def forward(self, x): + return self.func(x) + + +class ConvLayer(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, scale='none', norm_type='none', relu_type='none', use_pad=True, bias=True): + super(ConvLayer, self).__init__() + self.use_pad = use_pad + self.norm_type = norm_type + if norm_type in ['bn']: + bias = False + + stride = 2 if scale == 'down' else 1 + + self.scale_func = lambda x: x + if scale == 'up': + self.scale_func = lambda x: nn.functional.interpolate(x, scale_factor=2, mode='nearest') + + self.reflection_pad = nn.ReflectionPad2d(int(np.ceil((kernel_size - 1.)/2))) + self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, bias=bias) + + self.relu = ReluLayer(out_channels, relu_type) + self.norm = NormLayer(out_channels, norm_type=norm_type) + + def forward(self, x): + out = self.scale_func(x) + if self.use_pad: + out = self.reflection_pad(out) + out = self.conv2d(out) + out = self.norm(out) + out = self.relu(out) + return out + + +class ResidualBlock(nn.Module): + """ + Residual block recommended in: http://torch.ch/blog/2016/02/04/resnets.html + """ + def __init__(self, c_in, c_out, relu_type='prelu', norm_type='bn', scale='none'): + super(ResidualBlock, self).__init__() + + if scale == 'none' and c_in == c_out: + self.shortcut_func = lambda x: x + else: + self.shortcut_func = ConvLayer(c_in, c_out, 3, scale) + + scale_config_dict = {'down': ['none', 'down'], 'up': ['up', 'none'], 'none': ['none', 'none']} + scale_conf = scale_config_dict[scale] + + self.conv1 = ConvLayer(c_in, c_out, 3, scale_conf[0], norm_type=norm_type, relu_type=relu_type) + self.conv2 = ConvLayer(c_out, c_out, 3, scale_conf[1], norm_type=norm_type, relu_type='none') + + def forward(self, x): + identity = self.shortcut_func(x) + + res = self.conv1(x) + res = self.conv2(res) + return identity + res + + diff --git a/third_part/GPEN/face_parse/face_parsing.py b/third_part/GPEN/face_parse/face_parsing.py new file mode 100644 index 0000000000000000000000000000000000000000..39d7cb7febd019d9ac0deb49dfcef795b6a1bd86 --- /dev/null +++ b/third_part/GPEN/face_parse/face_parsing.py @@ -0,0 +1,148 @@ +''' +@paper: GAN Prior Embedded Network for Blind Face Restoration in the Wild (CVPR2021) +@author: yangxy (yangtao9009@gmail.com) +''' +import os +import cv2 +import torch +import numpy as np +from face_parse.parse_model import ParseNet +import torch.nn.functional as F + +from face_parse.model import BiSeNet +import torchvision.transforms as transforms + +class FaceParse(object): + def __init__(self, base_dir='./', model='ParseNet-latest', device='cuda', mask_map = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0]): + self.mfile = os.path.join(base_dir, model+'.pth') + self.size = 512 + self.device = device + + ''' + 0: 'background' 1: 'skin' 2: 'nose' + 3: 'eye_g' 4: 'l_eye' 5: 
'r_eye' + 6: 'l_brow' 7: 'r_brow' 8: 'l_ear' + 9: 'r_ear' 10: 'mouth' 11: 'u_lip' + 12: 'l_lip' 13: 'hair' 14: 'hat' + 15: 'ear_r' 16: 'neck_l' 17: 'neck' + 18: 'cloth' + ''' + # self.MASK_COLORMAP = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] + #self.#MASK_COLORMAP = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [0, 0, 0], [0, 0, 0]] + # self.MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0] + self.MASK_COLORMAP = mask_map + + self.load_model() + + def load_model(self): + self.faceparse = ParseNet(self.size, self.size, 32, 64, 19, norm_type='bn', relu_type='LeakyReLU', ch_range=[32, 256]) + self.faceparse.load_state_dict(torch.load(self.mfile, map_location=torch.device('cpu'))) + self.faceparse.to(self.device) + self.faceparse.eval() + + def process(self, im, masks=[0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0]): + im = cv2.resize(im, (self.size, self.size)) + imt = self.img2tensor(im) + with torch.no_grad(): + pred_mask, sr_img_tensor = self.faceparse(imt) # (1, 19, 512, 512) + mask = self.tenor2mask(pred_mask, masks) + + return mask + + def process_tensor(self, imt): + imt = F.interpolate(imt.flip(1)*2-1, (self.size, self.size)) + pred_mask, sr_img_tensor = self.faceparse(imt) + + mask = pred_mask.argmax(dim=1) + for idx, color in enumerate(self.MASK_COLORMAP): + mask = torch.where(mask==idx, color, mask) + #mask = mask.repeat(3, 1, 1).unsqueeze(0) #.cpu().float().numpy() + mask = mask.unsqueeze(0) + + return mask + + def img2tensor(self, img): + img = img[..., ::-1] # BGR to RGB + img = img / 255. 
* 2 - 1 + img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(self.device) + return img_tensor.float() + + def tenor2mask(self, tensor, masks): + if len(tensor.shape) < 4: + tensor = tensor.unsqueeze(0) + if tensor.shape[1] > 1: + tensor = tensor.argmax(dim=1) + + tensor = tensor.squeeze(1).data.cpu().numpy() # (1, 512, 512) + color_maps = [] + for t in tensor: + #tmp_img = np.zeros(tensor.shape[1:] + (3,)) + tmp_img = np.zeros(tensor.shape[1:]) + for idx, color in enumerate(masks): + tmp_img[t == idx] = color + color_maps.append(tmp_img.astype(np.uint8)) + return color_maps + + + +class FaceParse_v2(object): + def __init__(self, device='cuda', mask_map = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0]): + self.mfile = '/apdcephfs/private_quincheng/Expression/face-parsing.PyTorch/res/cp/79999_iter.pth' + self.size = 512 + self.device = device + + ''' + 0: 'background' 1: 'skin' 2: 'nose' + 3: 'eye_g' 4: 'l_eye' 5: 'r_eye' + 6: 'l_brow' 7: 'r_brow' 8: 'l_ear' + 9: 'r_ear' 10: 'mouth' 11: 'u_lip' + 12: 'l_lip' 13: 'hair' 14: 'hat' + 15: 'ear_r' 16: 'neck_l' 17: 'neck' + 18: 'cloth' + ''' + # self.MASK_COLORMAP = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] + #self.#MASK_COLORMAP = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [0, 0, 0], [0, 0, 0]] + # self.MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0] + self.MASK_COLORMAP = mask_map + self.load_model() + self.to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + + def load_model(self): + self.faceparse = BiSeNet(n_classes=19) + self.faceparse.load_state_dict(torch.load(self.mfile)) + self.faceparse.to(self.device) + self.faceparse.eval() + + def process(self, im, masks=[0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0]): + im = cv2.resize(im[...,::-1], (self.size, self.size)) + im = self.to_tensor(im) + imt = torch.unsqueeze(im, 0).to(self.device) + with torch.no_grad(): + pred_mask = self.faceparse(imt)[0] + mask = self.tenor2mask(pred_mask, masks) + return mask + + # def img2tensor(self, img): + # img = img[..., ::-1] # BGR to RGB + # img = img / 255. 
* 2 - 1 + # img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(self.device) + # return img_tensor.float() + + def tenor2mask(self, tensor, masks): + if len(tensor.shape) < 4: + tensor = tensor.unsqueeze(0) + if tensor.shape[1] > 1: + tensor = tensor.argmax(dim=1) + + tensor = tensor.squeeze(1).data.cpu().numpy() + color_maps = [] + for t in tensor: + #tmp_img = np.zeros(tensor.shape[1:] + (3,)) + tmp_img = np.zeros(tensor.shape[1:]) + for idx, color in enumerate(masks): + tmp_img[t == idx] = color + color_maps.append(tmp_img.astype(np.uint8)) + return color_maps \ No newline at end of file diff --git a/third_part/GPEN/face_parse/mask.png b/third_part/GPEN/face_parse/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..729f36dfb23b90381fe819fe24326153ba46828e Binary files /dev/null and b/third_part/GPEN/face_parse/mask.png differ diff --git a/third_part/GPEN/face_parse/model.py b/third_part/GPEN/face_parse/model.py new file mode 100644 index 0000000000000000000000000000000000000000..5119e751c3ae18e4dc1eecde7bfcb5bf9c62fb92 --- /dev/null +++ b/third_part/GPEN/face_parse/model.py @@ -0,0 +1,283 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from .resnet import Resnet18 +# from modules.bn import InPlaceABNSync as BatchNorm2d + + +class ConvBNReLU(nn.Module): + def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): + super(ConvBNReLU, self).__init__() + self.conv = nn.Conv2d(in_chan, + out_chan, + kernel_size = ks, + stride = stride, + padding = padding, + bias = False) + self.bn = nn.BatchNorm2d(out_chan) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = F.relu(self.bn(x)) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + +class BiSeNetOutput(nn.Module): + def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): + super(BiSeNetOutput, self).__init__() + self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) + self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class AttentionRefinementModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(AttentionRefinementModule, self).__init__() + self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) + self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) + self.bn_atten = nn.BatchNorm2d(out_chan) + self.sigmoid_atten = nn.Sigmoid() + self.init_weight() + + def forward(self, x): + feat = self.conv(x) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) 
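+        # Channel attention: the globally average-pooled features are passed
+        # through a 1x1 conv, batch norm and sigmoid, and the resulting
+        # per-channel gate rescales the convolved feature map below.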
+ out = torch.mul(feat, atten) + return out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + +class ContextPath(nn.Module): + def __init__(self, *args, **kwargs): + super(ContextPath, self).__init__() + self.resnet = Resnet18() + self.arm16 = AttentionRefinementModule(256, 128) + self.arm32 = AttentionRefinementModule(512, 128) + self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) + + self.init_weight() + + def forward(self, x): + H0, W0 = x.size()[2:] + feat8, feat16, feat32 = self.resnet(x) + H8, W8 = feat8.size()[2:] + H16, W16 = feat16.size()[2:] + H32, W32 = feat32.size()[2:] + + avg = F.avg_pool2d(feat32, feat32.size()[2:]) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, (H32, W32), mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat8, feat16_up, feat32_up # x8, x8, x16 + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +### This is not used, since I replace this with the resnet feature with the same size +class SpatialPath(nn.Module): + def __init__(self, *args, **kwargs): + super(SpatialPath, self).__init__() + self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) + self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) + self.init_weight() + + def forward(self, x): + feat = self.conv1(x) + feat = self.conv2(feat) + feat = self.conv3(feat) + feat = self.conv_out(feat) + return feat + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class FeatureFusionModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(FeatureFusionModule, self).__init__() + self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) + self.conv1 = nn.Conv2d(out_chan, + out_chan//4, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.conv2 = nn.Conv2d(out_chan//4, + 
out_chan, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.relu = nn.ReLU(inplace=True) + self.sigmoid = nn.Sigmoid() + self.init_weight() + + def forward(self, fsp, fcp): + fcat = torch.cat([fsp, fcp], dim=1) + feat = self.convblk(fcat) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = torch.mul(feat, atten) + feat_out = feat_atten + feat + return feat_out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class BiSeNet(nn.Module): + def __init__(self, n_classes, *args, **kwargs): + super(BiSeNet, self).__init__() + self.cp = ContextPath() + ## here self.sp is deleted + self.ffm = FeatureFusionModule(256, 256) + self.conv_out = BiSeNetOutput(256, 256, n_classes) + self.conv_out16 = BiSeNetOutput(128, 64, n_classes) + self.conv_out32 = BiSeNetOutput(128, 64, n_classes) + self.init_weight() + + def forward(self, x): + H, W = x.size()[2:] + feat_res8, feat_cp8, feat_cp16 = self.cp(x) # here return res3b1 feature + feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature + feat_fuse = self.ffm(feat_sp, feat_cp8) + + feat_out = self.conv_out(feat_fuse) + feat_out16 = self.conv_out16(feat_cp8) + feat_out32 = self.conv_out32(feat_cp16) + + feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True) + feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True) + feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True) + return feat_out, feat_out16, feat_out32 + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] + for name, child in self.named_children(): + child_wd_params, child_nowd_params = child.get_params() + if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput): + lr_mul_wd_params += child_wd_params + lr_mul_nowd_params += child_nowd_params + else: + wd_params += child_wd_params + nowd_params += child_nowd_params + return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params + + +if __name__ == "__main__": + net = BiSeNet(19) + net.cuda() + net.eval() + in_ten = torch.randn(16, 3, 640, 480).cuda() + out, out16, out32 = net(in_ten) + print(out.shape) + + net.get_params() diff --git a/third_part/GPEN/face_parse/parse_model.py b/third_part/GPEN/face_parse/parse_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1e98cef0677c5f8fd290e259f992e21a635c71e1 --- /dev/null +++ b/third_part/GPEN/face_parse/parse_model.py @@ -0,0 +1,77 @@ +''' +@Created by chaofengc (chaofenghust@gmail.com) + +@Modified by yangxy (yangtao9009@gmail.com) +''' + +from face_parse.blocks import * +import torch +from torch import nn +import numpy as np + +def define_P(in_size=512, 
out_size=512, min_feat_size=32, relu_type='LeakyReLU', isTrain=False, weight_path=None): + net = ParseNet(in_size, out_size, min_feat_size, 64, 19, norm_type='bn', relu_type=relu_type, ch_range=[32, 256]) + if not isTrain: + net.eval() + if weight_path is not None: + net.load_state_dict(torch.load(weight_path)) + return net + + +class ParseNet(nn.Module): + def __init__(self, + in_size=128, + out_size=128, + min_feat_size=32, + base_ch=64, + parsing_ch=19, + res_depth=10, + relu_type='prelu', + norm_type='bn', + ch_range=[32, 512], + ): + super().__init__() + self.res_depth = res_depth + act_args = {'norm_type': norm_type, 'relu_type': relu_type} + min_ch, max_ch = ch_range + + ch_clip = lambda x: max(min_ch, min(x, max_ch)) + min_feat_size = min(in_size, min_feat_size) + + down_steps = int(np.log2(in_size//min_feat_size)) + up_steps = int(np.log2(out_size//min_feat_size)) + + # =============== define encoder-body-decoder ==================== + self.encoder = [] + self.encoder.append(ConvLayer(3, base_ch, 3, 1)) + head_ch = base_ch + for i in range(down_steps): + cin, cout = ch_clip(head_ch), ch_clip(head_ch * 2) + self.encoder.append(ResidualBlock(cin, cout, scale='down', **act_args)) + head_ch = head_ch * 2 + + self.body = [] + for i in range(res_depth): + self.body.append(ResidualBlock(ch_clip(head_ch), ch_clip(head_ch), **act_args)) + + self.decoder = [] + for i in range(up_steps): + cin, cout = ch_clip(head_ch), ch_clip(head_ch // 2) + self.decoder.append(ResidualBlock(cin, cout, scale='up', **act_args)) + head_ch = head_ch // 2 + + self.encoder = nn.Sequential(*self.encoder) + self.body = nn.Sequential(*self.body) + self.decoder = nn.Sequential(*self.decoder) + self.out_img_conv = ConvLayer(ch_clip(head_ch), 3) + self.out_mask_conv = ConvLayer(ch_clip(head_ch), parsing_ch) + + def forward(self, x): + feat = self.encoder(x) + x = feat + self.body(feat) + x = self.decoder(x) + out_img = self.out_img_conv(x) + out_mask = self.out_mask_conv(x) + return out_mask, out_img + + diff --git a/third_part/GPEN/face_parse/resnet.py b/third_part/GPEN/face_parse/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..aa2bf95130e9815ba378cb6f73207068b81a04b9 --- /dev/null +++ b/third_part/GPEN/face_parse/resnet.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as modelzoo + +# from modules.bn import InPlaceABNSync as BatchNorm2d + +resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + def __init__(self, in_chan, out_chan, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm2d(out_chan) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm2d(out_chan) + self.relu = nn.ReLU(inplace=True) + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, out_chan, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_chan), + ) + + def forward(self, x): + residual = self.conv1(x) + residual = F.relu(self.bn1(residual)) + residual = self.conv2(residual) + residual = self.bn2(residual) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out = 
shortcut + residual + out = self.relu(out) + return out + + +def create_layer_basic(in_chan, out_chan, bnum, stride=1): + layers = [BasicBlock(in_chan, out_chan, stride=stride)] + for i in range(bnum-1): + layers.append(BasicBlock(out_chan, out_chan, stride=1)) + return nn.Sequential(*layers) + + +class Resnet18(nn.Module): + def __init__(self): + super(Resnet18, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) + self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) + self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) + self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) + self.init_weight() + + def forward(self, x): + x = self.conv1(x) + x = F.relu(self.bn1(x)) + x = self.maxpool(x) + + x = self.layer1(x) + feat8 = self.layer2(x) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat8, feat16, feat32 + + def init_weight(self): + state_dict = modelzoo.load_url(resnet18_url) + self_state_dict = self.state_dict() + for k, v in state_dict.items(): + if 'fc' in k: continue + self_state_dict.update({k: v}) + self.load_state_dict(self_state_dict) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +if __name__ == "__main__": + net = Resnet18() + x = torch.randn(16, 3, 224, 224) + out = net(x) + print(out[0].size()) + print(out[1].size()) + print(out[2].size()) + net.get_params() diff --git a/third_part/GPEN/face_parse/test.png b/third_part/GPEN/face_parse/test.png new file mode 100644 index 0000000000000000000000000000000000000000..f2bea14ce322ad51da09a640c0fcd3617ef3105a Binary files /dev/null and b/third_part/GPEN/face_parse/test.png differ diff --git a/third_part/GPEN/gpen_face_enhancer.py b/third_part/GPEN/gpen_face_enhancer.py new file mode 100644 index 0000000000000000000000000000000000000000..565ea9acfd582d30703d21f2f262fbd1a0166187 --- /dev/null +++ b/third_part/GPEN/gpen_face_enhancer.py @@ -0,0 +1,138 @@ +import cv2 +import numpy as np + +######### face enhancement +from face_parse.face_parsing import FaceParse +from face_detect.retinaface_detection import RetinaFaceDetection +from face_parse.face_parsing import FaceParse +from face_model.face_gan import FaceGAN +# from sr_model.real_esrnet import RealESRNet +from align_faces import warp_and_crop_face, get_reference_facial_points +from utils.inference_utils import Laplacian_Pyramid_Blending_with_mask + +class FaceEnhancement(object): + def __init__(self, base_dir='./', size=512, model=None, use_sr=True, sr_model=None, channel_multiplier=2, narrow=1, device='cuda'): + self.facedetector = RetinaFaceDetection(base_dir, device) + self.facegan = FaceGAN(base_dir, size, model, channel_multiplier, narrow, device=device) + # self.srmodel = RealESRNet(base_dir, sr_model, device=device) + self.srmodel=None + self.faceparser = FaceParse(base_dir, device=device) + self.use_sr = use_sr + self.size = size + self.threshold = 0.9 + + # the mask for pasting restored faces back + self.mask = np.zeros((512, 512), np.float32) + cv2.rectangle(self.mask, (26, 26), (486, 
486), (1, 1, 1), -1, cv2.LINE_AA) + self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11) + self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11) + + self.kernel = np.array(( + [0.0625, 0.125, 0.0625], + [0.125, 0.25, 0.125], + [0.0625, 0.125, 0.0625]), dtype="float32") + + # get the reference 5 landmarks position in the crop settings + default_square = True + inner_padding_factor = 0.25 + outer_padding = (0, 0) + self.reference_5pts = get_reference_facial_points( + (self.size, self.size), inner_padding_factor, outer_padding, default_square) + + def mask_postprocess(self, mask, thres=20): + mask[:thres, :] = 0; mask[-thres:, :] = 0 + mask[:, :thres] = 0; mask[:, -thres:] = 0 + mask = cv2.GaussianBlur(mask, (101, 101), 11) + mask = cv2.GaussianBlur(mask, (101, 101), 11) + return mask.astype(np.float32) + + def process(self, img, ori_img, bbox=None, face_enhance=True, possion_blending=False): + if self.use_sr: + img_sr = self.srmodel.process(img) + if img_sr is not None: + img = cv2.resize(img, img_sr.shape[:2][::-1]) + + facebs, landms = self.facedetector.detect(img.copy()) + + orig_faces, enhanced_faces = [], [] + height, width = img.shape[:2] + full_mask = np.zeros((height, width), dtype=np.float32) + full_img = np.zeros(ori_img.shape, dtype=np.uint8) + + for i, (faceb, facial5points) in enumerate(zip(facebs, landms)): + if faceb[4]0)] = tmp_mask[np.where(mask>0)] + full_img[np.where(mask>0)] = tmp_img[np.where(mask>0)] + + mask_sharp = cv2.GaussianBlur(mask_sharp, (0,0), sigmaX=1, sigmaY=1, borderType = cv2.BORDER_DEFAULT) + + full_mask = full_mask[:, :, np.newaxis] + mask_sharp = mask_sharp[:, :, np.newaxis] + + if self.use_sr and img_sr is not None: + img = cv2.convertScaleAbs(img_sr*(1-full_mask) + full_img*full_mask) + + elif possion_blending is True: + if bbox is not None: + y1, y2, x1, x2 = bbox + mask_bbox = np.zeros_like(mask_sharp) + mask_bbox[y1:y2 - 5, x1:x2] = 1 + full_img, ori_img, full_mask = [cv2.resize(x,(512,512)) for x in (full_img, ori_img, np.float32(mask_sharp * mask_bbox))] + else: + full_img, ori_img, full_mask = [cv2.resize(x,(512,512)) for x in (full_img, ori_img, full_mask)] + + img = Laplacian_Pyramid_Blending_with_mask(full_img, ori_img, full_mask, 6) + img = np.clip(img, 0 ,255) + img = np.uint8(cv2.resize(img, (width, height))) + + else: + img = cv2.convertScaleAbs(ori_img*(1-full_mask) + full_img*full_mask) + img = cv2.convertScaleAbs(ori_img*(1-mask_sharp) + img*mask_sharp) + + return img, orig_faces, enhanced_faces \ No newline at end of file diff --git a/third_part/face3d/checkpoints/model_name/test_opt.txt b/third_part/face3d/checkpoints/model_name/test_opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..53a07e5c699a5ae26f356d55e17d7bf4d67ef5ca --- /dev/null +++ b/third_part/face3d/checkpoints/model_name/test_opt.txt @@ -0,0 +1,34 @@ +----------------- Options --------------- + add_image: True + bfm_folder: BFM + bfm_model: BFM_model_front.mat + camera_d: 10.0 + center: 112.0 + checkpoints_dir: ./checkpoints + dataset_mode: None + ddp_port: 12355 + display_per_batch: True + epoch: 20 [default: latest] + eval_batch_nums: inf + focal: 1015.0 + gpu_ids: 0 + inference_batch_size: 8 + init_path: checkpoints/init_model/resnet50-0676ba61.pth + input_dir: demo_video [default: None] + isTrain: False [default: None] + keypoint_dir: demo_cctv [default: None] + model: facerecon + name: model_name [default: face_recon] + net_recon: resnet50 + output_dir: demo_cctv [default: mp4] + phase: test + save_split_files: False + suffix: + 
use_ddp: False [default: True] + use_last_fc: False + verbose: False + vis_batch_nums: 1 + world_size: 1 + z_far: 15.0 + z_near: 5.0 +----------------- End ------------------- diff --git a/third_part/face3d/coeff_detector.py b/third_part/face3d/coeff_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..5db17d3ecfc5f3191d3e9669411932113566b516 --- /dev/null +++ b/third_part/face3d/coeff_detector.py @@ -0,0 +1,118 @@ +import os +import glob +import numpy as np +from os import makedirs, name +from PIL import Image +from tqdm import tqdm + +import torch +import torch.nn as nn + +from face3d.options.inference_options import InferenceOptions +from face3d.models import create_model +from face3d.util.preprocess import align_img +from face3d.util.load_mats import load_lm3d +from face3d.extract_kp_videos import KeypointExtractor + + +class CoeffDetector(nn.Module): + def __init__(self, opt): + super().__init__() + + self.model = create_model(opt) + self.model.setup(opt) + self.model.device = 'cuda' + self.model.parallelize() + self.model.eval() + + self.lm3d_std = load_lm3d(opt.bfm_folder) + + def forward(self, img, lm): + + img, trans_params = self.image_transform(img, lm) + + data_input = { + 'imgs': img[None], + } + self.model.set_input(data_input) + self.model.test() + pred_coeff = {key:self.model.pred_coeffs_dict[key].cpu().numpy() for key in self.model.pred_coeffs_dict} + pred_coeff = np.concatenate([ + pred_coeff['id'], + pred_coeff['exp'], + pred_coeff['tex'], + pred_coeff['angle'], + pred_coeff['gamma'], + pred_coeff['trans'], + trans_params[None], + ], 1) + + return {'coeff_3dmm':pred_coeff, + 'crop_img': Image.fromarray((img.cpu().permute(1, 2, 0).numpy()*255).astype(np.uint8))} + + def image_transform(self, images, lm): + """ + param: + images: -- PIL image + lm: -- numpy array + """ + W,H = images.size + if np.mean(lm) == -1: + lm = (self.lm3d_std[:, :2]+1)/2. 
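+            # No-detection fallback: the standard landmark template (loaded
+            # from bfm_folder) has been mapped to [0, 1]; scale it up to the
+            # input image's W x H pixel grid before alignment.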
+ lm = np.concatenate( + [lm[:, :1]*W, lm[:, 1:2]*H], 1 + ) + else: + lm[:, -1] = H - 1 - lm[:, -1] + + trans_params, img, lm, _ = align_img(images, lm, self.lm3d_std) + img = torch.tensor(np.array(img)/255., dtype=torch.float32).permute(2, 0, 1) + trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]) + trans_params = torch.tensor(trans_params.astype(np.float32)) + return img, trans_params + +def get_data_path(root, keypoint_root): + filenames = list() + keypoint_filenames = list() + + IMAGE_EXTENSIONS_LOWERCASE = {'jpg', 'png', 'jpeg', 'webp'} + IMAGE_EXTENSIONS = IMAGE_EXTENSIONS_LOWERCASE.union({f.upper() for f in IMAGE_EXTENSIONS_LOWERCASE}) + extensions = IMAGE_EXTENSIONS + + for ext in extensions: + filenames += glob.glob(f'{root}/*.{ext}', recursive=True) + filenames = sorted(filenames) + for filename in filenames: + name = os.path.splitext(os.path.basename(filename))[0] + keypoint_filenames.append( + os.path.join(keypoint_root, name + '.txt') + ) + return filenames, keypoint_filenames + + +if __name__ == "__main__": + opt = InferenceOptions().parse() + coeff_detector = CoeffDetector(opt) + kp_extractor = KeypointExtractor() + image_names, keypoint_names = get_data_path(opt.input_dir, opt.keypoint_dir) + makedirs(opt.keypoint_dir, exist_ok=True) + makedirs(opt.output_dir, exist_ok=True) + + for image_name, keypoint_name in tqdm(zip(image_names, keypoint_names)): + image = Image.open(image_name) + if not os.path.isfile(keypoint_name): + lm = kp_extractor.extract_keypoint(image, keypoint_name) + else: + lm = np.loadtxt(keypoint_name).astype(np.float32) + lm = lm.reshape([-1, 2]) + predicted = coeff_detector(image, lm) + name = os.path.splitext(os.path.basename(image_name))[0] + np.savetxt( + "{}/{}_3dmm_coeff.txt".format(opt.output_dir, name), + predicted['coeff_3dmm'].reshape(-1)) + + + + + + \ No newline at end of file diff --git a/third_part/face3d/data/__init__.py b/third_part/face3d/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a9761c518a1b07c5996165869742af0a52c82bc --- /dev/null +++ b/third_part/face3d/data/__init__.py @@ -0,0 +1,116 @@ +"""This package includes all the modules related to data loading and preprocessing + + To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. + You need to implement four functions: + -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). + -- <__len__>: return the size of dataset. + -- <__getitem__>: get a data point from data loader. + -- : (optionally) add dataset-specific options and set default options. + +Now you can use the dataset class by specifying flag '--dataset_mode dummy'. +See our template dataset class 'template_dataset.py' for more details. +""" +import numpy as np +import importlib +import torch.utils.data +from face3d.data.base_dataset import BaseDataset + + +def find_dataset_using_name(dataset_name): + """Import the module "data/[dataset_name]_dataset.py". + + In the file, the class called DatasetNameDataset() will + be instantiated. It has to be a subclass of BaseDataset, + and it is case-insensitive. + """ + dataset_filename = "data." 
+ dataset_name + "_dataset" + datasetlib = importlib.import_module(dataset_filename) + + dataset = None + target_dataset_name = dataset_name.replace('_', '') + 'dataset' + for name, cls in datasetlib.__dict__.items(): + if name.lower() == target_dataset_name.lower() \ + and issubclass(cls, BaseDataset): + dataset = cls + + if dataset is None: + raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) + + return dataset + + +def get_option_setter(dataset_name): + """Return the static method of the dataset class.""" + dataset_class = find_dataset_using_name(dataset_name) + return dataset_class.modify_commandline_options + + +def create_dataset(opt, rank=0): + """Create a dataset given the option. + + This function wraps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from data import create_dataset + >>> dataset = create_dataset(opt) + """ + data_loader = CustomDatasetDataLoader(opt, rank=rank) + dataset = data_loader.load_data() + return dataset + +class CustomDatasetDataLoader(): + """Wrapper class of Dataset class that performs multi-threaded data loading""" + + def __init__(self, opt, rank=0): + """Initialize this class + + Step 1: create a dataset instance given the name [dataset_mode] + Step 2: create a multi-threaded data loader. + """ + self.opt = opt + dataset_class = find_dataset_using_name(opt.dataset_mode) + self.dataset = dataset_class(opt) + self.sampler = None + print("rank %d %s dataset [%s] was created" % (rank, self.dataset.name, type(self.dataset).__name__)) + if opt.use_ddp and opt.isTrain: + world_size = opt.world_size + self.sampler = torch.utils.data.distributed.DistributedSampler( + self.dataset, + num_replicas=world_size, + rank=rank, + shuffle=not opt.serial_batches + ) + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + sampler=self.sampler, + num_workers=int(opt.num_threads / world_size), + batch_size=int(opt.batch_size / world_size), + drop_last=True) + else: + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + batch_size=opt.batch_size, + shuffle=(not opt.serial_batches) and opt.isTrain, + num_workers=int(opt.num_threads), + drop_last=True + ) + + def set_epoch(self, epoch): + self.dataset.current_epoch = epoch + if self.sampler is not None: + self.sampler.set_epoch(epoch) + + def load_data(self): + return self + + def __len__(self): + """Return the number of data in the dataset""" + return min(len(self.dataset), self.opt.max_dataset_size) + + def __iter__(self): + """Return a batch of data""" + for i, data in enumerate(self.dataloader): + if i * self.opt.batch_size >= self.opt.max_dataset_size: + break + yield data diff --git a/third_part/face3d/data/base_dataset.py b/third_part/face3d/data/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1bd57d082d519f512d7114b4f867b6695fb7de06 --- /dev/null +++ b/third_part/face3d/data/base_dataset.py @@ -0,0 +1,125 @@ +"""This module implements an abstract base class (ABC) 'BaseDataset' for datasets. + +It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 
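+The affine helpers below (get_affine_mat, apply_img_affine, apply_lm_affine) implement the
+shift / scale / rotation / flip augmentation that FlistDataset applies during training.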
+""" +import random +import numpy as np +import torch.utils.data as data +from PIL import Image +import torchvision.transforms as transforms +from abc import ABC, abstractmethod + + +class BaseDataset(data.Dataset, ABC): + """This class is an abstract base class (ABC) for datasets. + + To create a subclass, you need to implement the following four functions: + -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). + -- <__len__>: return the size of dataset. + -- <__getitem__>: get a data point. + -- : (optionally) add dataset-specific options and set default options. + """ + + def __init__(self, opt): + """Initialize the class; save the options in the class + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + self.opt = opt + # self.root = opt.dataroot + self.current_epoch = 0 + + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new dataset-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + return parser + + @abstractmethod + def __len__(self): + """Return the total number of images in the dataset.""" + return 0 + + @abstractmethod + def __getitem__(self, index): + """Return a data point and its metadata information. + + Parameters: + index - - a random integer for data indexing + + Returns: + a dictionary of data with their names. It ususally contains the data itself and its metadata information. + """ + pass + + +def get_transform(grayscale=False): + transform_list = [] + if grayscale: + transform_list.append(transforms.Grayscale(1)) + transform_list += [transforms.ToTensor()] + return transforms.Compose(transform_list) + +def get_affine_mat(opt, size): + shift_x, shift_y, scale, rot_angle, flip = 0., 0., 1., 0., False + w, h = size + + if 'shift' in opt.preprocess: + shift_pixs = int(opt.shift_pixs) + shift_x = random.randint(-shift_pixs, shift_pixs) + shift_y = random.randint(-shift_pixs, shift_pixs) + if 'scale' in opt.preprocess: + scale = 1 + opt.scale_delta * (2 * random.random() - 1) + if 'rot' in opt.preprocess: + rot_angle = opt.rot_angle * (2 * random.random() - 1) + rot_rad = -rot_angle * np.pi/180 + if 'flip' in opt.preprocess: + flip = random.random() > 0.5 + + shift_to_origin = np.array([1, 0, -w//2, 0, 1, -h//2, 0, 0, 1]).reshape([3, 3]) + flip_mat = np.array([-1 if flip else 1, 0, 0, 0, 1, 0, 0, 0, 1]).reshape([3, 3]) + shift_mat = np.array([1, 0, shift_x, 0, 1, shift_y, 0, 0, 1]).reshape([3, 3]) + rot_mat = np.array([np.cos(rot_rad), np.sin(rot_rad), 0, -np.sin(rot_rad), np.cos(rot_rad), 0, 0, 0, 1]).reshape([3, 3]) + scale_mat = np.array([scale, 0, 0, 0, scale, 0, 0, 0, 1]).reshape([3, 3]) + shift_to_center = np.array([1, 0, w//2, 0, 1, h//2, 0, 0, 1]).reshape([3, 3]) + + affine = shift_to_center @ scale_mat @ rot_mat @ shift_mat @ flip_mat @ shift_to_origin + affine_inv = np.linalg.inv(affine) + return affine, affine_inv, flip + +def apply_img_affine(img, affine_inv, method=Image.BICUBIC): + return img.transform(img.size, Image.AFFINE, data=affine_inv.flatten()[:6], resample=Image.BICUBIC) + +def apply_lm_affine(landmark, affine, flip, size): + _, h = size + lm = landmark.copy() + lm[:, 1] = h - 1 - lm[:, 1] + lm = np.concatenate((lm, np.ones([lm.shape[0], 1])), -1) + lm = lm @ np.transpose(affine) + lm[:, :2] = 
lm[:, :2] / lm[:, 2:] + lm = lm[:, :2] + lm[:, 1] = h - 1 - lm[:, 1] + if flip: + lm_ = lm.copy() + lm_[:17] = lm[16::-1] + lm_[17:22] = lm[26:21:-1] + lm_[22:27] = lm[21:16:-1] + lm_[31:36] = lm[35:30:-1] + lm_[36:40] = lm[45:41:-1] + lm_[40:42] = lm[47:45:-1] + lm_[42:46] = lm[39:35:-1] + lm_[46:48] = lm[41:39:-1] + lm_[48:55] = lm[54:47:-1] + lm_[55:60] = lm[59:54:-1] + lm_[60:65] = lm[64:59:-1] + lm_[65:68] = lm[67:64:-1] + lm = lm_ + return lm diff --git a/third_part/face3d/data/flist_dataset.py b/third_part/face3d/data/flist_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c0b6945c80aa756074a5d3c02b9443b15ddcfc57 --- /dev/null +++ b/third_part/face3d/data/flist_dataset.py @@ -0,0 +1,125 @@ +"""This script defines the custom dataset for Deep3DFaceRecon_pytorch +""" + +import os.path +from data.base_dataset import BaseDataset, get_transform, get_affine_mat, apply_img_affine, apply_lm_affine +from data.image_folder import make_dataset +from PIL import Image +import random +import util.util as util +import numpy as np +import json +import torch +from scipy.io import loadmat, savemat +import pickle +from util.preprocess import align_img, estimate_norm +from util.load_mats import load_lm3d + + +def default_flist_reader(flist): + """ + flist format: impath label\nimpath label\n ...(same to caffe's filelist) + """ + imlist = [] + with open(flist, 'r') as rf: + for line in rf.readlines(): + impath = line.strip() + imlist.append(impath) + + return imlist + +def jason_flist_reader(flist): + with open(flist, 'r') as fp: + info = json.load(fp) + return info + +def parse_label(label): + return torch.tensor(np.array(label).astype(np.float32)) + + +class FlistDataset(BaseDataset): + """ + It requires one directories to host training images '/path/to/data/train' + You can train the model with the dataset flag '--dataroot /path/to/data'. + """ + + def __init__(self, opt): + """Initialize this dataset class. + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + BaseDataset.__init__(self, opt) + + self.lm3d_std = load_lm3d(opt.bfm_folder) + + msk_names = default_flist_reader(opt.flist) + self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names] + + self.size = len(self.msk_paths) + self.opt = opt + + self.name = 'train' if opt.isTrain else 'val' + if '_' in opt.flist: + self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0] + + + def __getitem__(self, index): + """Return a data point and its metadata information. 
+ + Parameters: + index (int) -- a random integer for data indexing + + Returns a dictionary that contains A, B, A_paths and B_paths + img (tensor) -- an image in the input domain + msk (tensor) -- its corresponding attention mask + lm (tensor) -- its corresponding 3d landmarks + im_paths (str) -- image paths + aug_flag (bool) -- a flag used to tell whether its raw or augmented + """ + msk_path = self.msk_paths[index % self.size] # make sure index is within then range + img_path = msk_path.replace('mask/', '') + lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt' + + raw_img = Image.open(img_path).convert('RGB') + raw_msk = Image.open(msk_path).convert('RGB') + raw_lm = np.loadtxt(lm_path).astype(np.float32) + + _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk) + + aug_flag = self.opt.use_aug and self.opt.isTrain + if aug_flag: + img, lm, msk = self._augmentation(img, lm, self.opt, msk) + + _, H = img.size + M = estimate_norm(lm, H) + transform = get_transform() + img_tensor = transform(img) + msk_tensor = transform(msk)[:1, ...] + lm_tensor = parse_label(lm) + M_tensor = parse_label(M) + + + return {'imgs': img_tensor, + 'lms': lm_tensor, + 'msks': msk_tensor, + 'M': M_tensor, + 'im_paths': img_path, + 'aug_flag': aug_flag, + 'dataset': self.name} + + def _augmentation(self, img, lm, opt, msk=None): + affine, affine_inv, flip = get_affine_mat(opt, img.size) + img = apply_img_affine(img, affine_inv) + lm = apply_lm_affine(lm, affine, flip, img.size) + if msk is not None: + msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR) + return img, lm, msk + + + + + def __len__(self): + """Return the total number of images in the dataset. + """ + return self.size diff --git a/third_part/face3d/data/image_folder.py b/third_part/face3d/data/image_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..efadc2ecbe2fb4b53b78230aba25ec505eff0e55 --- /dev/null +++ b/third_part/face3d/data/image_folder.py @@ -0,0 +1,66 @@ +"""A modified image folder class + +We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) +so that this class can load images from both current directory and its subdirectories. 
+""" +import numpy as np +import torch.utils.data as data + +from PIL import Image +import os +import os.path + +IMG_EXTENSIONS = [ + '.jpg', '.JPG', '.jpeg', '.JPEG', + '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', + '.tif', '.TIF', '.tiff', '.TIFF', +] + + +def is_image_file(filename): + return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) + + +def make_dataset(dir, max_dataset_size=float("inf")): + images = [] + assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir + + for root, _, fnames in sorted(os.walk(dir, followlinks=True)): + for fname in fnames: + if is_image_file(fname): + path = os.path.join(root, fname) + images.append(path) + return images[:min(max_dataset_size, len(images))] + + +def default_loader(path): + return Image.open(path).convert('RGB') + + +class ImageFolder(data.Dataset): + + def __init__(self, root, transform=None, return_paths=False, + loader=default_loader): + imgs = make_dataset(root) + if len(imgs) == 0: + raise(RuntimeError("Found 0 images in: " + root + "\n" + "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) + + self.root = root + self.imgs = imgs + self.transform = transform + self.return_paths = return_paths + self.loader = loader + + def __getitem__(self, index): + path = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + if self.return_paths: + return img, path + else: + return img + + def __len__(self): + return len(self.imgs) diff --git a/third_part/face3d/data/template_dataset.py b/third_part/face3d/data/template_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..bfdf16be2a8a834b204c45d88c86857b37b9bd25 --- /dev/null +++ b/third_part/face3d/data/template_dataset.py @@ -0,0 +1,75 @@ +"""Dataset class template + +This module provides a template for users to implement custom datasets. +You can specify '--dataset_mode template' to use this dataset. +The class name should be consistent with both the filename and its dataset_mode option. +The filename should be _dataset.py +The class name should be Dataset.py +You need to implement the following functions: + -- : Add dataset-specific options and rewrite default values for existing options. + -- <__init__>: Initialize this dataset class. + -- <__getitem__>: Return a data point and its metadata information. + -- <__len__>: Return the number of images. +""" +from data.base_dataset import BaseDataset, get_transform +# from data.image_folder import make_dataset +# from PIL import Image + + +class TemplateDataset(BaseDataset): + """A template dataset class for you to implement custom datasets.""" + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new dataset-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option') + parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values + return parser + + def __init__(self, opt): + """Initialize this dataset class. + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + + A few things can be done here. 
+ - save the options (have been done in BaseDataset) + - get image paths and meta information of the dataset. + - define the image transformation. + """ + # save the option and dataset root + BaseDataset.__init__(self, opt) + # get the image paths of your dataset; + self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root + # define the default transform function. You can use ; You can also define your custom transform function + self.transform = get_transform(opt) + + def __getitem__(self, index): + """Return a data point and its metadata information. + + Parameters: + index -- a random integer for data indexing + + Returns: + a dictionary of data with their names. It usually contains the data itself and its metadata information. + + Step 1: get a random image path: e.g., path = self.image_paths[index] + Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB'). + Step 3: convert your data to a PyTorch tensor. You can use helpder functions such as self.transform. e.g., data = self.transform(image) + Step 4: return a data point as a dictionary. + """ + path = 'temp' # needs to be a string + data_A = None # needs to be a tensor + data_B = None # needs to be a tensor + return {'data_A': data_A, 'data_B': data_B, 'path': path} + + def __len__(self): + """Return the total number of images.""" + return len(self.image_paths) diff --git a/third_part/face3d/data_preparation.py b/third_part/face3d/data_preparation.py new file mode 100644 index 0000000000000000000000000000000000000000..6ffc79d34a040cfd3c5c82f4f860656999ceef84 --- /dev/null +++ b/third_part/face3d/data_preparation.py @@ -0,0 +1,45 @@ +"""This script is the data preparation script for Deep3DFaceRecon_pytorch +""" + +import os +import numpy as np +import argparse +from util.detect_lm68 import detect_68p,load_lm_graph +from util.skin_mask import get_skin_mask +from util.generate_list import check_list, write_list +import warnings +warnings.filterwarnings("ignore") + +parser = argparse.ArgumentParser() +parser.add_argument('--data_root', type=str, default='datasets', help='root directory for training data') +parser.add_argument('--img_folder', nargs="+", required=True, help='folders of training images') +parser.add_argument('--mode', type=str, default='train', help='train or val') +opt = parser.parse_args() + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +def data_prepare(folder_list,mode): + + lm_sess,input_op,output_op = load_lm_graph('./checkpoints/lm_model/68lm_detector.pb') # load a tensorflow version 68-landmark detector + + for img_folder in folder_list: + detect_68p(img_folder,lm_sess,input_op,output_op) # detect landmarks for images + get_skin_mask(img_folder) # generate skin attention mask for images + + # create files that record path to all training data + msks_list = [] + for img_folder in folder_list: + path = os.path.join(img_folder, 'mask') + msks_list += ['/'.join([img_folder, 'mask', i]) for i in sorted(os.listdir(path)) if 'jpg' in i or + 'png' in i or 'jpeg' in i or 'PNG' in i] + + imgs_list = [i.replace('mask/', '') for i in msks_list] + lms_list = [i.replace('mask', 'landmarks') for i in msks_list] + lms_list = ['.'.join(i.split('.')[:-1]) + '.txt' for i in lms_list] + + lms_list_final, imgs_list_final, msks_list_final = check_list(lms_list, imgs_list, msks_list) # check if the path is valid + write_list(lms_list_final, imgs_list_final, msks_list_final, mode=mode) # save files + +if __name__ == 
'__main__': + print('Datasets:',opt.img_folder) + data_prepare([os.path.join(opt.data_root,folder) for folder in opt.img_folder],opt.mode) diff --git a/third_part/face3d/extract_kp_videos.py b/third_part/face3d/extract_kp_videos.py new file mode 100644 index 0000000000000000000000000000000000000000..ebeff7e3f5f66141a107fed4e4fca52791eea80b --- /dev/null +++ b/third_part/face3d/extract_kp_videos.py @@ -0,0 +1,107 @@ +import os +import cv2 +import time +import glob +import argparse +import face_alignment +import numpy as np +from PIL import Image +from tqdm import tqdm +from itertools import cycle + +from torch.multiprocessing import Pool, Process, set_start_method + +class KeypointExtractor(): + def __init__(self): + self.detector = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D) + + def extract_keypoint(self, images, name=None, info=True): + if isinstance(images, list): + keypoints = [] + if info: + i_range = tqdm(images,desc='landmark Det:') + else: + i_range = images + + for image in i_range: + current_kp = self.extract_keypoint(image) + if np.mean(current_kp) == -1 and keypoints: + keypoints.append(keypoints[-1]) + else: + keypoints.append(current_kp[None]) + + keypoints = np.concatenate(keypoints, 0) + np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) + return keypoints + else: + while True: + try: + keypoints = self.detector.get_landmarks_from_image(np.array(images))[0] + break + except RuntimeError as e: + if str(e).startswith('CUDA'): + print("Warning: out of memory, sleep for 1s") + time.sleep(1) + else: + print(e) + break + except TypeError: + print('No face detected in this image') + shape = [68, 2] + keypoints = -1. * np.ones(shape) + break + if name is not None: + np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) + return keypoints + +def read_video(filename): + frames = [] + cap = cv2.VideoCapture(filename) + while cap.isOpened(): + ret, frame = cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = Image.fromarray(frame) + frames.append(frame) + else: + break + cap.release() + return frames + +def run(data): + filename, opt, device = data + os.environ['CUDA_VISIBLE_DEVICES'] = device + kp_extractor = KeypointExtractor() + images = read_video(filename) + name = filename.split('/')[-2:] + os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True) + kp_extractor.extract_keypoint( + images, + name=os.path.join(opt.output_dir, name[-2], name[-1]) + ) + +if __name__ == '__main__': + set_start_method('spawn') + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input_dir', type=str, help='the folder of the input files') + parser.add_argument('--output_dir', type=str, help='the folder of the output files') + parser.add_argument('--device_ids', type=str, default='0,1') + parser.add_argument('--workers', type=int, default=4) + + opt = parser.parse_args() + filenames = list() + VIDEO_EXTENSIONS_LOWERCASE = {'mp4'} + VIDEO_EXTENSIONS = VIDEO_EXTENSIONS_LOWERCASE.union({f.upper() for f in VIDEO_EXTENSIONS_LOWERCASE}) + extensions = VIDEO_EXTENSIONS + + for ext in extensions: + os.listdir(f'{opt.input_dir}') + print(f'{opt.input_dir}/*.{ext}') + filenames = sorted(glob.glob(f'{opt.input_dir}/*.{ext}')) + print('Total number of videos:', len(filenames)) + pool = Pool(opt.workers) + args_list = cycle([opt]) + device_ids = opt.device_ids.split(",") + device_ids = cycle(device_ids) + for data in tqdm(pool.imap_unordered(run, zip(filenames, 
args_list, device_ids))): + None diff --git a/third_part/face3d/face_recon_videos.py b/third_part/face3d/face_recon_videos.py new file mode 100644 index 0000000000000000000000000000000000000000..97e00afee65d234e7d085140aca64e4abfddf407 --- /dev/null +++ b/third_part/face3d/face_recon_videos.py @@ -0,0 +1,157 @@ +import os +import cv2 +import glob +import numpy as np +from PIL import Image +from tqdm import tqdm +from scipy.io import savemat + +import torch + +from models import create_model +from options.inference_options import InferenceOptions +from util.preprocess import align_img +from util.load_mats import load_lm3d +from util.util import mkdirs, tensor2im, save_image + + +def get_data_path(root, keypoint_root): + filenames = list() + keypoint_filenames = list() + + VIDEO_EXTENSIONS_LOWERCASE = {'mp4'} + VIDEO_EXTENSIONS = VIDEO_EXTENSIONS_LOWERCASE.union({f.upper() for f in VIDEO_EXTENSIONS_LOWERCASE}) + extensions = VIDEO_EXTENSIONS + + for ext in extensions: + filenames += glob.glob(f'{root}/**/*.{ext}', recursive=True) + filenames = sorted(filenames) + keypoint_filenames = sorted(glob.glob(f'{keypoint_root}/**/*.txt', recursive=True)) + assert len(filenames) == len(keypoint_filenames) + + return filenames, keypoint_filenames + +class VideoPathDataset(torch.utils.data.Dataset): + def __init__(self, filenames, txt_filenames, bfm_folder): + self.filenames = filenames + self.txt_filenames = txt_filenames + self.lm3d_std = load_lm3d(bfm_folder) + + def __len__(self): + return len(self.filenames) + + def __getitem__(self, index): + filename = self.filenames[index] + txt_filename = self.txt_filenames[index] + frames = self.read_video(filename) + lm = np.loadtxt(txt_filename).astype(np.float32) + lm = lm.reshape([len(frames), -1, 2]) + out_images, out_trans_params = list(), list() + for i in range(len(frames)): + out_img, _, out_trans_param \ + = self.image_transform(frames[i], lm[i]) + out_images.append(out_img[None]) + out_trans_params.append(out_trans_param[None]) + return { + 'imgs': torch.cat(out_images, 0), + 'trans_param':torch.cat(out_trans_params, 0), + 'filename': filename + } + + def read_video(self, filename): + frames = list() + cap = cv2.VideoCapture(filename) + while cap.isOpened(): + ret, frame = cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = Image.fromarray(frame) + frames.append(frame) + else: + break + cap.release() + return frames + + def image_transform(self, images, lm): + W,H = images.size + if np.mean(lm) == -1: + lm = (self.lm3d_std[:, :2]+1)/2. + lm = np.concatenate( + [lm[:, :1]*W, lm[:, 1:2]*H], 1 + ) + else: + lm[:, -1] = H - 1 - lm[:, -1] + + trans_params, img, lm, _ = align_img(images, lm, self.lm3d_std) + img = torch.tensor(np.array(img)/255., dtype=torch.float32).permute(2, 0, 1) + lm = torch.tensor(lm) + trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]) + trans_params = torch.tensor(trans_params.astype(np.float32)) + return img, lm, trans_params + +def main(opt, model): + # import torch.multiprocessing + # torch.multiprocessing.set_sharing_strategy('file_system') + filenames, keypoint_filenames = get_data_path(opt.input_dir, opt.keypoint_dir) + dataset = VideoPathDataset(filenames, keypoint_filenames, opt.bfm_folder) + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=1, # can noly set to one here! 
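+        # each dataset item is an entire video (a variable number of frames), so the outer
+        # DataLoader must keep batch_size=1; frames are re-batched below with opt.inference_batch_size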
+ shuffle=False, + drop_last=False, + num_workers=0, + ) + batch_size = opt.inference_batch_size + for data in tqdm(dataloader): + num_batch = data['imgs'][0].shape[0] // batch_size + 1 + pred_coeffs = list() + for index in range(num_batch): + data_input = { + 'imgs': data['imgs'][0,index*batch_size:(index+1)*batch_size], + } + model.set_input(data_input) + model.test() + pred_coeff = {key:model.pred_coeffs_dict[key].cpu().numpy() for key in model.pred_coeffs_dict} + pred_coeff = np.concatenate([ + pred_coeff['id'], + pred_coeff['exp'], + pred_coeff['tex'], + pred_coeff['angle'], + pred_coeff['gamma'], + pred_coeff['trans']], 1) + pred_coeffs.append(pred_coeff) + visuals = model.get_current_visuals() # get image results + if False: # debug + for name in visuals: + images = visuals[name] + for i in range(images.shape[0]): + image_numpy = tensor2im(images[i]) + save_image( + image_numpy, + os.path.join( + opt.output_dir, + os.path.basename(data['filename'][0])+str(i).zfill(5)+'.jpg') + ) + exit() + + pred_coeffs = np.concatenate(pred_coeffs, 0) + pred_trans_params = data['trans_param'][0].cpu().numpy() + name = data['filename'][0].split('/')[-2:] + name[-1] = os.path.splitext(name[-1])[0] + '.mat' + os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True) + savemat( + os.path.join(opt.output_dir, name[-2], name[-1]), + {'coeff':pred_coeffs, 'transform_params':pred_trans_params} + ) + +if __name__ == '__main__': + opt = InferenceOptions().parse() # get test options + model = create_model(opt) + model.setup(opt) + model.device = 'cuda:0' + model.parallelize() + model.eval() + + main(opt, model) + + diff --git a/third_part/face3d/models/__init__.py b/third_part/face3d/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fb0ec62dee85c6eaf6b86c332cb913cfae209d85 --- /dev/null +++ b/third_part/face3d/models/__init__.py @@ -0,0 +1,67 @@ +"""This package contains modules related to objective functions, optimizations, and network architectures. + +To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. +You need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate loss, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + +In the function <__init__>, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. + -- self.model_names (str list): define networks used in our training. + -- self.visual_names (str list): specify the images that you want to display and save. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. + +Now you can use the model class by specifying flag '--model dummy'. +See our template model class 'template_model.py' for more details. +""" + +import importlib +from face3d.models.base_model import BaseModel + + +def find_model_using_name(model_name): + """Import the module "models/[model_name]_model.py". + + In the file, the class called DatasetNameModel() will + be instantiated. It has to be a subclass of BaseModel, + and it is case-insensitive. 
+ """ + model_filename = "face3d.models." + model_name + "_model" + modellib = importlib.import_module(model_filename) + model = None + target_model_name = model_name.replace('_', '') + 'model' + for name, cls in modellib.__dict__.items(): + if name.lower() == target_model_name.lower() \ + and issubclass(cls, BaseModel): + model = cls + + if model is None: + print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) + exit(0) + + return model + + +def get_option_setter(model_name): + """Return the static method of the model class.""" + model_class = find_model_using_name(model_name) + return model_class.modify_commandline_options + + +def create_model(opt): + """Create a model given the option. + + This function warps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from models import create_model + >>> model = create_model(opt) + """ + model = find_model_using_name(opt.model) + instance = model(opt) + print("model [%s] was created" % type(instance).__name__) + return instance diff --git a/third_part/face3d/models/arcface_torch/README.md b/third_part/face3d/models/arcface_torch/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2ee63a861229b68873561fa39bfa7c9a8b53b947 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/README.md @@ -0,0 +1,164 @@ +# Distributed Arcface Training in Pytorch + +This is a deep learning library that makes face recognition efficient, and effective, which can train tens of millions +identity on a single server. + +## Requirements + +- Install [pytorch](http://pytorch.org) (torch>=1.6.0), our doc for [install.md](docs/install.md). +- `pip install -r requirements.txt`. +- Download the dataset + from [https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_](https://github.com/deepinsight/insightface/tree/master/recognition/_datasets_) + . + +## How to Training + +To train a model, run `train.py` with the path to the configs: + +### 1. Single node, 8 GPUs: + +```shell +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 +``` + +### 2. Multiple nodes, each node 8 GPUs: + +Node 0: + +```shell +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr="ip1" --master_port=1234 train.py train.py configs/ms1mv3_r50 +``` + +Node 1: + +```shell +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr="ip1" --master_port=1234 train.py train.py configs/ms1mv3_r50 +``` + +### 3.Training resnet2060 with 8 GPUs: + +```shell +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r2060.py +``` + +## Model Zoo + +- The models are available for non-commercial research purposes only. +- All models can be found in here. +- [Baidu Yun Pan](https://pan.baidu.com/s/1CL-l4zWqsI1oDuEEYVhj-g): e8pw +- [onedrive](https://1drv.ms/u/s!AswpsDO2toNKq0lWY69vN58GR6mw?e=p9Ov5d) + +### Performance on [**ICCV2021-MFR**](http://iccv21-mfr.com/) + +ICCV2021-MFR testset consists of non-celebrities so we can ensure that it has very few overlap with public available face +recognition training set, such as MS1M and CASIA as they mostly collected from online celebrities. 
+As the result, we can evaluate the FAIR performance for different algorithms. + +For **ICCV2021-MFR-ALL** set, TAR is measured on all-to-all 1:1 protocal, with FAR less than 0.000001(e-6). The +globalised multi-racial testset contains 242,143 identities and 1,624,305 images. + +For **ICCV2021-MFR-MASK** set, TAR is measured on mask-to-nonmask 1:1 protocal, with FAR less than 0.0001(e-4). +Mask testset contains 6,964 identities, 6,964 masked images and 13,928 non-masked images. +There are totally 13,928 positive pairs and 96,983,824 negative pairs. + +| Datasets | backbone | Training throughout | Size / MB | **ICCV2021-MFR-MASK** | **ICCV2021-MFR-ALL** | +| :---: | :--- | :--- | :--- |:--- |:--- | +| MS1MV3 | r18 | - | 91 | **47.85** | **68.33** | +| Glint360k | r18 | 8536 | 91 | **53.32** | **72.07** | +| MS1MV3 | r34 | - | 130 | **58.72** | **77.36** | +| Glint360k | r34 | 6344 | 130 | **65.10** | **83.02** | +| MS1MV3 | r50 | 5500 | 166 | **63.85** | **80.53** | +| Glint360k | r50 | 5136 | 166 | **70.23** | **87.08** | +| MS1MV3 | r100 | - | 248 | **69.09** | **84.31** | +| Glint360k | r100 | 3332 | 248 | **75.57** | **90.66** | +| MS1MV3 | mobilefacenet | 12185 | 7.8 | **41.52** | **65.26** | +| Glint360k | mobilefacenet | 11197 | 7.8 | **44.52** | **66.48** | + +### Performance on IJB-C and Verification Datasets + +| Datasets | backbone | IJBC(1e-05) | IJBC(1e-04) | agedb30 | cfp_fp | lfw | log | +| :---: | :--- | :--- | :--- | :--- |:--- |:--- |:--- | +| MS1MV3 | r18 | 92.07 | 94.66 | 97.77 | 97.73 | 99.77 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r18_fp16/training.log)| +| MS1MV3 | r34 | 94.10 | 95.90 | 98.10 | 98.67 | 99.80 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r34_fp16/training.log)| +| MS1MV3 | r50 | 94.79 | 96.46 | 98.35 | 98.96 | 99.83 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r50_fp16/training.log)| +| MS1MV3 | r100 | 95.31 | 96.81 | 98.48 | 99.06 | 99.85 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r100_fp16/training.log)| +| MS1MV3 | **r2060**| 95.34 | 97.11 | 98.67 | 99.24 | 99.87 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/ms1mv3_arcface_r2060_fp16/training.log)| +| Glint360k |r18-0.1 | 93.16 | 95.33 | 97.72 | 97.73 | 99.77 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r18_fp16_0.1/training.log)| +| Glint360k |r34-0.1 | 95.16 | 96.56 | 98.33 | 98.78 | 99.82 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r34_fp16_0.1/training.log)| +| Glint360k |r50-0.1 | 95.61 | 96.97 | 98.38 | 99.20 | 99.83 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r50_fp16_0.1/training.log)| +| Glint360k |r100-0.1 | 95.88 | 97.32 | 98.48 | 99.29 | 99.82 |[log](https://raw.githubusercontent.com/anxiangsir/insightface_arcface_log/master/glint360k_cosface_r100_fp16_0.1/training.log)| + +[comment]: <> (More details see [model.md](docs/modelzoo.md) in docs.) + + +## [Speed Benchmark](docs/speed_benchmark.md) + +**Arcface Torch** can train large-scale face recognition training set efficiently and quickly. 
When the number of classes in a training set is greater than 300K and training is sufficient, the Partial FC
+sampling strategy reaches the same accuracy with several times faster training and a smaller GPU memory footprint.
+Partial FC is a sparse variant of the model-parallel architecture for large-scale face recognition. Partial FC uses a
+sparse softmax, where each batch dynamically samples a subset of the class centers for training. In each iteration, only a
+sparse part of the parameters is updated, which saves a large amount of GPU memory and computation. With Partial FC,
+we can scale the training set to 29 million identities, the largest to date. Partial FC also supports multi-machine distributed
+training and mixed-precision training.
+
+![Image text](https://github.com/anxiangsir/insightface_arcface_log/blob/master/partial_fc_v2.png)
+
+For more details, see [speed_benchmark.md](docs/speed_benchmark.md) in docs.
+
+### 1. Training speed of different parallel methods (samples / second), Tesla V100 32GB * 8. (Larger is better)
+
+`-` means training failed because of GPU memory limitations.
+
+| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+| :--- | :--- | :--- | :--- |
+|125000 | 4681 | 4824 | 5004 |
+|1400000 | **1672** | 3043 | 4738 |
+|5500000 | **-** | **1389** | 3975 |
+|8000000 | **-** | **-** | 3565 |
+|16000000 | **-** | **-** | 2679 |
+|29000000 | **-** | **-** | **1855** |
+
+### 2. GPU memory cost of different parallel methods (MB per GPU), Tesla V100 32GB * 8. (Smaller is better)
+
+| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 |
+| :--- | :--- | :--- | :--- |
+|125000 | 7358 | 5306 | 4868 |
+|1400000 | 32252 | 11178 | 6056 |
+|5500000 | **-** | 32188 | 9854 |
+|8000000 | **-** | **-** | 12310 |
+|16000000 | **-** | **-** | 19950 |
+|29000000 | **-** | **-** | 32324 |
+
+## Evaluation ICCV2021-MFR and IJB-C
+
+For more details, see [eval.md](docs/eval.md) in docs.
+
+## Test
+
+We tested many versions of PyTorch. Please create an issue if you are having trouble.
+ +- [x] torch 1.6.0 +- [x] torch 1.7.1 +- [x] torch 1.8.0 +- [x] torch 1.9.0 + +## Citation + +``` +@inproceedings{deng2019arcface, + title={Arcface: Additive angular margin loss for deep face recognition}, + author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={4690--4699}, + year={2019} +} +@inproceedings{an2020partical_fc, + title={Partial FC: Training 10 Million Identities on a Single Machine}, + author={An, Xiang and Zhu, Xuhan and Xiao, Yang and Wu, Lan and Zhang, Ming and Gao, Yuan and Qin, Bin and + Zhang, Debing and Fu Ying}, + booktitle={Arxiv 2010.05222}, + year={2020} +} +``` diff --git a/third_part/face3d/models/arcface_torch/backbones/__init__.py b/third_part/face3d/models/arcface_torch/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55bd4c5d1889a1a998b52eb56793bbc1eef1b691 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/backbones/__init__.py @@ -0,0 +1,25 @@ +from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 +from .mobilefacenet import get_mbf + + +def get_model(name, **kwargs): + # resnet + if name == "r18": + return iresnet18(False, **kwargs) + elif name == "r34": + return iresnet34(False, **kwargs) + elif name == "r50": + return iresnet50(False, **kwargs) + elif name == "r100": + return iresnet100(False, **kwargs) + elif name == "r200": + return iresnet200(False, **kwargs) + elif name == "r2060": + from .iresnet2060 import iresnet2060 + return iresnet2060(False, **kwargs) + elif name == "mbf": + fp16 = kwargs.get("fp16", False) + num_features = kwargs.get("num_features", 512) + return get_mbf(fp16=fp16, num_features=num_features) + else: + raise ValueError() \ No newline at end of file diff --git a/third_part/face3d/models/arcface_torch/backbones/iresnet.py b/third_part/face3d/models/arcface_torch/backbones/iresnet.py new file mode 100644 index 0000000000000000000000000000000000000000..c6d3b9c240c24687d432197f976ee01fbf423216 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/backbones/iresnet.py @@ -0,0 +1,187 @@ +import torch +from torch import nn + +__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200'] + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias=False) + + +class IBasicBlock(nn.Module): + expansion = 1 + def __init__(self, inplanes, planes, stride=1, downsample=None, + groups=1, base_width=64, dilation=1): + super(IBasicBlock, self).__init__() + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,) + self.conv1 = conv3x3(inplanes, planes) + self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,) + self.prelu = nn.PReLU(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + out = self.bn1(x) + out = self.conv1(out) + out = self.bn2(out) + 
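+        # "improved residual" (IR) block ordering: BN -> Conv -> BN -> PReLU -> Conv -> BN,
+        # with the shortcut added afterwards and no activation after the addition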
out = self.prelu(out) + out = self.conv2(out) + out = self.bn3(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + return out + + +class IResNet(nn.Module): + fc_scale = 7 * 7 + def __init__(self, + block, layers, dropout=0, num_features=512, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False): + super(IResNet, self).__init__() + self.fp16 = fp16 + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) + self.prelu = nn.PReLU(self.inplanes) + self.layer1 = self._make_layer(block, 64, layers[0], stride=2) + self.layer2 = self._make_layer(block, + 128, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2]) + self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,) + self.dropout = nn.Dropout(p=dropout, inplace=True) + self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features) + self.features = nn.BatchNorm1d(num_features, eps=1e-05) + nn.init.constant_(self.features.weight, 1.0) + self.features.weight.requires_grad = False + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.normal_(m.weight, 0, 0.1) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, IBasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ), + ) + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation)) + + return nn.Sequential(*layers) + + def forward(self, x): + with torch.cuda.amp.autocast(self.fp16): + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.bn2(x) + x = torch.flatten(x, 1) + x = self.dropout(x) + x = self.fc(x.float() if self.fp16 else x) + x = self.features(x) + return x + + +def _iresnet(arch, block, layers, pretrained, progress, **kwargs): + model = IResNet(block, layers, **kwargs) + if pretrained: + raise ValueError() + return model + + +def iresnet18(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained, + progress, **kwargs) + + +def 
iresnet34(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained, + progress, **kwargs) + + +def iresnet50(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained, + progress, **kwargs) + + +def iresnet100(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained, + progress, **kwargs) + + +def iresnet200(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained, + progress, **kwargs) + diff --git a/third_part/face3d/models/arcface_torch/backbones/iresnet2060.py b/third_part/face3d/models/arcface_torch/backbones/iresnet2060.py new file mode 100644 index 0000000000000000000000000000000000000000..21d1122144d207637d2444cba1f68fe630c89f31 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/backbones/iresnet2060.py @@ -0,0 +1,176 @@ +import torch +from torch import nn + +assert torch.__version__ >= "1.8.1" +from torch.utils.checkpoint import checkpoint_sequential + +__all__ = ['iresnet2060'] + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + bias=False, + dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias=False) + + +class IBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, + groups=1, base_width=64, dilation=1): + super(IBasicBlock, self).__init__() + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05, ) + self.conv1 = conv3x3(inplanes, planes) + self.bn2 = nn.BatchNorm2d(planes, eps=1e-05, ) + self.prelu = nn.PReLU(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn3 = nn.BatchNorm2d(planes, eps=1e-05, ) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + out = self.bn1(x) + out = self.conv1(out) + out = self.bn2(out) + out = self.prelu(out) + out = self.conv2(out) + out = self.bn3(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + return out + + +class IResNet(nn.Module): + fc_scale = 7 * 7 + + def __init__(self, + block, layers, dropout=0, num_features=512, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False): + super(IResNet, self).__init__() + self.fp16 = fp16 + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) + self.prelu = nn.PReLU(self.inplanes) + self.layer1 = self._make_layer(block, 64, layers[0], stride=2) + self.layer2 = self._make_layer(block, + 128, + layers[1], + stride=2, + 
dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2]) + self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05, ) + self.dropout = nn.Dropout(p=dropout, inplace=True) + self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features) + self.features = nn.BatchNorm1d(num_features, eps=1e-05) + nn.init.constant_(self.features.weight, 1.0) + self.features.weight.requires_grad = False + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.normal_(m.weight, 0, 0.1) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, IBasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ), + ) + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation)) + + return nn.Sequential(*layers) + + def checkpoint(self, func, num_seg, x): + if self.training: + return checkpoint_sequential(func, num_seg, x) + else: + return func(x) + + def forward(self, x): + with torch.cuda.amp.autocast(self.fp16): + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.layer1(x) + x = self.checkpoint(self.layer2, 20, x) + x = self.checkpoint(self.layer3, 100, x) + x = self.layer4(x) + x = self.bn2(x) + x = torch.flatten(x, 1) + x = self.dropout(x) + x = self.fc(x.float() if self.fp16 else x) + x = self.features(x) + return x + + +def _iresnet(arch, block, layers, pretrained, progress, **kwargs): + model = IResNet(block, layers, **kwargs) + if pretrained: + raise ValueError() + return model + + +def iresnet2060(pretrained=False, progress=True, **kwargs): + return _iresnet('iresnet2060', IBasicBlock, [3, 128, 1024 - 128, 3], pretrained, progress, **kwargs) diff --git a/third_part/face3d/models/arcface_torch/backbones/mobilefacenet.py b/third_part/face3d/models/arcface_torch/backbones/mobilefacenet.py new file mode 100644 index 0000000000000000000000000000000000000000..87731491d76f9ff61cc70e57bb3f18c54fae308c --- /dev/null +++ b/third_part/face3d/models/arcface_torch/backbones/mobilefacenet.py @@ -0,0 +1,130 @@ +''' +Adapted from https://github.com/cavalleria/cavaface.pytorch/blob/master/backbone/mobilefacenet.py +Original author cavalleria +''' + +import torch.nn as nn +from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Sequential, Module +import torch + + +class Flatten(Module): + def forward(self, x): + return x.view(x.size(0), -1) + + +class ConvBlock(Module): + def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): + super(ConvBlock, self).__init__() + self.layers = nn.Sequential( + Conv2d(in_c, out_c, kernel, groups=groups, stride=stride, 
padding=padding, bias=False), + BatchNorm2d(num_features=out_c), + PReLU(num_parameters=out_c) + ) + + def forward(self, x): + return self.layers(x) + + +class LinearBlock(Module): + def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): + super(LinearBlock, self).__init__() + self.layers = nn.Sequential( + Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False), + BatchNorm2d(num_features=out_c) + ) + + def forward(self, x): + return self.layers(x) + + +class DepthWise(Module): + def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1): + super(DepthWise, self).__init__() + self.residual = residual + self.layers = nn.Sequential( + ConvBlock(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)), + ConvBlock(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride), + LinearBlock(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) + ) + + def forward(self, x): + short_cut = None + if self.residual: + short_cut = x + x = self.layers(x) + if self.residual: + output = short_cut + x + else: + output = x + return output + + +class Residual(Module): + def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)): + super(Residual, self).__init__() + modules = [] + for _ in range(num_block): + modules.append(DepthWise(c, c, True, kernel, stride, padding, groups)) + self.layers = Sequential(*modules) + + def forward(self, x): + return self.layers(x) + + +class GDC(Module): + def __init__(self, embedding_size): + super(GDC, self).__init__() + self.layers = nn.Sequential( + LinearBlock(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)), + Flatten(), + Linear(512, embedding_size, bias=False), + BatchNorm1d(embedding_size)) + + def forward(self, x): + return self.layers(x) + + +class MobileFaceNet(Module): + def __init__(self, fp16=False, num_features=512): + super(MobileFaceNet, self).__init__() + scale = 2 + self.fp16 = fp16 + self.layers = nn.Sequential( + ConvBlock(3, 64 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1)), + ConvBlock(64 * scale, 64 * scale, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64), + DepthWise(64 * scale, 64 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128), + Residual(64 * scale, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), + DepthWise(64 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256), + Residual(128 * scale, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), + DepthWise(128 * scale, 128 * scale, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512), + Residual(128 * scale, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)), + ) + self.conv_sep = ConvBlock(128 * scale, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)) + self.features = GDC(num_features) + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + with torch.cuda.amp.autocast(self.fp16): + x = self.layers(x) + x = self.conv_sep(x.float() if self.fp16 else x) + 
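+            # GDC head: 7x7 depthwise conv over the full feature map, flatten, then a
+            # bias-free linear layer + BatchNorm1d producing the final face embedding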
x = self.features(x) + return x + + +def get_mbf(fp16, num_features): + return MobileFaceNet(fp16, num_features) \ No newline at end of file diff --git a/third_part/face3d/models/arcface_torch/configs/3millions.py b/third_part/face3d/models/arcface_torch/configs/3millions.py new file mode 100644 index 0000000000000000000000000000000000000000..c9edc2f1414e35f93abfd3dfe11a61f1f406580e --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/3millions.py @@ -0,0 +1,23 @@ +from easydict import EasyDict as edict + +# configs for test speed + +config = edict() +config.loss = "arcface" +config.network = "r50" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "synthetic" +config.num_classes = 300 * 10000 +config.num_epoch = 30 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = [] diff --git a/third_part/face3d/models/arcface_torch/configs/3millions_pfc.py b/third_part/face3d/models/arcface_torch/configs/3millions_pfc.py new file mode 100644 index 0000000000000000000000000000000000000000..77caafdbb300d8109d5bfdb844f131710ef81f20 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/3millions_pfc.py @@ -0,0 +1,23 @@ +from easydict import EasyDict as edict + +# configs for test speed + +config = edict() +config.loss = "arcface" +config.network = "r50" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 0.1 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "synthetic" +config.num_classes = 300 * 10000 +config.num_epoch = 30 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = [] diff --git a/third_part/face3d/models/arcface_torch/configs/__init__.py b/third_part/face3d/models/arcface_torch/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/face3d/models/arcface_torch/configs/base.py b/third_part/face3d/models/arcface_torch/configs/base.py new file mode 100644 index 0000000000000000000000000000000000000000..78e4b36a9142b649ec39a8c59331bb2557f2ad57 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/base.py @@ -0,0 +1,56 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "r50" +config.resume = False +config.output = "ms1mv3_arcface_r50" + +config.dataset = "ms1m-retinaface-t1" +config.embedding_size = 512 +config.sample_rate = 1 +config.fp16 = False +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +if config.dataset == "emore": + config.rec = "/train_tmp/faces_emore" + config.num_classes = 85742 + config.num_image = 5822653 + config.num_epoch = 16 + config.warmup_epoch = -1 + config.decay_epoch = [8, 14, ] + config.val_targets = ["lfw", ] + +elif config.dataset == "ms1m-retinaface-t1": + config.rec = "/train_tmp/ms1m-retinaface-t1" + config.num_classes = 93431 + config.num_image = 5179510 + config.num_epoch = 25 + config.warmup_epoch = -1 + config.decay_epoch = [11, 17, 22] + config.val_targets = ["lfw", "cfp_fp", "agedb_30"] + +elif config.dataset == 
"glint360k": + config.rec = "/train_tmp/glint360k" + config.num_classes = 360232 + config.num_image = 17091657 + config.num_epoch = 20 + config.warmup_epoch = -1 + config.decay_epoch = [8, 12, 15, 18] + config.val_targets = ["lfw", "cfp_fp", "agedb_30"] + +elif config.dataset == "webface": + config.rec = "/train_tmp/faces_webface_112x112" + config.num_classes = 10572 + config.num_image = "forget" + config.num_epoch = 34 + config.warmup_epoch = -1 + config.decay_epoch = [20, 28, 32] + config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/glint360k_mbf.py b/third_part/face3d/models/arcface_torch/configs/glint360k_mbf.py new file mode 100644 index 0000000000000000000000000000000000000000..46ae777cc97af41a531cba4e5d1ff31f2efcb468 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/glint360k_mbf.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "cosface" +config.network = "mbf" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 0.1 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 2e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/glint360k" +config.num_classes = 360232 +config.num_image = 17091657 +config.num_epoch = 20 +config.warmup_epoch = -1 +config.decay_epoch = [8, 12, 15, 18] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/glint360k_r100.py b/third_part/face3d/models/arcface_torch/configs/glint360k_r100.py new file mode 100644 index 0000000000000000000000000000000000000000..93d0701c0094517cec147c382b005e8063938548 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/glint360k_r100.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "cosface" +config.network = "r100" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/glint360k" +config.num_classes = 360232 +config.num_image = 17091657 +config.num_epoch = 20 +config.warmup_epoch = -1 +config.decay_epoch = [8, 12, 15, 18] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/glint360k_r18.py b/third_part/face3d/models/arcface_torch/configs/glint360k_r18.py new file mode 100644 index 0000000000000000000000000000000000000000..7a8db34cd547e8e667103c93585296e47a894e97 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/glint360k_r18.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "cosface" +config.network = "r18" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/glint360k" +config.num_classes = 360232 +config.num_image = 17091657 +config.num_epoch = 20 +config.warmup_epoch = -1 +config.decay_epoch = [8, 
12, 15, 18] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/glint360k_r34.py b/third_part/face3d/models/arcface_torch/configs/glint360k_r34.py new file mode 100644 index 0000000000000000000000000000000000000000..fda2701758a839a7161d09c25f0ca3d26033baff --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/glint360k_r34.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "cosface" +config.network = "r34" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/glint360k" +config.num_classes = 360232 +config.num_image = 17091657 +config.num_epoch = 20 +config.warmup_epoch = -1 +config.decay_epoch = [8, 12, 15, 18] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/glint360k_r50.py b/third_part/face3d/models/arcface_torch/configs/glint360k_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..37e7922f1f63284e356dcc45a5f979f9c105f25e --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/glint360k_r50.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "cosface" +config.network = "r50" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/glint360k" +config.num_classes = 360232 +config.num_image = 17091657 +config.num_epoch = 20 +config.warmup_epoch = -1 +config.decay_epoch = [8, 12, 15, 18] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/ms1mv3_mbf.py b/third_part/face3d/models/arcface_torch/configs/ms1mv3_mbf.py new file mode 100644 index 0000000000000000000000000000000000000000..b8a00d6305eeda5a94788017afc1cda0d4a4cd2a --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/ms1mv3_mbf.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "mbf" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 2e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/ms1m-retinaface-t1" +config.num_classes = 93431 +config.num_image = 5179510 +config.num_epoch = 30 +config.warmup_epoch = -1 +config.decay_epoch = [10, 20, 25] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/ms1mv3_r18.py b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r18.py new file mode 100644 index 0000000000000000000000000000000000000000..eb4e0d31f1aedf4590628d394e1606920fefb5c9 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r18.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our 
RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "r18" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/ms1m-retinaface-t1" +config.num_classes = 93431 +config.num_image = 5179510 +config.num_epoch = 25 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/ms1mv3_r2060.py b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r2060.py new file mode 100644 index 0000000000000000000000000000000000000000..23ad81e082c4b6390b67b164d0ceb84bb0635684 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r2060.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "r2060" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 64 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/ms1m-retinaface-t1" +config.num_classes = 93431 +config.num_image = 5179510 +config.num_epoch = 25 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/ms1mv3_r34.py b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r34.py new file mode 100644 index 0000000000000000000000000000000000000000..5f78337a3d1f9eb6e9145eb5093618796c6842d2 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r34.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "r34" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/ms1m-retinaface-t1" +config.num_classes = 93431 +config.num_image = 5179510 +config.num_epoch = 25 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/ms1mv3_r50.py b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..08ba55dbbea6df0afffddbb3d1ed173efad99604 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/ms1mv3_r50.py @@ -0,0 +1,26 @@ +from easydict import EasyDict as edict + +# make training faster +# our RAM is 256G +# mount -t tmpfs -o size=140G tmpfs /train_tmp + +config = edict() +config.loss = "arcface" +config.network = "r50" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "/train_tmp/ms1m-retinaface-t1" +config.num_classes = 93431 +config.num_image = 5179510 +config.num_epoch = 
25 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = ["lfw", "cfp_fp", "agedb_30"] diff --git a/third_part/face3d/models/arcface_torch/configs/speed.py b/third_part/face3d/models/arcface_torch/configs/speed.py new file mode 100644 index 0000000000000000000000000000000000000000..45e95237da65e44f35a172c25ac6dc4e313e4eae --- /dev/null +++ b/third_part/face3d/models/arcface_torch/configs/speed.py @@ -0,0 +1,23 @@ +from easydict import EasyDict as edict + +# configs for test speed + +config = edict() +config.loss = "arcface" +config.network = "r50" +config.resume = False +config.output = None +config.embedding_size = 512 +config.sample_rate = 1.0 +config.fp16 = True +config.momentum = 0.9 +config.weight_decay = 5e-4 +config.batch_size = 128 +config.lr = 0.1 # batch size is 512 + +config.rec = "synthetic" +config.num_classes = 100 * 10000 +config.num_epoch = 30 +config.warmup_epoch = -1 +config.decay_epoch = [10, 16, 22] +config.val_targets = [] diff --git a/third_part/face3d/models/arcface_torch/dataset.py b/third_part/face3d/models/arcface_torch/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..96bbb8bb6da99122f350bc8e1a6390245840e32b --- /dev/null +++ b/third_part/face3d/models/arcface_torch/dataset.py @@ -0,0 +1,124 @@ +import numbers +import os +import queue as Queue +import threading + +import mxnet as mx +import numpy as np +import torch +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class BackgroundGenerator(threading.Thread): + def __init__(self, generator, local_rank, max_prefetch=6): + super(BackgroundGenerator, self).__init__() + self.queue = Queue.Queue(max_prefetch) + self.generator = generator + self.local_rank = local_rank + self.daemon = True + self.start() + + def run(self): + torch.cuda.set_device(self.local_rank) + for item in self.generator: + self.queue.put(item) + self.queue.put(None) + + def next(self): + next_item = self.queue.get() + if next_item is None: + raise StopIteration + return next_item + + def __next__(self): + return self.next() + + def __iter__(self): + return self + + +class DataLoaderX(DataLoader): + + def __init__(self, local_rank, **kwargs): + super(DataLoaderX, self).__init__(**kwargs) + self.stream = torch.cuda.Stream(local_rank) + self.local_rank = local_rank + + def __iter__(self): + self.iter = super(DataLoaderX, self).__iter__() + self.iter = BackgroundGenerator(self.iter, self.local_rank) + self.preload() + return self + + def preload(self): + self.batch = next(self.iter, None) + if self.batch is None: + return None + with torch.cuda.stream(self.stream): + for k in range(len(self.batch)): + self.batch[k] = self.batch[k].to(device=self.local_rank, non_blocking=True) + + def __next__(self): + torch.cuda.current_stream().wait_stream(self.stream) + batch = self.batch + if batch is None: + raise StopIteration + self.preload() + return batch + + +class MXFaceDataset(Dataset): + def __init__(self, root_dir, local_rank): + super(MXFaceDataset, self).__init__() + self.transform = transforms.Compose( + [transforms.ToPILImage(), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + ]) + self.root_dir = root_dir + self.local_rank = local_rank + path_imgrec = os.path.join(root_dir, 'train.rec') + path_imgidx = os.path.join(root_dir, 'train.idx') + self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') + s = self.imgrec.read_idx(0) + header, _ = 
mx.recordio.unpack(s) + if header.flag > 0: + self.header0 = (int(header.label[0]), int(header.label[1])) + self.imgidx = np.array(range(1, int(header.label[0]))) + else: + self.imgidx = np.array(list(self.imgrec.keys)) + + def __getitem__(self, index): + idx = self.imgidx[index] + s = self.imgrec.read_idx(idx) + header, img = mx.recordio.unpack(s) + label = header.label + if not isinstance(label, numbers.Number): + label = label[0] + label = torch.tensor(label, dtype=torch.long) + sample = mx.image.imdecode(img).asnumpy() + if self.transform is not None: + sample = self.transform(sample) + return sample, label + + def __len__(self): + return len(self.imgidx) + + +class SyntheticDataset(Dataset): + def __init__(self, local_rank): + super(SyntheticDataset, self).__init__() + img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) + img = np.transpose(img, (2, 0, 1)) + img = torch.from_numpy(img).squeeze(0).float() + img = ((img / 255) - 0.5) / 0.5 + self.img = img + self.label = 1 + + def __getitem__(self, index): + return self.img, self.label + + def __len__(self): + return 1000000 diff --git a/third_part/face3d/models/arcface_torch/docs/eval.md b/third_part/face3d/models/arcface_torch/docs/eval.md new file mode 100644 index 0000000000000000000000000000000000000000..dd1d9e257367b6422680966198646c45e5a2671d --- /dev/null +++ b/third_part/face3d/models/arcface_torch/docs/eval.md @@ -0,0 +1,31 @@ +## Eval on ICCV2021-MFR + +coming soon. + + +## Eval IJBC +You can eval ijbc with pytorch or onnx. + + +1. Eval IJBC With Onnx +```shell +CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 +``` + +2. Eval IJBC With Pytorch +```shell +CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ +--model-prefix ms1mv3_arcface_r50/backbone.pth \ +--image-path IJB_release/IJBC \ +--result-dir ms1mv3_arcface_r50 \ +--batch-size 128 \ +--job ms1mv3_arcface_r50 \ +--target IJBC \ +--network iresnet50 +``` + +## Inference + +```shell +python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 +``` diff --git a/third_part/face3d/models/arcface_torch/docs/install.md b/third_part/face3d/models/arcface_torch/docs/install.md new file mode 100644 index 0000000000000000000000000000000000000000..6314a40441285e9236438e468caf8b71a407531a --- /dev/null +++ b/third_part/face3d/models/arcface_torch/docs/install.md @@ -0,0 +1,51 @@ +## v1.8.0 +### Linux and Windows +```shell +# CUDA 11.0 +pip --default-timeout=100 install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html + +# CUDA 10.2 +pip --default-timeout=100 install torch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 + +# CPU only +pip --default-timeout=100 install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html + +``` + + +## v1.7.1 +### Linux and Windows +```shell +# CUDA 11.0 +pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html + +# CUDA 10.2 +pip install torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 + +# CUDA 10.1 +pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html + +# CUDA 9.2 +pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html + +# CPU only +pip install torch==1.7.1+cpu torchvision==0.8.2+cpu 
torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html +``` + + +## v1.6.0 + +### Linux and Windows +```shell +# CUDA 10.2 +pip install torch==1.6.0 torchvision==0.7.0 + +# CUDA 10.1 +pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html + +# CUDA 9.2 +pip install torch==1.6.0+cu92 torchvision==0.7.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html + +# CPU only +pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html +``` \ No newline at end of file diff --git a/third_part/face3d/models/arcface_torch/docs/modelzoo.md b/third_part/face3d/models/arcface_torch/docs/modelzoo.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/face3d/models/arcface_torch/docs/speed_benchmark.md b/third_part/face3d/models/arcface_torch/docs/speed_benchmark.md new file mode 100644 index 0000000000000000000000000000000000000000..055aee0defe2c43a523ced48260242f0f99b7cea --- /dev/null +++ b/third_part/face3d/models/arcface_torch/docs/speed_benchmark.md @@ -0,0 +1,93 @@ +## Test Training Speed + +- Test Commands + +You need to use the following two commands to test the Partial FC training performance. +The number of identites is **3 millions** (synthetic data), turn mixed precision training on, backbone is resnet50, +batch size is 1024. +```shell +# Model Parallel +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions +# Partial FC 0.1 +python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/3millions_pfc +``` + +- GPU Memory + +``` +# (Model Parallel) gpustat -i +[0] Tesla V100-SXM2-32GB | 64'C, 94 % | 30338 / 32510 MB +[1] Tesla V100-SXM2-32GB | 60'C, 99 % | 28876 / 32510 MB +[2] Tesla V100-SXM2-32GB | 60'C, 99 % | 28872 / 32510 MB +[3] Tesla V100-SXM2-32GB | 69'C, 99 % | 28872 / 32510 MB +[4] Tesla V100-SXM2-32GB | 66'C, 99 % | 28888 / 32510 MB +[5] Tesla V100-SXM2-32GB | 60'C, 99 % | 28932 / 32510 MB +[6] Tesla V100-SXM2-32GB | 68'C, 100 % | 28916 / 32510 MB +[7] Tesla V100-SXM2-32GB | 65'C, 99 % | 28860 / 32510 MB + +# (Partial FC 0.1) gpustat -i +[0] Tesla V100-SXM2-32GB | 60'C, 95 % | 10488 / 32510 MB │······················· +[1] Tesla V100-SXM2-32GB | 60'C, 97 % | 10344 / 32510 MB │······················· +[2] Tesla V100-SXM2-32GB | 61'C, 95 % | 10340 / 32510 MB │······················· +[3] Tesla V100-SXM2-32GB | 66'C, 95 % | 10340 / 32510 MB │······················· +[4] Tesla V100-SXM2-32GB | 65'C, 94 % | 10356 / 32510 MB │······················· +[5] Tesla V100-SXM2-32GB | 61'C, 95 % | 10400 / 32510 MB │······················· +[6] Tesla V100-SXM2-32GB | 68'C, 96 % | 10384 / 32510 MB │······················· +[7] Tesla V100-SXM2-32GB | 64'C, 95 % | 10328 / 32510 MB │······················· +``` + +- Training Speed + +```python +# (Model Parallel) trainging.log +Training: Speed 2271.33 samples/sec Loss 1.1624 LearningRate 0.2000 Epoch: 0 Global Step: 100 +Training: Speed 2269.94 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 +Training: Speed 2272.67 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 +Training: Speed 2266.55 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 +Training: Speed 2272.54 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 
Global Step: 300 + +# (Partial FC 0.1) training.log +Training: Speed 5299.56 samples/sec Loss 1.0965 LearningRate 0.2000 Epoch: 0 Global Step: 100 +Training: Speed 5296.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 150 +Training: Speed 5304.37 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 200 +Training: Speed 5274.43 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 250 +Training: Speed 5300.10 samples/sec Loss 0.0000 LearningRate 0.2000 Epoch: 0 Global Step: 300 +``` + +In this test case, Partial FC 0.1 uses only 1/3 of the GPU memory of the model parallel, +and the training speed is 2.5 times faster than the model parallel. + + +## Speed Benchmark + +1. Training speed of different parallel methods (samples/second), Tesla V100 32GB * 8. (Larger is better) + +| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | +| :--- | :--- | :--- | :--- | +|125000 | 4681 | 4824 | 5004 | +|250000 | 4047 | 4521 | 4976 | +|500000 | 3087 | 4013 | 4900 | +|1000000 | 2090 | 3449 | 4803 | +|1400000 | 1672 | 3043 | 4738 | +|2000000 | - | 2593 | 4626 | +|4000000 | - | 1748 | 4208 | +|5500000 | - | 1389 | 3975 | +|8000000 | - | - | 3565 | +|16000000 | - | - | 2679 | +|29000000 | - | - | 1855 | + +2. GPU memory cost of different parallel methods (MB per GPU), Tesla V100 32GB * 8. (Smaller is better) + +| Number of Identities in Dataset | Data Parallel | Model Parallel | Partial FC 0.1 | +| :--- | :--- | :--- | :--- | +|125000 | 7358 | 5306 | 4868 | +|250000 | 9940 | 5826 | 5004 | +|500000 | 14220 | 7114 | 5202 | +|1000000 | 23708 | 9966 | 5620 | +|1400000 | 32252 | 11178 | 6056 | +|2000000 | - | 13978 | 6472 | +|4000000 | - | 23238 | 8284 | +|5500000 | - | 32188 | 9854 | +|8000000 | - | - | 12310 | +|16000000 | - | - | 19950 | +|29000000 | - | - | 32324 | diff --git a/third_part/face3d/models/arcface_torch/eval/__init__.py b/third_part/face3d/models/arcface_torch/eval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/face3d/models/arcface_torch/eval/verification.py b/third_part/face3d/models/arcface_torch/eval/verification.py new file mode 100644 index 0000000000000000000000000000000000000000..253343b83dbf9d1bd154d14ec068e098bf0968db --- /dev/null +++ b/third_part/face3d/models/arcface_torch/eval/verification.py @@ -0,0 +1,407 @@ +"""Helper for evaluation on the Labeled Faces in the Wild dataset +""" + +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import datetime +import os +import pickle + +import mxnet as mx +import numpy as np +import sklearn +import torch +from mxnet import ndarray as nd +from scipy import interpolate +from sklearn.decomposition import PCA +from sklearn.model_selection import KFold + + +class LFold: + def __init__(self, n_splits=2, shuffle=False): + self.n_splits = n_splits + if self.n_splits > 1: + self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle) + + def split(self, indices): + if self.n_splits > 1: + return self.k_fold.split(indices) + else: + return [(indices, indices)] + + +def calculate_roc(thresholds, + embeddings1, + embeddings2, + actual_issame, + nrof_folds=10, + pca=0): + assert (embeddings1.shape[0] == embeddings2.shape[0]) + assert (embeddings1.shape[1] == embeddings2.shape[1]) + nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) + nrof_thresholds = len(thresholds) + k_fold = LFold(n_splits=nrof_folds, shuffle=False) + + tprs = np.zeros((nrof_folds, nrof_thresholds)) + fprs = np.zeros((nrof_folds, nrof_thresholds)) + accuracy = np.zeros((nrof_folds)) + indices = np.arange(nrof_pairs) + + if pca == 0: + diff = np.subtract(embeddings1, embeddings2) + dist = np.sum(np.square(diff), 1) + + for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + if pca > 0: + print('doing pca on', fold_idx) + embed1_train = embeddings1[train_set] + embed2_train = embeddings2[train_set] + _embed_train = np.concatenate((embed1_train, embed2_train), axis=0) + pca_model = PCA(n_components=pca) + pca_model.fit(_embed_train) + embed1 = pca_model.transform(embeddings1) + embed2 = pca_model.transform(embeddings2) + embed1 = sklearn.preprocessing.normalize(embed1) + embed2 = sklearn.preprocessing.normalize(embed2) + diff = np.subtract(embed1, embed2) + dist = np.sum(np.square(diff), 1) + + # Find the best threshold for the fold + acc_train = np.zeros((nrof_thresholds)) + for threshold_idx, threshold in enumerate(thresholds): + _, _, acc_train[threshold_idx] = calculate_accuracy( + threshold, dist[train_set], actual_issame[train_set]) + best_threshold_index = np.argmax(acc_train) + for threshold_idx, threshold in enumerate(thresholds): + tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy( + threshold, dist[test_set], + actual_issame[test_set]) + _, _, accuracy[fold_idx] = calculate_accuracy( + thresholds[best_threshold_index], dist[test_set], + actual_issame[test_set]) + + tpr = np.mean(tprs, 0) + fpr = np.mean(fprs, 0) + return tpr, fpr, accuracy + + +def calculate_accuracy(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + tp = np.sum(np.logical_and(predict_issame, actual_issame)) + fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) + tn = np.sum( + np.logical_and(np.logical_not(predict_issame), + np.logical_not(actual_issame))) + fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) + + tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) + fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) + acc = float(tp + tn) / dist.size + return tpr, fpr, acc + + +def calculate_val(thresholds, + embeddings1, + embeddings2, + actual_issame, + far_target, + nrof_folds=10): + assert (embeddings1.shape[0] == 
embeddings2.shape[0]) + assert (embeddings1.shape[1] == embeddings2.shape[1]) + nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) + nrof_thresholds = len(thresholds) + k_fold = LFold(n_splits=nrof_folds, shuffle=False) + + val = np.zeros(nrof_folds) + far = np.zeros(nrof_folds) + + diff = np.subtract(embeddings1, embeddings2) + dist = np.sum(np.square(diff), 1) + indices = np.arange(nrof_pairs) + + for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + + # Find the threshold that gives FAR = far_target + far_train = np.zeros(nrof_thresholds) + for threshold_idx, threshold in enumerate(thresholds): + _, far_train[threshold_idx] = calculate_val_far( + threshold, dist[train_set], actual_issame[train_set]) + if np.max(far_train) >= far_target: + f = interpolate.interp1d(far_train, thresholds, kind='slinear') + threshold = f(far_target) + else: + threshold = 0.0 + + val[fold_idx], far[fold_idx] = calculate_val_far( + threshold, dist[test_set], actual_issame[test_set]) + + val_mean = np.mean(val) + far_mean = np.mean(far) + val_std = np.std(val) + return val_mean, val_std, far_mean + + +def calculate_val_far(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) + false_accept = np.sum( + np.logical_and(predict_issame, np.logical_not(actual_issame))) + n_same = np.sum(actual_issame) + n_diff = np.sum(np.logical_not(actual_issame)) + # print(true_accept, false_accept) + # print(n_same, n_diff) + val = float(true_accept) / float(n_same) + far = float(false_accept) / float(n_diff) + return val, far + + +def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0): + # Calculate evaluation metrics + thresholds = np.arange(0, 4, 0.01) + embeddings1 = embeddings[0::2] + embeddings2 = embeddings[1::2] + tpr, fpr, accuracy = calculate_roc(thresholds, + embeddings1, + embeddings2, + np.asarray(actual_issame), + nrof_folds=nrof_folds, + pca=pca) + thresholds = np.arange(0, 4, 0.001) + val, val_std, far = calculate_val(thresholds, + embeddings1, + embeddings2, + np.asarray(actual_issame), + 1e-3, + nrof_folds=nrof_folds) + return tpr, fpr, accuracy, val, val_std, far + +@torch.no_grad() +def load_bin(path, image_size): + try: + with open(path, 'rb') as f: + bins, issame_list = pickle.load(f) # py2 + except UnicodeDecodeError as e: + with open(path, 'rb') as f: + bins, issame_list = pickle.load(f, encoding='bytes') # py3 + data_list = [] + for flip in [0, 1]: + data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1])) + data_list.append(data) + for idx in range(len(issame_list) * 2): + _bin = bins[idx] + img = mx.image.imdecode(_bin) + if img.shape[1] != image_size[0]: + img = mx.image.resize_short(img, image_size[0]) + img = nd.transpose(img, axes=(2, 0, 1)) + for flip in [0, 1]: + if flip == 1: + img = mx.ndarray.flip(data=img, axis=2) + data_list[flip][idx][:] = torch.from_numpy(img.asnumpy()) + if idx % 1000 == 0: + print('loading bin', idx) + print(data_list[0].shape) + return data_list, issame_list + +@torch.no_grad() +def test(data_set, backbone, batch_size, nfolds=10): + print('testing verification..') + data_list = data_set[0] + issame_list = data_set[1] + embeddings_list = [] + time_consumed = 0.0 + for i in range(len(data_list)): + data = data_list[i] + embeddings = None + ba = 0 + while ba < data.shape[0]: + bb = min(ba + batch_size, data.shape[0]) + count = bb - ba + _data = data[bb - batch_size: bb] + time0 = datetime.datetime.now() + img = ((_data / 255) 
- 0.5) / 0.5 + net_out: torch.Tensor = backbone(img) + _embeddings = net_out.detach().cpu().numpy() + time_now = datetime.datetime.now() + diff = time_now - time0 + time_consumed += diff.total_seconds() + if embeddings is None: + embeddings = np.zeros((data.shape[0], _embeddings.shape[1])) + embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :] + ba = bb + embeddings_list.append(embeddings) + + _xnorm = 0.0 + _xnorm_cnt = 0 + for embed in embeddings_list: + for i in range(embed.shape[0]): + _em = embed[i] + _norm = np.linalg.norm(_em) + _xnorm += _norm + _xnorm_cnt += 1 + _xnorm /= _xnorm_cnt + + acc1 = 0.0 + std1 = 0.0 + embeddings = embeddings_list[0] + embeddings_list[1] + embeddings = sklearn.preprocessing.normalize(embeddings) + print(embeddings.shape) + print('infer time', time_consumed) + _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds) + acc2, std2 = np.mean(accuracy), np.std(accuracy) + return acc1, std1, acc2, std2, _xnorm, embeddings_list + + +def dumpR(data_set, + backbone, + batch_size, + name='', + data_extra=None, + label_shape=None): + print('dump verification embedding..') + data_list = data_set[0] + issame_list = data_set[1] + embeddings_list = [] + time_consumed = 0.0 + for i in range(len(data_list)): + data = data_list[i] + embeddings = None + ba = 0 + while ba < data.shape[0]: + bb = min(ba + batch_size, data.shape[0]) + count = bb - ba + + _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb) + time0 = datetime.datetime.now() + if data_extra is None: + db = mx.io.DataBatch(data=(_data,), label=(_label,)) + else: + db = mx.io.DataBatch(data=(_data, _data_extra), + label=(_label,)) + model.forward(db, is_train=False) + net_out = model.get_outputs() + _embeddings = net_out[0].asnumpy() + time_now = datetime.datetime.now() + diff = time_now - time0 + time_consumed += diff.total_seconds() + if embeddings is None: + embeddings = np.zeros((data.shape[0], _embeddings.shape[1])) + embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :] + ba = bb + embeddings_list.append(embeddings) + embeddings = embeddings_list[0] + embeddings_list[1] + embeddings = sklearn.preprocessing.normalize(embeddings) + actual_issame = np.asarray(issame_list) + outname = os.path.join('temp.bin') + with open(outname, 'wb') as f: + pickle.dump((embeddings, issame_list), + f, + protocol=pickle.HIGHEST_PROTOCOL) + + +# if __name__ == '__main__': +# +# parser = argparse.ArgumentParser(description='do verification') +# # general +# parser.add_argument('--data-dir', default='', help='') +# parser.add_argument('--model', +# default='../model/softmax,50', +# help='path to load model.') +# parser.add_argument('--target', +# default='lfw,cfp_ff,cfp_fp,agedb_30', +# help='test targets.') +# parser.add_argument('--gpu', default=0, type=int, help='gpu id') +# parser.add_argument('--batch-size', default=32, type=int, help='') +# parser.add_argument('--max', default='', type=str, help='') +# parser.add_argument('--mode', default=0, type=int, help='') +# parser.add_argument('--nfolds', default=10, type=int, help='') +# args = parser.parse_args() +# image_size = [112, 112] +# print('image_size', image_size) +# ctx = mx.gpu(args.gpu) +# nets = [] +# vec = args.model.split(',') +# prefix = args.model.split(',')[0] +# epochs = [] +# if len(vec) == 1: +# pdir = os.path.dirname(prefix) +# for fname in os.listdir(pdir): +# if not fname.endswith('.params'): +# continue +# _file = os.path.join(pdir, fname) +# if _file.startswith(prefix): +# epoch = 
int(fname.split('.')[0].split('-')[1]) +# epochs.append(epoch) +# epochs = sorted(epochs, reverse=True) +# if len(args.max) > 0: +# _max = [int(x) for x in args.max.split(',')] +# assert len(_max) == 2 +# if len(epochs) > _max[1]: +# epochs = epochs[_max[0]:_max[1]] +# +# else: +# epochs = [int(x) for x in vec[1].split('|')] +# print('model number', len(epochs)) +# time0 = datetime.datetime.now() +# for epoch in epochs: +# print('loading', prefix, epoch) +# sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) +# # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx) +# all_layers = sym.get_internals() +# sym = all_layers['fc1_output'] +# model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) +# # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))]) +# model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], +# image_size[1]))]) +# model.set_params(arg_params, aux_params) +# nets.append(model) +# time_now = datetime.datetime.now() +# diff = time_now - time0 +# print('model loading time', diff.total_seconds()) +# +# ver_list = [] +# ver_name_list = [] +# for name in args.target.split(','): +# path = os.path.join(args.data_dir, name + ".bin") +# if os.path.exists(path): +# print('loading.. ', name) +# data_set = load_bin(path, image_size) +# ver_list.append(data_set) +# ver_name_list.append(name) +# +# if args.mode == 0: +# for i in range(len(ver_list)): +# results = [] +# for model in nets: +# acc1, std1, acc2, std2, xnorm, embeddings_list = test( +# ver_list[i], model, args.batch_size, args.nfolds) +# print('[%s]XNorm: %f' % (ver_name_list[i], xnorm)) +# print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1)) +# print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2)) +# results.append(acc2) +# print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results))) +# elif args.mode == 1: +# raise ValueError +# else: +# model = nets[0] +# dumpR(ver_list[0], model, args.batch_size, args.target) diff --git a/third_part/face3d/models/arcface_torch/eval_ijbc.py b/third_part/face3d/models/arcface_torch/eval_ijbc.py new file mode 100644 index 0000000000000000000000000000000000000000..9c5a650d486d18eb02d6f60d448fc3b315261f5d --- /dev/null +++ b/third_part/face3d/models/arcface_torch/eval_ijbc.py @@ -0,0 +1,483 @@ +# coding: utf-8 + +import os +import pickle + +import matplotlib +import pandas as pd + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import timeit +import sklearn +import argparse +import cv2 +import numpy as np +import torch +from skimage import transform as trans +from backbones import get_model +from sklearn.metrics import roc_curve, auc + +from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap +from prettytable import PrettyTable +from pathlib import Path + +import sys +import warnings + +sys.path.insert(0, "../") +warnings.filterwarnings("ignore") + +parser = argparse.ArgumentParser(description='do ijb test') +# general +parser.add_argument('--model-prefix', default='', help='path to load model.') +parser.add_argument('--image-path', default='', type=str, help='') +parser.add_argument('--result-dir', default='.', type=str, help='') +parser.add_argument('--batch-size', default=128, type=int, help='') +parser.add_argument('--network', default='iresnet50', type=str, help='') +parser.add_argument('--job', default='insightface', type=str, help='job name') +parser.add_argument('--target', 
default='IJBC', type=str, help='target, set to IJBC or IJBB') +args = parser.parse_args() + +target = args.target +model_path = args.model_prefix +image_path = args.image_path +result_dir = args.result_dir +gpu_id = None +use_norm_score = True # if Ture, TestMode(N1) +use_detector_score = True # if Ture, TestMode(D1) +use_flip_test = True # if Ture, TestMode(F1) +job = args.job +batch_size = args.batch_size + + +class Embedding(object): + def __init__(self, prefix, data_shape, batch_size=1): + image_size = (112, 112) + self.image_size = image_size + weight = torch.load(prefix) + resnet = get_model(args.network, dropout=0, fp16=False).cuda() + resnet.load_state_dict(weight) + model = torch.nn.DataParallel(resnet) + self.model = model + self.model.eval() + src = np.array([ + [30.2946, 51.6963], + [65.5318, 51.5014], + [48.0252, 71.7366], + [33.5493, 92.3655], + [62.7299, 92.2041]], dtype=np.float32) + src[:, 0] += 8.0 + self.src = src + self.batch_size = batch_size + self.data_shape = data_shape + + def get(self, rimg, landmark): + + assert landmark.shape[0] == 68 or landmark.shape[0] == 5 + assert landmark.shape[1] == 2 + if landmark.shape[0] == 68: + landmark5 = np.zeros((5, 2), dtype=np.float32) + landmark5[0] = (landmark[36] + landmark[39]) / 2 + landmark5[1] = (landmark[42] + landmark[45]) / 2 + landmark5[2] = landmark[30] + landmark5[3] = landmark[48] + landmark5[4] = landmark[54] + else: + landmark5 = landmark + tform = trans.SimilarityTransform() + tform.estimate(landmark5, self.src) + M = tform.params[0:2, :] + img = cv2.warpAffine(rimg, + M, (self.image_size[1], self.image_size[0]), + borderValue=0.0) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img_flip = np.fliplr(img) + img = np.transpose(img, (2, 0, 1)) # 3*112*112, RGB + img_flip = np.transpose(img_flip, (2, 0, 1)) + input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8) + input_blob[0] = img + input_blob[1] = img_flip + return input_blob + + @torch.no_grad() + def forward_db(self, batch_data): + imgs = torch.Tensor(batch_data).cuda() + imgs.div_(255).sub_(0.5).div_(0.5) + feat = self.model(imgs) + feat = feat.reshape([self.batch_size, 2 * feat.shape[1]]) + return feat.cpu().numpy() + + +# 将一个list尽量均分成n份,限制len(list)==n,份数大于原list内元素个数则分配空list[] +def divideIntoNstrand(listTemp, n): + twoList = [[] for i in range(n)] + for i, e in enumerate(listTemp): + twoList[i % n].append(e) + return twoList + + +def read_template_media_list(path): + # ijb_meta = np.loadtxt(path, dtype=str) + ijb_meta = pd.read_csv(path, sep=' ', header=None).values + templates = ijb_meta[:, 1].astype(np.int) + medias = ijb_meta[:, 2].astype(np.int) + return templates, medias + + +# In[ ]: + + +def read_template_pair_list(path): + # pairs = np.loadtxt(path, dtype=str) + pairs = pd.read_csv(path, sep=' ', header=None).values + # print(pairs.shape) + # print(pairs[:, 0].astype(np.int)) + t1 = pairs[:, 0].astype(np.int) + t2 = pairs[:, 1].astype(np.int) + label = pairs[:, 2].astype(np.int) + return t1, t2, label + + +# In[ ]: + + +def read_image_feature(path): + with open(path, 'rb') as fid: + img_feats = pickle.load(fid) + return img_feats + + +# In[ ]: + + +def get_image_feature(img_path, files_list, model_path, epoch, gpu_id): + batch_size = args.batch_size + data_shape = (3, 112, 112) + + files = files_list + print('files:', len(files)) + rare_size = len(files) % batch_size + faceness_scores = [] + batch = 0 + img_feats = np.empty((len(files), 1024), dtype=np.float32) + + batch_data = np.empty((2 * batch_size, 3, 112, 112)) + 
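+    # NOTE (editor's comment, not from the original file): `batch_data`
+    # interleaves every aligned crop with its horizontal flip, so a batch of N
+    # faces fills 2*N slots; Embedding.forward_db() then reshapes the resulting
+    # 2*N x 512 network output into N x 1024 (original and flipped features side
+    # by side), which is why `img_feats` above is allocated with 1024 columns.
+    # The same flip-and-concat idea as a stand-alone sketch (`backbone` is a
+    # hypothetical 512-d face embedder):
+    #
+    #     import torch
+    #
+    #     def flip_concat_embed(backbone, imgs):
+    #         # imgs: N x 3 x 112 x 112, already normalised to [-1, 1]
+    #         with torch.no_grad():
+    #             feat = backbone(imgs)                        # N x 512
+    #             feat_flip = backbone(torch.flip(imgs, [3]))  # flip width axis
+    #         return torch.cat([feat, feat_flip], dim=1)       # N x 1024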
embedding = Embedding(model_path, data_shape, batch_size) + for img_index, each_line in enumerate(files[:len(files) - rare_size]): + name_lmk_score = each_line.strip().split(' ') + img_name = os.path.join(img_path, name_lmk_score[0]) + img = cv2.imread(img_name) + lmk = np.array([float(x) for x in name_lmk_score[1:-1]], + dtype=np.float32) + lmk = lmk.reshape((5, 2)) + input_blob = embedding.get(img, lmk) + + batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0] + batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1] + if (img_index + 1) % batch_size == 0: + print('batch', batch) + img_feats[batch * batch_size:batch * batch_size + + batch_size][:] = embedding.forward_db(batch_data) + batch += 1 + faceness_scores.append(name_lmk_score[-1]) + + batch_data = np.empty((2 * rare_size, 3, 112, 112)) + embedding = Embedding(model_path, data_shape, rare_size) + for img_index, each_line in enumerate(files[len(files) - rare_size:]): + name_lmk_score = each_line.strip().split(' ') + img_name = os.path.join(img_path, name_lmk_score[0]) + img = cv2.imread(img_name) + lmk = np.array([float(x) for x in name_lmk_score[1:-1]], + dtype=np.float32) + lmk = lmk.reshape((5, 2)) + input_blob = embedding.get(img, lmk) + batch_data[2 * img_index][:] = input_blob[0] + batch_data[2 * img_index + 1][:] = input_blob[1] + if (img_index + 1) % rare_size == 0: + print('batch', batch) + img_feats[len(files) - + rare_size:][:] = embedding.forward_db(batch_data) + batch += 1 + faceness_scores.append(name_lmk_score[-1]) + faceness_scores = np.array(faceness_scores).astype(np.float32) + # img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01 + # faceness_scores = np.ones( (len(files), ), dtype=np.float32 ) + return img_feats, faceness_scores + + +# In[ ]: + + +def image2template_feature(img_feats=None, templates=None, medias=None): + # ========================================================== + # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim] + # 2. compute media feature. + # 3. compute template feature. 
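+    # NOTE (editor's comment, not from the original file): concretely, the loop
+    # below averages the features of all images that share a media id (frames of
+    # one video count as a single observation), sums the per-media vectors for
+    # each template, and the result is L2-normalised before scoring.  The same
+    # aggregation for a single template, as a compact sketch (the function name
+    # is illustrative):
+    #
+    #     import numpy as np
+    #
+    #     def template_feature(feats, medias):
+    #         # feats: K x D image features, medias: K media ids
+    #         per_media = [feats[medias == m].mean(axis=0) for m in np.unique(medias)]
+    #         t = np.sum(per_media, axis=0)
+    #         return t / np.linalg.norm(t)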
+ # ========================================================== + unique_templates = np.unique(templates) + template_feats = np.zeros((len(unique_templates), img_feats.shape[1])) + + for count_template, uqt in enumerate(unique_templates): + + (ind_t,) = np.where(templates == uqt) + face_norm_feats = img_feats[ind_t] + face_medias = medias[ind_t] + unique_medias, unique_media_counts = np.unique(face_medias, + return_counts=True) + media_norm_feats = [] + for u, ct in zip(unique_medias, unique_media_counts): + (ind_m,) = np.where(face_medias == u) + if ct == 1: + media_norm_feats += [face_norm_feats[ind_m]] + else: # image features from the same video will be aggregated into one feature + media_norm_feats += [ + np.mean(face_norm_feats[ind_m], axis=0, keepdims=True) + ] + media_norm_feats = np.array(media_norm_feats) + # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True)) + template_feats[count_template] = np.sum(media_norm_feats, axis=0) + if count_template % 2000 == 0: + print('Finish Calculating {} template features.'.format( + count_template)) + # template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True)) + template_norm_feats = sklearn.preprocessing.normalize(template_feats) + # print(template_norm_feats.shape) + return template_norm_feats, unique_templates + + +# In[ ]: + + +def verification(template_norm_feats=None, + unique_templates=None, + p1=None, + p2=None): + # ========================================================== + # Compute set-to-set Similarity Score. + # ========================================================== + template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) + for count_template, uqt in enumerate(unique_templates): + template2id[uqt] = count_template + + score = np.zeros((len(p1),)) # save cosine distance between pairs + + total_pairs = np.array(range(len(p1))) + batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation + sublists = [ + total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize) + ] + total_sublists = len(sublists) + for c, s in enumerate(sublists): + feat1 = template_norm_feats[template2id[p1[s]]] + feat2 = template_norm_feats[template2id[p2[s]]] + similarity_score = np.sum(feat1 * feat2, -1) + score[s] = similarity_score.flatten() + if c % 10 == 0: + print('Finish {}/{} pairs.'.format(c, total_sublists)) + return score + + +# In[ ]: +def verification2(template_norm_feats=None, + unique_templates=None, + p1=None, + p2=None): + template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) + for count_template, uqt in enumerate(unique_templates): + template2id[uqt] = count_template + score = np.zeros((len(p1),)) # save cosine distance between pairs + total_pairs = np.array(range(len(p1))) + batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation + sublists = [ + total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize) + ] + total_sublists = len(sublists) + for c, s in enumerate(sublists): + feat1 = template_norm_feats[template2id[p1[s]]] + feat2 = template_norm_feats[template2id[p2[s]]] + similarity_score = np.sum(feat1 * feat2, -1) + score[s] = similarity_score.flatten() + if c % 10 == 0: + print('Finish {}/{} pairs.'.format(c, total_sublists)) + return score + + +def read_score(path): + with open(path, 'rb') as fid: + img_feats = pickle.load(fid) + return img_feats + + +# # Step1: Load Meta Data + +# In[ ]: + +assert target == 'IJBC' or target == 
'IJBB' + +# ============================================================= +# load image and template relationships for template feature embedding +# tid --> template id, mid --> media id +# format: +# image_name tid mid +# ============================================================= +start = timeit.default_timer() +templates, medias = read_template_media_list( + os.path.join('%s/meta' % image_path, + '%s_face_tid_mid.txt' % target.lower())) +stop = timeit.default_timer() +print('Time: %.2f s. ' % (stop - start)) + +# In[ ]: + +# ============================================================= +# load template pairs for template-to-template verification +# tid : template id, label : 1/0 +# format: +# tid_1 tid_2 label +# ============================================================= +start = timeit.default_timer() +p1, p2, label = read_template_pair_list( + os.path.join('%s/meta' % image_path, + '%s_template_pair_label.txt' % target.lower())) +stop = timeit.default_timer() +print('Time: %.2f s. ' % (stop - start)) + +# # Step 2: Get Image Features + +# In[ ]: + +# ============================================================= +# load image features +# format: +# img_feats: [image_num x feats_dim] (227630, 512) +# ============================================================= +start = timeit.default_timer() +img_path = '%s/loose_crop' % image_path +img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower()) +img_list = open(img_list_path) +files = img_list.readlines() +# files_list = divideIntoNstrand(files, rank_size) +files_list = files + +# img_feats +# for i in range(rank_size): +img_feats, faceness_scores = get_image_feature(img_path, files_list, + model_path, 0, gpu_id) +stop = timeit.default_timer() +print('Time: %.2f s. ' % (stop - start)) +print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], + img_feats.shape[1])) + +# # Step3: Get Template Features + +# In[ ]: + +# ============================================================= +# compute template features from image features. +# ============================================================= +start = timeit.default_timer() +# ========================================================== +# Norm feature before aggregation into template feature? +# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face). +# ========================================================== +# 1. FaceScore (Feature Norm) +# 2. FaceScore (Detector) + +if use_flip_test: + # concat --- F1 + # img_input_feats = img_feats + # add --- F2 + img_input_feats = img_feats[:, 0:img_feats.shape[1] // + 2] + img_feats[:, img_feats.shape[1] // 2:] +else: + img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] + +if use_norm_score: + img_input_feats = img_input_feats +else: + # normalise features to remove norm information + img_input_feats = img_input_feats / np.sqrt( + np.sum(img_input_feats ** 2, -1, keepdims=True)) + +if use_detector_score: + print(img_input_feats.shape, faceness_scores.shape) + img_input_feats = img_input_feats * faceness_scores[:, np.newaxis] +else: + img_input_feats = img_input_feats + +template_norm_feats, unique_templates = image2template_feature( + img_input_feats, templates, medias) +stop = timeit.default_timer() +print('Time: %.2f s. ' % (stop - start)) + +# # Step 4: Get Template Similarity Scores + +# In[ ]: + +# ============================================================= +# compute verification scores between template pairs. 
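+# NOTE (editor's comment, not from the original file): the template features are
+# already L2-normalised, so the pair score computed below is simply their dot
+# product, i.e. cosine similarity; pairs are processed in chunks of 100k only to
+# bound memory.  Single-pair sketch (the function name is illustrative):
+#
+#     import numpy as np
+#
+#     def pair_score(t1, t2):
+#         # t1, t2: L2-normalised template feature vectors
+#         return float(np.dot(t1, t2))   # cosine similarity in [-1, 1]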
+# ============================================================= +start = timeit.default_timer() +score = verification(template_norm_feats, unique_templates, p1, p2) +stop = timeit.default_timer() +print('Time: %.2f s. ' % (stop - start)) + +# In[ ]: +save_path = os.path.join(result_dir, args.job) +# save_path = result_dir + '/%s_result' % target + +if not os.path.exists(save_path): + os.makedirs(save_path) + +score_save_file = os.path.join(save_path, "%s.npy" % target.lower()) +np.save(score_save_file, score) + +# # Step 5: Get ROC Curves and TPR@FPR Table + +# In[ ]: + +files = [score_save_file] +methods = [] +scores = [] +for file in files: + methods.append(Path(file).stem) + scores.append(np.load(file)) + +methods = np.array(methods) +scores = dict(zip(methods, scores)) +colours = dict( + zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) +x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] +tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) +fig = plt.figure() +for method in methods: + fpr, tpr, _ = roc_curve(label, scores[method]) + roc_auc = auc(fpr, tpr) + fpr = np.flipud(fpr) + tpr = np.flipud(tpr) # select largest tpr at same fpr + plt.plot(fpr, + tpr, + color=colours[method], + lw=1, + label=('[%s (AUC = %0.4f %%)]' % + (method.split('-')[-1], roc_auc * 100))) + tpr_fpr_row = [] + tpr_fpr_row.append("%s-%s" % (method, target)) + for fpr_iter in np.arange(len(x_labels)): + _, min_index = min( + list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) + tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) + tpr_fpr_table.add_row(tpr_fpr_row) +plt.xlim([10 ** -6, 0.1]) +plt.ylim([0.3, 1.0]) +plt.grid(linestyle='--', linewidth=1) +plt.xticks(x_labels) +plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) +plt.xscale('log') +plt.xlabel('False Positive Rate') +plt.ylabel('True Positive Rate') +plt.title('ROC on IJB') +plt.legend(loc="lower right") +fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower())) +print(tpr_fpr_table) diff --git a/third_part/face3d/models/arcface_torch/inference.py b/third_part/face3d/models/arcface_torch/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3e5156e8d649954837e397c2ff15ec29995e7502 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/inference.py @@ -0,0 +1,35 @@ +import argparse + +import cv2 +import numpy as np +import torch + +from backbones import get_model + + +@torch.no_grad() +def inference(weight, name, img): + if img is None: + img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) + else: + img = cv2.imread(img) + img = cv2.resize(img, (112, 112)) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.transpose(img, (2, 0, 1)) + img = torch.from_numpy(img).unsqueeze(0).float() + img.div_(255).sub_(0.5).div_(0.5) + net = get_model(name, fp16=False) + net.load_state_dict(torch.load(weight)) + net.eval() + feat = net(img).numpy() + print(feat) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') + parser.add_argument('--network', type=str, default='r50', help='backbone network') + parser.add_argument('--weight', type=str, default='') + parser.add_argument('--img', type=str, default=None) + args = parser.parse_args() + inference(args.weight, args.network, args.img) diff --git a/third_part/face3d/models/arcface_torch/losses.py b/third_part/face3d/models/arcface_torch/losses.py new file mode 100644 index 
0000000000000000000000000000000000000000..87aeaa107af4d53f5a6132b3739d5cafdcded7fc --- /dev/null +++ b/third_part/face3d/models/arcface_torch/losses.py @@ -0,0 +1,42 @@ +import torch +from torch import nn + + +def get_loss(name): + if name == "cosface": + return CosFace() + elif name == "arcface": + return ArcFace() + else: + raise ValueError() + + +class CosFace(nn.Module): + def __init__(self, s=64.0, m=0.40): + super(CosFace, self).__init__() + self.s = s + self.m = m + + def forward(self, cosine, label): + index = torch.where(label != -1)[0] + m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) + m_hot.scatter_(1, label[index, None], self.m) + cosine[index] -= m_hot + ret = cosine * self.s + return ret + + +class ArcFace(nn.Module): + def __init__(self, s=64.0, m=0.5): + super(ArcFace, self).__init__() + self.s = s + self.m = m + + def forward(self, cosine: torch.Tensor, label): + index = torch.where(label != -1)[0] + m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) + m_hot.scatter_(1, label[index, None], self.m) + cosine.acos_() + cosine[index] += m_hot + cosine.cos_().mul_(self.s) + return cosine diff --git a/third_part/face3d/models/arcface_torch/onnx_helper.py b/third_part/face3d/models/arcface_torch/onnx_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..ca922ca6d410655029e459cf8fd1c323d276c34c --- /dev/null +++ b/third_part/face3d/models/arcface_torch/onnx_helper.py @@ -0,0 +1,250 @@ +from __future__ import division +import datetime +import os +import os.path as osp +import glob +import numpy as np +import cv2 +import sys +import onnxruntime +import onnx +import argparse +from onnx import numpy_helper +from insightface.data import get_image + +class ArcFaceORT: + def __init__(self, model_path, cpu=False): + self.model_path = model_path + # providers = None will use available provider, for onnxruntime-gpu it will be "CUDAExecutionProvider" + self.providers = ['CPUExecutionProvider'] if cpu else None + + #input_size is (w,h), return error message, return None if success + def check(self, track='cfat', test_img = None): + #default is cfat + max_model_size_mb=1024 + max_feat_dim=512 + max_time_cost=15 + if track.startswith('ms1m'): + max_model_size_mb=1024 + max_feat_dim=512 + max_time_cost=10 + elif track.startswith('glint'): + max_model_size_mb=1024 + max_feat_dim=1024 + max_time_cost=20 + elif track.startswith('cfat'): + max_model_size_mb = 1024 + max_feat_dim = 512 + max_time_cost = 15 + elif track.startswith('unconstrained'): + max_model_size_mb=1024 + max_feat_dim=1024 + max_time_cost=30 + else: + return "track not found" + + if not os.path.exists(self.model_path): + return "model_path not exists" + if not os.path.isdir(self.model_path): + return "model_path should be directory" + onnx_files = [] + for _file in os.listdir(self.model_path): + if _file.endswith('.onnx'): + onnx_files.append(osp.join(self.model_path, _file)) + if len(onnx_files)==0: + return "do not have onnx files" + self.model_file = sorted(onnx_files)[-1] + print('use onnx-model:', self.model_file) + try: + session = onnxruntime.InferenceSession(self.model_file, providers=self.providers) + except: + return "load onnx failed" + input_cfg = session.get_inputs()[0] + input_shape = input_cfg.shape + print('input-shape:', input_shape) + if len(input_shape)!=4: + return "length of input_shape should be 4" + if not isinstance(input_shape[0], str): + #return "input_shape[0] should be str to support batch-inference" + print('reset 
input-shape[0] to None') + model = onnx.load(self.model_file) + model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' + new_model_file = osp.join(self.model_path, 'zzzzrefined.onnx') + onnx.save(model, new_model_file) + self.model_file = new_model_file + print('use new onnx-model:', self.model_file) + try: + session = onnxruntime.InferenceSession(self.model_file, providers=self.providers) + except: + return "load onnx failed" + input_cfg = session.get_inputs()[0] + input_shape = input_cfg.shape + print('new-input-shape:', input_shape) + + self.image_size = tuple(input_shape[2:4][::-1]) + #print('image_size:', self.image_size) + input_name = input_cfg.name + outputs = session.get_outputs() + output_names = [] + for o in outputs: + output_names.append(o.name) + #print(o.name, o.shape) + if len(output_names)!=1: + return "number of output nodes should be 1" + self.session = session + self.input_name = input_name + self.output_names = output_names + #print(self.output_names) + model = onnx.load(self.model_file) + graph = model.graph + if len(graph.node)<8: + return "too small onnx graph" + + input_size = (112,112) + self.crop = None + if track=='cfat': + crop_file = osp.join(self.model_path, 'crop.txt') + if osp.exists(crop_file): + lines = open(crop_file,'r').readlines() + if len(lines)!=6: + return "crop.txt should contain 6 lines" + lines = [int(x) for x in lines] + self.crop = lines[:4] + input_size = tuple(lines[4:6]) + if input_size!=self.image_size: + return "input-size is inconsistant with onnx model input, %s vs %s"%(input_size, self.image_size) + + self.model_size_mb = os.path.getsize(self.model_file) / float(1024*1024) + if self.model_size_mb > max_model_size_mb: + return "max model size exceed, given %.3f-MB"%self.model_size_mb + + input_mean = None + input_std = None + if track=='cfat': + pn_file = osp.join(self.model_path, 'pixel_norm.txt') + if osp.exists(pn_file): + lines = open(pn_file,'r').readlines() + if len(lines)!=2: + return "pixel_norm.txt should contain 2 lines" + input_mean = float(lines[0]) + input_std = float(lines[1]) + if input_mean is not None or input_std is not None: + if input_mean is None or input_std is None: + return "please set input_mean and input_std simultaneously" + else: + find_sub = False + find_mul = False + for nid, node in enumerate(graph.node[:8]): + print(nid, node.name) + if node.name.startswith('Sub') or node.name.startswith('_minus'): + find_sub = True + if node.name.startswith('Mul') or node.name.startswith('_mul') or node.name.startswith('Div'): + find_mul = True + if find_sub and find_mul: + print("find sub and mul") + #mxnet arcface model + input_mean = 0.0 + input_std = 1.0 + else: + input_mean = 127.5 + input_std = 127.5 + self.input_mean = input_mean + self.input_std = input_std + for initn in graph.initializer: + weight_array = numpy_helper.to_array(initn) + dt = weight_array.dtype + if dt.itemsize<4: + return 'invalid weight type - (%s:%s)' % (initn.name, dt.name) + if test_img is None: + test_img = get_image('Tom_Hanks_54745') + test_img = cv2.resize(test_img, self.image_size) + else: + test_img = cv2.resize(test_img, self.image_size) + feat, cost = self.benchmark(test_img) + batch_result = self.check_batch(test_img) + batch_result_sum = float(np.sum(batch_result)) + if batch_result_sum in [float('inf'), -float('inf')] or batch_result_sum != batch_result_sum: + print(batch_result) + print(batch_result_sum) + return "batch result output contains NaN!" 
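
ArcFaceORT.check() above rewrites a fixed batch dimension into a symbolic one and then reopens the session, so batched inference works regardless of how the model was exported. A condensed sketch of just that step; the file paths are placeholders, not files shipped in this diff:

# Rewrite the first graph input's batch dimension to a symbolic value, save, reload.
import onnx
import onnxruntime


def make_batch_dynamic(src_path="model.onnx", dst_path="model_dynamic.onnx"):
    model = onnx.load(src_path)
    # Overwrite the batch dimension of the first input so any batch size is accepted.
    model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = "None"
    onnx.save(model, dst_path)
    return onnxruntime.InferenceSession(dst_path, providers=["CPUExecutionProvider"])
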
+ + if len(feat.shape) < 2: + return "the shape of the feature must be two, but get {}".format(str(feat.shape)) + + if feat.shape[1] > max_feat_dim: + return "max feat dim exceed, given %d"%feat.shape[1] + self.feat_dim = feat.shape[1] + cost_ms = cost*1000 + if cost_ms>max_time_cost: + return "max time cost exceed, given %.4f"%cost_ms + self.cost_ms = cost_ms + print('check stat:, model-size-mb: %.4f, feat-dim: %d, time-cost-ms: %.4f, input-mean: %.3f, input-std: %.3f'%(self.model_size_mb, self.feat_dim, self.cost_ms, self.input_mean, self.input_std)) + return None + + def check_batch(self, img): + if not isinstance(img, list): + imgs = [img, ] * 32 + if self.crop is not None: + nimgs = [] + for img in imgs: + nimg = img[self.crop[1]:self.crop[3], self.crop[0]:self.crop[2], :] + if nimg.shape[0] != self.image_size[1] or nimg.shape[1] != self.image_size[0]: + nimg = cv2.resize(nimg, self.image_size) + nimgs.append(nimg) + imgs = nimgs + blob = cv2.dnn.blobFromImages( + images=imgs, scalefactor=1.0 / self.input_std, size=self.image_size, + mean=(self.input_mean, self.input_mean, self.input_mean), swapRB=True) + net_out = self.session.run(self.output_names, {self.input_name: blob})[0] + return net_out + + + def meta_info(self): + return {'model-size-mb':self.model_size_mb, 'feature-dim':self.feat_dim, 'infer': self.cost_ms} + + + def forward(self, imgs): + if not isinstance(imgs, list): + imgs = [imgs] + input_size = self.image_size + if self.crop is not None: + nimgs = [] + for img in imgs: + nimg = img[self.crop[1]:self.crop[3],self.crop[0]:self.crop[2],:] + if nimg.shape[0]!=input_size[1] or nimg.shape[1]!=input_size[0]: + nimg = cv2.resize(nimg, input_size) + nimgs.append(nimg) + imgs = nimgs + blob = cv2.dnn.blobFromImages(imgs, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True) + net_out = self.session.run(self.output_names, {self.input_name : blob})[0] + return net_out + + def benchmark(self, img): + input_size = self.image_size + if self.crop is not None: + nimg = img[self.crop[1]:self.crop[3],self.crop[0]:self.crop[2],:] + if nimg.shape[0]!=input_size[1] or nimg.shape[1]!=input_size[0]: + nimg = cv2.resize(nimg, input_size) + img = nimg + blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True) + costs = [] + for _ in range(50): + ta = datetime.datetime.now() + net_out = self.session.run(self.output_names, {self.input_name : blob})[0] + tb = datetime.datetime.now() + cost = (tb-ta).total_seconds() + costs.append(cost) + costs = sorted(costs) + cost = costs[5] + return net_out, cost + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='') + # general + parser.add_argument('workdir', help='submitted work dir', type=str) + parser.add_argument('--track', help='track name, for different challenge', type=str, default='cfat') + args = parser.parse_args() + handler = ArcFaceORT(args.workdir) + err = handler.check(args.track) + print('err:', err) diff --git a/third_part/face3d/models/arcface_torch/onnx_ijbc.py b/third_part/face3d/models/arcface_torch/onnx_ijbc.py new file mode 100644 index 0000000000000000000000000000000000000000..05b50bfad4b4cf38903b89f596263a8e29a50d3e --- /dev/null +++ b/third_part/face3d/models/arcface_torch/onnx_ijbc.py @@ -0,0 +1,267 @@ +import argparse +import os +import pickle +import timeit + +import cv2 +import mxnet as mx +import numpy as np +import pandas as pd +import prettytable +import skimage.transform +from 
sklearn.metrics import roc_curve +from sklearn.preprocessing import normalize + +from onnx_helper import ArcFaceORT + +SRC = np.array( + [ + [30.2946, 51.6963], + [65.5318, 51.5014], + [48.0252, 71.7366], + [33.5493, 92.3655], + [62.7299, 92.2041]] + , dtype=np.float32) +SRC[:, 0] += 8.0 + + +class AlignedDataSet(mx.gluon.data.Dataset): + def __init__(self, root, lines, align=True): + self.lines = lines + self.root = root + self.align = align + + def __len__(self): + return len(self.lines) + + def __getitem__(self, idx): + each_line = self.lines[idx] + name_lmk_score = each_line.strip().split(' ') + name = os.path.join(self.root, name_lmk_score[0]) + img = cv2.cvtColor(cv2.imread(name), cv2.COLOR_BGR2RGB) + landmark5 = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32).reshape((5, 2)) + st = skimage.transform.SimilarityTransform() + st.estimate(landmark5, SRC) + img = cv2.warpAffine(img, st.params[0:2, :], (112, 112), borderValue=0.0) + img_1 = np.expand_dims(img, 0) + img_2 = np.expand_dims(np.fliplr(img), 0) + output = np.concatenate((img_1, img_2), axis=0).astype(np.float32) + output = np.transpose(output, (0, 3, 1, 2)) + output = mx.nd.array(output) + return output + + +def extract(model_root, dataset): + model = ArcFaceORT(model_path=model_root) + model.check() + feat_mat = np.zeros(shape=(len(dataset), 2 * model.feat_dim)) + + def batchify_fn(data): + return mx.nd.concat(*data, dim=0) + + data_loader = mx.gluon.data.DataLoader( + dataset, 128, last_batch='keep', num_workers=4, + thread_pool=True, prefetch=16, batchify_fn=batchify_fn) + num_iter = 0 + for batch in data_loader: + batch = batch.asnumpy() + batch = (batch - model.input_mean) / model.input_std + feat = model.session.run(model.output_names, {model.input_name: batch})[0] + feat = np.reshape(feat, (-1, model.feat_dim * 2)) + feat_mat[128 * num_iter: 128 * num_iter + feat.shape[0], :] = feat + num_iter += 1 + if num_iter % 50 == 0: + print(num_iter) + return feat_mat + + +def read_template_media_list(path): + ijb_meta = pd.read_csv(path, sep=' ', header=None).values + templates = ijb_meta[:, 1].astype(np.int) + medias = ijb_meta[:, 2].astype(np.int) + return templates, medias + + +def read_template_pair_list(path): + pairs = pd.read_csv(path, sep=' ', header=None).values + t1 = pairs[:, 0].astype(np.int) + t2 = pairs[:, 1].astype(np.int) + label = pairs[:, 2].astype(np.int) + return t1, t2, label + + +def read_image_feature(path): + with open(path, 'rb') as fid: + img_feats = pickle.load(fid) + return img_feats + + +def image2template_feature(img_feats=None, + templates=None, + medias=None): + unique_templates = np.unique(templates) + template_feats = np.zeros((len(unique_templates), img_feats.shape[1])) + for count_template, uqt in enumerate(unique_templates): + (ind_t,) = np.where(templates == uqt) + face_norm_feats = img_feats[ind_t] + face_medias = medias[ind_t] + unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True) + media_norm_feats = [] + for u, ct in zip(unique_medias, unique_media_counts): + (ind_m,) = np.where(face_medias == u) + if ct == 1: + media_norm_feats += [face_norm_feats[ind_m]] + else: # image features from the same video will be aggregated into one feature + media_norm_feats += [np.mean(face_norm_feats[ind_m], axis=0, keepdims=True), ] + media_norm_feats = np.array(media_norm_feats) + template_feats[count_template] = np.sum(media_norm_feats, axis=0) + if count_template % 2000 == 0: + print('Finish Calculating {} template features.'.format( + count_template)) + 
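
image2template_feature aggregates per-image features in two stages: images from the same media (video) are averaged first, the media features are then summed per template, and the result is L2-normalized. A single-template numpy sketch (the function name pool_template is ours):

# Media-then-template pooling for one template, mirroring image2template_feature.
import numpy as np


def pool_template(img_feats, medias):
    media_feats = []
    for m in np.unique(medias):
        sel = img_feats[medias == m]
        media_feats.append(sel.mean(axis=0) if len(sel) > 1 else sel[0])
    t = np.sum(media_feats, axis=0)          # sum media features within the template
    return t / np.linalg.norm(t)             # L2-normalize the template feature


if __name__ == "__main__":
    feats = np.random.randn(5, 512).astype(np.float32)   # 5 images of one template
    medias = np.array([0, 0, 1, 2, 2])                    # drawn from three medias
    print(pool_template(feats, medias).shape)             # (512,)
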
template_norm_feats = normalize(template_feats) + return template_norm_feats, unique_templates + + +def verification(template_norm_feats=None, + unique_templates=None, + p1=None, + p2=None): + template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) + for count_template, uqt in enumerate(unique_templates): + template2id[uqt] = count_template + score = np.zeros((len(p1),)) + total_pairs = np.array(range(len(p1))) + batchsize = 100000 + sublists = [total_pairs[i: i + batchsize] for i in range(0, len(p1), batchsize)] + total_sublists = len(sublists) + for c, s in enumerate(sublists): + feat1 = template_norm_feats[template2id[p1[s]]] + feat2 = template_norm_feats[template2id[p2[s]]] + similarity_score = np.sum(feat1 * feat2, -1) + score[s] = similarity_score.flatten() + if c % 10 == 0: + print('Finish {}/{} pairs.'.format(c, total_sublists)) + return score + + +def verification2(template_norm_feats=None, + unique_templates=None, + p1=None, + p2=None): + template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int) + for count_template, uqt in enumerate(unique_templates): + template2id[uqt] = count_template + score = np.zeros((len(p1),)) # save cosine distance between pairs + total_pairs = np.array(range(len(p1))) + batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation + sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)] + total_sublists = len(sublists) + for c, s in enumerate(sublists): + feat1 = template_norm_feats[template2id[p1[s]]] + feat2 = template_norm_feats[template2id[p2[s]]] + similarity_score = np.sum(feat1 * feat2, -1) + score[s] = similarity_score.flatten() + if c % 10 == 0: + print('Finish {}/{} pairs.'.format(c, total_sublists)) + return score + + +def main(args): + use_norm_score = True # if Ture, TestMode(N1) + use_detector_score = True # if Ture, TestMode(D1) + use_flip_test = True # if Ture, TestMode(F1) + assert args.target == 'IJBC' or args.target == 'IJBB' + + start = timeit.default_timer() + templates, medias = read_template_media_list( + os.path.join('%s/meta' % args.image_path, '%s_face_tid_mid.txt' % args.target.lower())) + stop = timeit.default_timer() + print('Time: %.2f s. ' % (stop - start)) + + start = timeit.default_timer() + p1, p2, label = read_template_pair_list( + os.path.join('%s/meta' % args.image_path, + '%s_template_pair_label.txt' % args.target.lower())) + stop = timeit.default_timer() + print('Time: %.2f s. ' % (stop - start)) + + start = timeit.default_timer() + img_path = '%s/loose_crop' % args.image_path + img_list_path = '%s/meta/%s_name_5pts_score.txt' % (args.image_path, args.target.lower()) + img_list = open(img_list_path) + files = img_list.readlines() + dataset = AlignedDataSet(root=img_path, lines=files, align=True) + img_feats = extract(args.model_root, dataset) + + faceness_scores = [] + for each_line in files: + name_lmk_score = each_line.split() + faceness_scores.append(name_lmk_score[-1]) + faceness_scores = np.array(faceness_scores).astype(np.float32) + stop = timeit.default_timer() + print('Time: %.2f s. 
' % (stop - start)) + print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1])) + start = timeit.default_timer() + + if use_flip_test: + img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] + img_feats[:, img_feats.shape[1] // 2:] + else: + img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] + + if use_norm_score: + img_input_feats = img_input_feats + else: + img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) + + if use_detector_score: + print(img_input_feats.shape, faceness_scores.shape) + img_input_feats = img_input_feats * faceness_scores[:, np.newaxis] + else: + img_input_feats = img_input_feats + + template_norm_feats, unique_templates = image2template_feature( + img_input_feats, templates, medias) + stop = timeit.default_timer() + print('Time: %.2f s. ' % (stop - start)) + + start = timeit.default_timer() + score = verification(template_norm_feats, unique_templates, p1, p2) + stop = timeit.default_timer() + print('Time: %.2f s. ' % (stop - start)) + save_path = os.path.join(args.result_dir, "{}_result".format(args.target)) + if not os.path.exists(save_path): + os.makedirs(save_path) + score_save_file = os.path.join(save_path, "{}.npy".format(args.model_root)) + np.save(score_save_file, score) + files = [score_save_file] + methods = [] + scores = [] + for file in files: + methods.append(os.path.basename(file)) + scores.append(np.load(file)) + methods = np.array(methods) + scores = dict(zip(methods, scores)) + x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] + tpr_fpr_table = prettytable.PrettyTable(['Methods'] + [str(x) for x in x_labels]) + for method in methods: + fpr, tpr, _ = roc_curve(label, scores[method]) + fpr = np.flipud(fpr) + tpr = np.flipud(tpr) + tpr_fpr_row = [] + tpr_fpr_row.append("%s-%s" % (method, args.target)) + for fpr_iter in np.arange(len(x_labels)): + _, min_index = min( + list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) + tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) + tpr_fpr_table.add_row(tpr_fpr_row) + print(tpr_fpr_table) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='do ijb test') + # general + parser.add_argument('--model-root', default='', help='path to load model.') + parser.add_argument('--image-path', default='', type=str, help='') + parser.add_argument('--result-dir', default='.', type=str, help='') + parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB') + main(parser.parse_args()) diff --git a/third_part/face3d/models/arcface_torch/partial_fc.py b/third_part/face3d/models/arcface_torch/partial_fc.py new file mode 100644 index 0000000000000000000000000000000000000000..17e2d25715d10ba446c957e1d2528b0687ed71d5 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/partial_fc.py @@ -0,0 +1,222 @@ +import logging +import os + +import torch +import torch.distributed as dist +from torch.nn import Module +from torch.nn.functional import normalize, linear +from torch.nn.parameter import Parameter + + +class PartialFC(Module): + """ + Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint, + Partial FC: Training 10 Million Identities on a Single Machine + See the original paper: + https://arxiv.org/abs/2010.05222 + """ + + @torch.no_grad() + def __init__(self, rank, local_rank, world_size, batch_size, resume, + margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"): + """ + rank: int + Unique process(GPU) ID from 0 to world_size - 1. 
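
In main() above, the extractor stores the original and horizontally flipped embeddings side by side per image; the flip test sums the two halves and the detector-score option weights each row by its faceness score. A small numpy sketch of that fusion (all values synthetic):

import numpy as np

feat_dim = 512
img_feats = np.random.randn(8, 2 * feat_dim).astype(np.float32)   # [feat(img), feat(flip)]
faceness = np.random.rand(8).astype(np.float32)                    # detection scores

fused = img_feats[:, :feat_dim] + img_feats[:, feat_dim:]          # use_flip_test
fused = fused * faceness[:, np.newaxis]                            # use_detector_score
print(fused.shape)                                                 # (8, 512)
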
+ local_rank: int + Unique process(GPU) ID within the server from 0 to 7. + world_size: int + Number of GPU. + batch_size: int + Batch size on current rank(GPU). + resume: bool + Select whether to restore the weight of softmax. + margin_softmax: callable + A function of margin softmax, eg: cosface, arcface. + num_classes: int + The number of class center storage in current rank(CPU/GPU), usually is total_classes // world_size, + required. + sample_rate: float + The partial fc sampling rate, when the number of classes increases to more than 2 millions, Sampling + can greatly speed up training, and reduce a lot of GPU memory, default is 1.0. + embedding_size: int + The feature dimension, default is 512. + prefix: str + Path for save checkpoint, default is './'. + """ + super(PartialFC, self).__init__() + # + self.num_classes: int = num_classes + self.rank: int = rank + self.local_rank: int = local_rank + self.device: torch.device = torch.device("cuda:{}".format(self.local_rank)) + self.world_size: int = world_size + self.batch_size: int = batch_size + self.margin_softmax: callable = margin_softmax + self.sample_rate: float = sample_rate + self.embedding_size: int = embedding_size + self.prefix: str = prefix + self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size) + self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size) + self.num_sample: int = int(self.sample_rate * self.num_local) + + self.weight_name = os.path.join(self.prefix, "rank_{}_softmax_weight.pt".format(self.rank)) + self.weight_mom_name = os.path.join(self.prefix, "rank_{}_softmax_weight_mom.pt".format(self.rank)) + + if resume: + try: + self.weight: torch.Tensor = torch.load(self.weight_name) + self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name) + if self.weight.shape[0] != self.num_local or self.weight_mom.shape[0] != self.num_local: + raise IndexError + logging.info("softmax weight resume successfully!") + logging.info("softmax weight mom resume successfully!") + except (FileNotFoundError, KeyError, IndexError): + self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device) + self.weight_mom: torch.Tensor = torch.zeros_like(self.weight) + logging.info("softmax weight init!") + logging.info("softmax weight mom init!") + else: + self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device) + self.weight_mom: torch.Tensor = torch.zeros_like(self.weight) + logging.info("softmax weight init successfully!") + logging.info("softmax weight mom init successfully!") + self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank) + + self.index = None + if int(self.sample_rate) == 1: + self.update = lambda: 0 + self.sub_weight = Parameter(self.weight) + self.sub_weight_mom = self.weight_mom + else: + self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank)) + + def save_params(self): + """ Save softmax weight for each rank on prefix + """ + torch.save(self.weight.data, self.weight_name) + torch.save(self.weight_mom, self.weight_mom_name) + + @torch.no_grad() + def sample(self, total_label): + """ + Sample all positive class centers in each rank, and random select neg class centers to filling a fixed + `num_sample`. + + total_label: tensor + Label after all gather, which cross all GPUs. 
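
The class centers are split evenly across ranks by the num_local / class_start formulas in PartialFC.__init__. A quick check with toy numbers (10 classes over 3 GPUs, not the real configuration):

num_classes, world_size = 10, 3
for rank in range(world_size):
    num_local = num_classes // world_size + int(rank < num_classes % world_size)
    class_start = num_classes // world_size * rank + min(rank, num_classes % world_size)
    print(rank, list(range(class_start, class_start + num_local)))
# rank 0 -> [0, 1, 2, 3], rank 1 -> [4, 5, 6], rank 2 -> [7, 8, 9]
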
+ """ + index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local) + total_label[~index_positive] = -1 + total_label[index_positive] -= self.class_start + if int(self.sample_rate) != 1: + positive = torch.unique(total_label[index_positive], sorted=True) + if self.num_sample - positive.size(0) >= 0: + perm = torch.rand(size=[self.num_local], device=self.device) + perm[positive] = 2.0 + index = torch.topk(perm, k=self.num_sample)[1] + index = index.sort()[0] + else: + index = positive + self.index = index + total_label[index_positive] = torch.searchsorted(index, total_label[index_positive]) + self.sub_weight = Parameter(self.weight[index]) + self.sub_weight_mom = self.weight_mom[index] + + def forward(self, total_features, norm_weight): + """ Partial fc forward, `logits = X * sample(W)` + """ + torch.cuda.current_stream().wait_stream(self.stream) + logits = linear(total_features, norm_weight) + return logits + + @torch.no_grad() + def update(self): + """ Set updated weight and weight_mom to memory bank. + """ + self.weight_mom[self.index] = self.sub_weight_mom + self.weight[self.index] = self.sub_weight + + def prepare(self, label, optimizer): + """ + get sampled class centers for cal softmax. + + label: tensor + Label tensor on each rank. + optimizer: opt + Optimizer for partial fc, which need to get weight mom. + """ + with torch.cuda.stream(self.stream): + total_label = torch.zeros( + size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long) + dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label) + self.sample(total_label) + optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None) + optimizer.param_groups[-1]['params'][0] = self.sub_weight + optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom + norm_weight = normalize(self.sub_weight) + return total_label, norm_weight + + def forward_backward(self, label, features, optimizer): + """ + Partial fc forward and backward with model parallel + + label: tensor + Label tensor on each rank(GPU) + features: tensor + Features tensor on each rank(GPU) + optimizer: optimizer + Optimizer for partial fc + + Returns: + -------- + x_grad: tensor + The gradient of features. + loss_v: tensor + Loss value for cross entropy. 
+ """ + total_label, norm_weight = self.prepare(label, optimizer) + total_features = torch.zeros( + size=[self.batch_size * self.world_size, self.embedding_size], device=self.device) + dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data) + total_features.requires_grad = True + + logits = self.forward(total_features, norm_weight) + logits = self.margin_softmax(logits, total_label) + + with torch.no_grad(): + max_fc = torch.max(logits, dim=1, keepdim=True)[0] + dist.all_reduce(max_fc, dist.ReduceOp.MAX) + + # calculate exp(logits) and all-reduce + logits_exp = torch.exp(logits - max_fc) + logits_sum_exp = logits_exp.sum(dim=1, keepdims=True) + dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM) + + # calculate prob + logits_exp.div_(logits_sum_exp) + + # get one-hot + grad = logits_exp + index = torch.where(total_label != -1)[0] + one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device) + one_hot.scatter_(1, total_label[index, None], 1) + + # calculate loss + loss = torch.zeros(grad.size()[0], 1, device=grad.device) + loss[index] = grad[index].gather(1, total_label[index, None]) + dist.all_reduce(loss, dist.ReduceOp.SUM) + loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1) + + # calculate grad + grad[index] -= one_hot + grad.div_(self.batch_size * self.world_size) + + logits.backward(grad) + if total_features.grad is not None: + total_features.grad.detach_() + x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True) + # feature gradient all-reduce + dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0))) + x_grad = x_grad * self.world_size + # backward backbone + return x_grad, loss_v diff --git a/third_part/face3d/models/arcface_torch/requirement.txt b/third_part/face3d/models/arcface_torch/requirement.txt new file mode 100644 index 0000000000000000000000000000000000000000..f72c1b3ba814ae1e0bc1c1f56402026978b9e870 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/requirement.txt @@ -0,0 +1,5 @@ +tensorboard +easydict +mxnet +onnx +sklearn diff --git a/third_part/face3d/models/arcface_torch/run.sh b/third_part/face3d/models/arcface_torch/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..61af4b4950eb11334e55362e3e3c5e2796979a01 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/run.sh @@ -0,0 +1,2 @@ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 +ps -ef | grep "train" | grep -v grep | awk '{print "kill -9 "$2}' | sh diff --git a/third_part/face3d/models/arcface_torch/torch2onnx.py b/third_part/face3d/models/arcface_torch/torch2onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..fc26ab82e552331bc8d75b34e81000418f4d38ec --- /dev/null +++ b/third_part/face3d/models/arcface_torch/torch2onnx.py @@ -0,0 +1,59 @@ +import numpy as np +import onnx +import torch + + +def convert_onnx(net, path_module, output, opset=11, simplify=False): + assert isinstance(net, torch.nn.Module) + img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) + img = img.astype(np.float) + img = (img / 255. 
- 0.5) / 0.5 # torch style norm + img = img.transpose((2, 0, 1)) + img = torch.from_numpy(img).unsqueeze(0).float() + + weight = torch.load(path_module) + net.load_state_dict(weight) + net.eval() + torch.onnx.export(net, img, output, keep_initializers_as_inputs=False, verbose=False, opset_version=opset) + model = onnx.load(output) + graph = model.graph + graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' + if simplify: + from onnxsim import simplify + model, check = simplify(model) + assert check, "Simplified ONNX model could not be validated" + onnx.save(model, output) + + +if __name__ == '__main__': + import os + import argparse + from backbones import get_model + + parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') + parser.add_argument('input', type=str, help='input backbone.pth file or path') + parser.add_argument('--output', type=str, default=None, help='output onnx path') + parser.add_argument('--network', type=str, default=None, help='backbone network') + parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') + args = parser.parse_args() + input_file = args.input + if os.path.isdir(input_file): + input_file = os.path.join(input_file, "backbone.pth") + assert os.path.exists(input_file) + model_name = os.path.basename(os.path.dirname(input_file)).lower() + params = model_name.split("_") + if len(params) >= 3 and params[1] in ('arcface', 'cosface'): + if args.network is None: + args.network = params[2] + assert args.network is not None + print(args) + backbone_onnx = get_model(args.network, dropout=0) + + output_path = args.output + if output_path is None: + output_path = os.path.join(os.path.dirname(__file__), 'onnx') + if not os.path.exists(output_path): + os.makedirs(output_path) + assert os.path.isdir(output_path) + output_file = os.path.join(output_path, "%s.onnx" % model_name) + convert_onnx(backbone_onnx, input_file, output_file, simplify=args.simplify) diff --git a/third_part/face3d/models/arcface_torch/train.py b/third_part/face3d/models/arcface_torch/train.py new file mode 100644 index 0000000000000000000000000000000000000000..55eca2d0ad9463415970e09bccab8b722e496704 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/train.py @@ -0,0 +1,141 @@ +import argparse +import logging +import os + +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torch.utils.data.distributed +from torch.nn.utils import clip_grad_norm_ + +import losses +from backbones import get_model +from dataset import MXFaceDataset, SyntheticDataset, DataLoaderX +from partial_fc import PartialFC +from utils.utils_amp import MaxClipGradScaler +from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint +from utils.utils_config import get_config +from utils.utils_logging import AverageMeter, init_logging + + +def main(args): + cfg = get_config(args.config) + try: + world_size = int(os.environ['WORLD_SIZE']) + rank = int(os.environ['RANK']) + dist.init_process_group('nccl') + except KeyError: + world_size = 1 + rank = 0 + dist.init_process_group(backend='nccl', init_method="tcp://127.0.0.1:12584", rank=rank, world_size=world_size) + + local_rank = args.local_rank + torch.cuda.set_device(local_rank) + os.makedirs(cfg.output, exist_ok=True) + init_logging(rank, cfg.output) + + if cfg.rec == "synthetic": + train_set = SyntheticDataset(local_rank=local_rank) + else: + train_set = MXFaceDataset(root_dir=cfg.rec, local_rank=local_rank) + + train_sampler = 
torch.utils.data.distributed.DistributedSampler(train_set, shuffle=True) + train_loader = DataLoaderX( + local_rank=local_rank, dataset=train_set, batch_size=cfg.batch_size, + sampler=train_sampler, num_workers=2, pin_memory=True, drop_last=True) + backbone = get_model(cfg.network, dropout=0.0, fp16=cfg.fp16, num_features=cfg.embedding_size).to(local_rank) + + if cfg.resume: + try: + backbone_pth = os.path.join(cfg.output, "backbone.pth") + backbone.load_state_dict(torch.load(backbone_pth, map_location=torch.device(local_rank))) + if rank == 0: + logging.info("backbone resume successfully!") + except (FileNotFoundError, KeyError, IndexError, RuntimeError): + if rank == 0: + logging.info("resume fail, backbone init successfully!") + + backbone = torch.nn.parallel.DistributedDataParallel( + module=backbone, broadcast_buffers=False, device_ids=[local_rank]) + backbone.train() + margin_softmax = losses.get_loss(cfg.loss) + module_partial_fc = PartialFC( + rank=rank, local_rank=local_rank, world_size=world_size, resume=cfg.resume, + batch_size=cfg.batch_size, margin_softmax=margin_softmax, num_classes=cfg.num_classes, + sample_rate=cfg.sample_rate, embedding_size=cfg.embedding_size, prefix=cfg.output) + + opt_backbone = torch.optim.SGD( + params=[{'params': backbone.parameters()}], + lr=cfg.lr / 512 * cfg.batch_size * world_size, + momentum=0.9, weight_decay=cfg.weight_decay) + opt_pfc = torch.optim.SGD( + params=[{'params': module_partial_fc.parameters()}], + lr=cfg.lr / 512 * cfg.batch_size * world_size, + momentum=0.9, weight_decay=cfg.weight_decay) + + num_image = len(train_set) + total_batch_size = cfg.batch_size * world_size + cfg.warmup_step = num_image // total_batch_size * cfg.warmup_epoch + cfg.total_step = num_image // total_batch_size * cfg.num_epoch + + def lr_step_func(current_step): + cfg.decay_step = [x * num_image // total_batch_size for x in cfg.decay_epoch] + if current_step < cfg.warmup_step: + return current_step / cfg.warmup_step + else: + return 0.1 ** len([m for m in cfg.decay_step if m <= current_step]) + + scheduler_backbone = torch.optim.lr_scheduler.LambdaLR( + optimizer=opt_backbone, lr_lambda=lr_step_func) + scheduler_pfc = torch.optim.lr_scheduler.LambdaLR( + optimizer=opt_pfc, lr_lambda=lr_step_func) + + for key, value in cfg.items(): + num_space = 25 - len(key) + logging.info(": " + key + " " * num_space + str(value)) + + val_target = cfg.val_targets + callback_verification = CallBackVerification(2000, rank, val_target, cfg.rec) + callback_logging = CallBackLogging(50, rank, cfg.total_step, cfg.batch_size, world_size, None) + callback_checkpoint = CallBackModelCheckpoint(rank, cfg.output) + + loss = AverageMeter() + start_epoch = 0 + global_step = 0 + grad_amp = MaxClipGradScaler(cfg.batch_size, 128 * cfg.batch_size, growth_interval=100) if cfg.fp16 else None + for epoch in range(start_epoch, cfg.num_epoch): + train_sampler.set_epoch(epoch) + for step, (img, label) in enumerate(train_loader): + global_step += 1 + features = F.normalize(backbone(img)) + x_grad, loss_v = module_partial_fc.forward_backward(label, features, opt_pfc) + if cfg.fp16: + features.backward(grad_amp.scale(x_grad)) + grad_amp.unscale_(opt_backbone) + clip_grad_norm_(backbone.parameters(), max_norm=5, norm_type=2) + grad_amp.step(opt_backbone) + grad_amp.update() + else: + features.backward(x_grad) + clip_grad_norm_(backbone.parameters(), max_norm=5, norm_type=2) + opt_backbone.step() + + opt_pfc.step() + module_partial_fc.update() + opt_backbone.zero_grad() + opt_pfc.zero_grad() + 
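
The margin head returned by losses.get_loss() reshapes the cosine logits before the softmax. A condensed single-GPU restatement of losses.ArcFace, with F.cross_entropy standing in for the distributed softmax that PartialFC.forward_backward implements by hand (s and m follow the defaults in losses.py; all tensors are synthetic):

import torch
import torch.nn.functional as F

s, m = 64.0, 0.5
emb = F.normalize(torch.randn(4, 512))            # backbone output, L2-normalized
centers = F.normalize(torch.randn(8, 512))        # class centers (W), L2-normalized
cosine = emb @ centers.t()                        # plain cosine logits
label = torch.tensor([0, 3, 5, 7])

m_hot = torch.zeros_like(cosine)
m_hot.scatter_(1, label[:, None], m)              # margin only at the target class
logits = (cosine.acos() + m_hot).cos() * s        # cos(theta + m), then scale by s
loss = F.cross_entropy(logits, label)
print(loss.item())
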
loss.update(loss_v, 1) + callback_logging(global_step, loss, epoch, cfg.fp16, scheduler_backbone.get_last_lr()[0], grad_amp) + callback_verification(global_step, backbone) + scheduler_backbone.step() + scheduler_pfc.step() + callback_checkpoint(global_step, backbone, module_partial_fc) + dist.destroy_process_group() + + +if __name__ == "__main__": + torch.backends.cudnn.benchmark = True + parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') + parser.add_argument('config', type=str, help='py config file') + parser.add_argument('--local_rank', type=int, default=0, help='local_rank') + main(parser.parse_args()) diff --git a/third_part/face3d/models/arcface_torch/utils/__init__.py b/third_part/face3d/models/arcface_torch/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/face3d/models/arcface_torch/utils/plot.py b/third_part/face3d/models/arcface_torch/utils/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..ccc588e5c01ca550b69c385aeb3fd139c59fb88a --- /dev/null +++ b/third_part/face3d/models/arcface_torch/utils/plot.py @@ -0,0 +1,72 @@ +# coding: utf-8 + +import os +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap +from prettytable import PrettyTable +from sklearn.metrics import roc_curve, auc + +image_path = "/data/anxiang/IJB_release/IJBC" +files = [ + "./ms1mv3_arcface_r100/ms1mv3_arcface_r100/ijbc.npy" +] + + +def read_template_pair_list(path): + pairs = pd.read_csv(path, sep=' ', header=None).values + t1 = pairs[:, 0].astype(np.int) + t2 = pairs[:, 1].astype(np.int) + label = pairs[:, 2].astype(np.int) + return t1, t2, label + + +p1, p2, label = read_template_pair_list( + os.path.join('%s/meta' % image_path, + '%s_template_pair_label.txt' % 'ijbc')) + +methods = [] +scores = [] +for file in files: + methods.append(file.split('/')[-2]) + scores.append(np.load(file)) + +methods = np.array(methods) +scores = dict(zip(methods, scores)) +colours = dict( + zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) +x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] +tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) +fig = plt.figure() +for method in methods: + fpr, tpr, _ = roc_curve(label, scores[method]) + roc_auc = auc(fpr, tpr) + fpr = np.flipud(fpr) + tpr = np.flipud(tpr) # select largest tpr at same fpr + plt.plot(fpr, + tpr, + color=colours[method], + lw=1, + label=('[%s (AUC = %0.4f %%)]' % + (method.split('-')[-1], roc_auc * 100))) + tpr_fpr_row = [] + tpr_fpr_row.append("%s-%s" % (method, "IJBC")) + for fpr_iter in np.arange(len(x_labels)): + _, min_index = min( + list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) + tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) + tpr_fpr_table.add_row(tpr_fpr_row) +plt.xlim([10 ** -6, 0.1]) +plt.ylim([0.3, 1.0]) +plt.grid(linestyle='--', linewidth=1) +plt.xticks(x_labels) +plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) +plt.xscale('log') +plt.xlabel('False Positive Rate') +plt.ylabel('True Positive Rate') +plt.title('ROC on IJB') +plt.legend(loc="lower right") +print(tpr_fpr_table) diff --git a/third_part/face3d/models/arcface_torch/utils/utils_amp.py b/third_part/face3d/models/arcface_torch/utils/utils_amp.py new file mode 100644 index 
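
The schedulers stepped above wrap lr_step_func in a LambdaLR: linear warmup, then a x0.1 drop at each decay milestone. Toy step counts below, just to show the multiplier behaviour (not the config values):

import torch

warmup_step, decay_steps, base_lr = 100, [400, 800], 0.1

def lr_lambda(step):
    if step < warmup_step:
        return step / warmup_step                              # linear warmup
    return 0.1 ** len([d for d in decay_steps if d <= step])   # step decay

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=base_lr)
sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=lr_lambda)
for step in range(1000):
    opt.step()
    sched.step()
print(opt.param_groups[0]['lr'])   # 0.001 after passing both decay milestones
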
0000000000000000000000000000000000000000..9ac2a03f4212faa129faed447a8f4519c0a00a8b --- /dev/null +++ b/third_part/face3d/models/arcface_torch/utils/utils_amp.py @@ -0,0 +1,88 @@ +from typing import Dict, List + +import torch + +if torch.__version__ < '1.9': + Iterable = torch._six.container_abcs.Iterable +else: + import collections + + Iterable = collections.abc.Iterable +from torch.cuda.amp import GradScaler + + +class _MultiDeviceReplicator(object): + """ + Lazily serves copies of a tensor to requested devices. Copies are cached per-device. + """ + + def __init__(self, master_tensor: torch.Tensor) -> None: + assert master_tensor.is_cuda + self.master = master_tensor + self._per_device_tensors: Dict[torch.device, torch.Tensor] = {} + + def get(self, device) -> torch.Tensor: + retval = self._per_device_tensors.get(device, None) + if retval is None: + retval = self.master.to(device=device, non_blocking=True, copy=True) + self._per_device_tensors[device] = retval + return retval + + +class MaxClipGradScaler(GradScaler): + def __init__(self, init_scale, max_scale: float, growth_interval=100): + GradScaler.__init__(self, init_scale=init_scale, growth_interval=growth_interval) + self.max_scale = max_scale + + def scale_clip(self): + if self.get_scale() == self.max_scale: + self.set_growth_factor(1) + elif self.get_scale() < self.max_scale: + self.set_growth_factor(2) + elif self.get_scale() > self.max_scale: + self._scale.fill_(self.max_scale) + self.set_growth_factor(1) + + def scale(self, outputs): + """ + Multiplies ('scales') a tensor or list of tensors by the scale factor. + + Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled, outputs are returned + unmodified. + + Arguments: + outputs (Tensor or iterable of Tensors): Outputs to scale. + """ + if not self._enabled: + return outputs + self.scale_clip() + # Short-circuit for the common case. + if isinstance(outputs, torch.Tensor): + assert outputs.is_cuda + if self._scale is None: + self._lazy_init_scale_growth_tracker(outputs.device) + assert self._scale is not None + return outputs * self._scale.to(device=outputs.device, non_blocking=True) + + # Invoke the more complex machinery only if we're treating multiple outputs. 
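
MaxClipGradScaler specialises the stock GradScaler by capping the scale at max_scale; the surrounding usage pattern is the standard one seen in train.py (scale, backward, unscale, clip, step, update). A generic sketch of that pattern with the plain GradScaler, assuming a CUDA device is available:

import torch
from torch.cuda.amp import GradScaler, autocast
from torch.nn.utils import clip_grad_norm_

model = torch.nn.Linear(512, 10).cuda()
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler(init_scale=2.0**10, growth_interval=100)

x = torch.randn(32, 512, device="cuda")
y = torch.randint(0, 10, (32,), device="cuda")
with autocast():
    loss = torch.nn.functional.cross_entropy(model(x), y)
scaler.scale(loss).backward()
scaler.unscale_(opt)                                   # bring grads back to fp32 scale
clip_grad_norm_(model.parameters(), max_norm=5, norm_type=2)
scaler.step(opt)                                       # skipped if grads overflowed
scaler.update()
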
+ stash: List[_MultiDeviceReplicator] = [] # holds a reference that can be overwritten by apply_scale + + def apply_scale(val): + if isinstance(val, torch.Tensor): + assert val.is_cuda + if len(stash) == 0: + if self._scale is None: + self._lazy_init_scale_growth_tracker(val.device) + assert self._scale is not None + stash.append(_MultiDeviceReplicator(self._scale)) + return val * stash[0].get(val.device) + elif isinstance(val, Iterable): + iterable = map(apply_scale, val) + if isinstance(val, list) or isinstance(val, tuple): + return type(val)(iterable) + else: + return iterable + else: + raise ValueError("outputs must be a Tensor or an iterable of Tensors") + + return apply_scale(outputs) diff --git a/third_part/face3d/models/arcface_torch/utils/utils_callbacks.py b/third_part/face3d/models/arcface_torch/utils/utils_callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..bd2f56cba47c57de102710ff56eaac591e59f4da --- /dev/null +++ b/third_part/face3d/models/arcface_torch/utils/utils_callbacks.py @@ -0,0 +1,117 @@ +import logging +import os +import time +from typing import List + +import torch + +from eval import verification +from utils.utils_logging import AverageMeter + + +class CallBackVerification(object): + def __init__(self, frequent, rank, val_targets, rec_prefix, image_size=(112, 112)): + self.frequent: int = frequent + self.rank: int = rank + self.highest_acc: float = 0.0 + self.highest_acc_list: List[float] = [0.0] * len(val_targets) + self.ver_list: List[object] = [] + self.ver_name_list: List[str] = [] + if self.rank is 0: + self.init_dataset(val_targets=val_targets, data_dir=rec_prefix, image_size=image_size) + + def ver_test(self, backbone: torch.nn.Module, global_step: int): + results = [] + for i in range(len(self.ver_list)): + acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( + self.ver_list[i], backbone, 10, 10) + logging.info('[%s][%d]XNorm: %f' % (self.ver_name_list[i], global_step, xnorm)) + logging.info('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (self.ver_name_list[i], global_step, acc2, std2)) + if acc2 > self.highest_acc_list[i]: + self.highest_acc_list[i] = acc2 + logging.info( + '[%s][%d]Accuracy-Highest: %1.5f' % (self.ver_name_list[i], global_step, self.highest_acc_list[i])) + results.append(acc2) + + def init_dataset(self, val_targets, data_dir, image_size): + for name in val_targets: + path = os.path.join(data_dir, name + ".bin") + if os.path.exists(path): + data_set = verification.load_bin(path, image_size) + self.ver_list.append(data_set) + self.ver_name_list.append(name) + + def __call__(self, num_update, backbone: torch.nn.Module): + if self.rank is 0 and num_update > 0 and num_update % self.frequent == 0: + backbone.eval() + self.ver_test(backbone, num_update) + backbone.train() + + +class CallBackLogging(object): + def __init__(self, frequent, rank, total_step, batch_size, world_size, writer=None): + self.frequent: int = frequent + self.rank: int = rank + self.time_start = time.time() + self.total_step: int = total_step + self.batch_size: int = batch_size + self.world_size: int = world_size + self.writer = writer + + self.init = False + self.tic = 0 + + def __call__(self, + global_step: int, + loss: AverageMeter, + epoch: int, + fp16: bool, + learning_rate: float, + grad_scaler: torch.cuda.amp.GradScaler): + if self.rank == 0 and global_step > 0 and global_step % self.frequent == 0: + if self.init: + try: + speed: float = self.frequent * self.batch_size / (time.time() - self.tic) + speed_total = speed * 
self.world_size + except ZeroDivisionError: + speed_total = float('inf') + + time_now = (time.time() - self.time_start) / 3600 + time_total = time_now / ((global_step + 1) / self.total_step) + time_for_end = time_total - time_now + if self.writer is not None: + self.writer.add_scalar('time_for_end', time_for_end, global_step) + self.writer.add_scalar('learning_rate', learning_rate, global_step) + self.writer.add_scalar('loss', loss.avg, global_step) + if fp16: + msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ + "Fp16 Grad Scale: %2.f Required: %1.f hours" % ( + speed_total, loss.avg, learning_rate, epoch, global_step, + grad_scaler.get_scale(), time_for_end + ) + else: + msg = "Speed %.2f samples/sec Loss %.4f LearningRate %.4f Epoch: %d Global Step: %d " \ + "Required: %1.f hours" % ( + speed_total, loss.avg, learning_rate, epoch, global_step, time_for_end + ) + logging.info(msg) + loss.reset() + self.tic = time.time() + else: + self.init = True + self.tic = time.time() + + +class CallBackModelCheckpoint(object): + def __init__(self, rank, output="./"): + self.rank: int = rank + self.output: str = output + + def __call__(self, global_step, backbone, partial_fc, ): + if global_step > 100 and self.rank == 0: + path_module = os.path.join(self.output, "backbone.pth") + torch.save(backbone.module.state_dict(), path_module) + logging.info("Pytorch Model Saved in '{}'".format(path_module)) + + if global_step > 100 and partial_fc is not None: + partial_fc.save_params() diff --git a/third_part/face3d/models/arcface_torch/utils/utils_config.py b/third_part/face3d/models/arcface_torch/utils/utils_config.py new file mode 100644 index 0000000000000000000000000000000000000000..0c02eaf70fc0140aca7925f621c29a496f491cae --- /dev/null +++ b/third_part/face3d/models/arcface_torch/utils/utils_config.py @@ -0,0 +1,16 @@ +import importlib +import os.path as osp + + +def get_config(config_file): + assert config_file.startswith('configs/'), 'config file setting must start with configs/' + temp_config_name = osp.basename(config_file) + temp_module_name = osp.splitext(temp_config_name)[0] + config = importlib.import_module("configs.base") + cfg = config.config + config = importlib.import_module("configs.%s" % temp_module_name) + job_cfg = config.config + cfg.update(job_cfg) + if cfg.output is None: + cfg.output = osp.join('work_dirs', temp_module_name) + return cfg \ No newline at end of file diff --git a/third_part/face3d/models/arcface_torch/utils/utils_logging.py b/third_part/face3d/models/arcface_torch/utils/utils_logging.py new file mode 100644 index 0000000000000000000000000000000000000000..c787b6aae7cd037a4718df44d672b8ffa9e5c249 --- /dev/null +++ b/third_part/face3d/models/arcface_torch/utils/utils_logging.py @@ -0,0 +1,41 @@ +import logging +import os +import sys + + +class AverageMeter(object): + """Computes and stores the average and current value + """ + + def __init__(self): + self.val = None + self.avg = None + self.sum = None + self.count = None + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def init_logging(rank, models_root): + if rank == 0: + log_root = logging.getLogger() + log_root.setLevel(logging.INFO) + formatter = logging.Formatter("Training: %(asctime)s-%(message)s") + handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) + handler_stream = 
logging.StreamHandler(sys.stdout) + handler_file.setFormatter(formatter) + handler_stream.setFormatter(formatter) + log_root.addHandler(handler_file) + log_root.addHandler(handler_stream) + log_root.info('rank_id: %d' % rank) diff --git a/third_part/face3d/models/arcface_torch/utils/utils_os.py b/third_part/face3d/models/arcface_torch/utils/utils_os.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_part/face3d/models/base_model.py b/third_part/face3d/models/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..2a05d3a000379d28b176f9d052ed7ff15cf5ba1e --- /dev/null +++ b/third_part/face3d/models/base_model.py @@ -0,0 +1,316 @@ +"""This script defines the base network model for Deep3DFaceRecon_pytorch +""" + +import os +import numpy as np +import torch +from collections import OrderedDict +from abc import ABC, abstractmethod +from . import networks + + +class BaseModel(ABC): + """This class is an abstract base class (ABC) for models. + To create a subclass, you need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate losses, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + """ + + def __init__(self, opt): + """Initialize the BaseModel class. + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + + When creating your custom class, you need to implement your own initialization. + In this fucntion, you should first call + Then, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. + -- self.model_names (str list): specify the images that you want to display and save. + -- self.visual_names (str list): define networks used in our training. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. + """ + self.opt = opt + self.isTrain = opt.isTrain + self.device = torch.device('cpu') + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir + self.loss_names = [] + self.model_names = [] + self.visual_names = [] + self.parallel_names = [] + self.optimizers = [] + self.image_paths = [] + self.metric = 0 # used for learning rate policy 'plateau' + + @staticmethod + def dict_grad_hook_factory(add_func=lambda x: x): + saved_dict = dict() + + def hook_gen(name): + def grad_hook(grad): + saved_vals = add_func(grad) + saved_dict[name] = saved_vals + return grad_hook + return hook_gen, saved_dict + + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new model-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + return parser + + @abstractmethod + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. 
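
BaseModel leaves three hooks abstract: set_input, forward and optimize_parameters, with losses, networks and optimizers registered by name. A structurally analogous toy (it deliberately does not import the repo's BaseModel, which pulls in face3d.models.networks), just to show what a concrete subclass provides:

import torch


class ToyModel:
    def __init__(self):
        self.netG = torch.nn.Linear(4, 1)
        self.optimizers = [torch.optim.SGD(self.netG.parameters(), lr=0.01)]
        self.loss_names = ["l2"]          # exposed via get_current_losses()
        self.model_names = ["netG"]       # exposed via save_networks()

    def set_input(self, data):            # unpack a dataloader batch
        self.x, self.y = data["x"], data["y"]

    def forward(self):
        self.pred = self.netG(self.x)

    def optimize_parameters(self):
        self.forward()
        self.loss_l2 = torch.nn.functional.mse_loss(self.pred, self.y)
        self.optimizers[0].zero_grad()
        self.loss_l2.backward()
        self.optimizers[0].step()


m = ToyModel()
m.set_input({"x": torch.randn(8, 4), "y": torch.randn(8, 1)})
m.optimize_parameters()
print(float(m.loss_l2))
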
+ + Parameters: + input (dict): includes the data itself and its metadata information. + """ + pass + + @abstractmethod + def forward(self): + """Run forward pass; called by both functions and .""" + pass + + @abstractmethod + def optimize_parameters(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + pass + + def setup(self, opt): + """Load and print networks; create schedulers + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + if self.isTrain: + self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] + + if not self.isTrain or opt.continue_train: + load_suffix = opt.epoch + self.load_networks(load_suffix) + + + # self.print_networks(opt.verbose) + + def parallelize(self, convert_sync_batchnorm=True): + if not self.opt.use_ddp: + for name in self.parallel_names: + if isinstance(name, str): + module = getattr(self, name) + setattr(self, name, module.to(self.device)) + else: + for name in self.model_names: + if isinstance(name, str): + module = getattr(self, name) + if convert_sync_batchnorm: + module = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module) + setattr(self, name, torch.nn.parallel.DistributedDataParallel(module.to(self.device), + device_ids=[self.device.index], + find_unused_parameters=True, broadcast_buffers=True)) + + # DistributedDataParallel is not needed when a module doesn't have any parameter that requires a gradient. + for name in self.parallel_names: + if isinstance(name, str) and name not in self.model_names: + module = getattr(self, name) + setattr(self, name, module.to(self.device)) + + # put state_dict of optimizer to gpu device + if self.opt.phase != 'test': + if self.opt.continue_train: + for optim in self.optimizers: + for state in optim.state.values(): + for k, v in state.items(): + if isinstance(v, torch.Tensor): + state[k] = v.to(self.device) + + def data_dependent_initialize(self, data): + pass + + def train(self): + """Make models train mode""" + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, name) + net.train() + + def eval(self): + """Make models eval mode""" + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, name) + net.eval() + + def test(self): + """Forward function used in test time. + + This function wraps function in no_grad() so we don't save intermediate steps for backprop + It also calls to produce additional visualization results + """ + with torch.no_grad(): + self.forward() + self.compute_visuals() + + def compute_visuals(self): + """Calculate additional output images for visdom and HTML visualization""" + pass + + def get_image_paths(self, name='A'): + """ Return image paths that are used to load current data""" + return self.image_paths if name =='A' else self.image_paths_B + + def update_learning_rate(self): + """Update learning rates for all the networks; called at the end of every epoch""" + for scheduler in self.schedulers: + if self.opt.lr_policy == 'plateau': + scheduler.step(self.metric) + else: + scheduler.step() + + lr = self.optimizers[0].param_groups[0]['lr'] + print('learning rate = %.7f' % lr) + + def get_current_visuals(self): + """Return visualization images. train.py will display these images with visdom, and save the images to a HTML""" + visual_ret = OrderedDict() + for name in self.visual_names: + if isinstance(name, str): + visual_ret[name] = getattr(self, name)[:, :3, ...] 
+ return visual_ret + + def get_current_losses(self): + """Return traning losses / errors. train.py will print out these errors on console, and save them to a file""" + errors_ret = OrderedDict() + for name in self.loss_names: + if isinstance(name, str): + errors_ret[name] = float(getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number + return errors_ret + + def save_networks(self, epoch): + """Save all the networks to the disk. + + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + if not os.path.isdir(self.save_dir): + os.makedirs(self.save_dir) + + save_filename = 'epoch_%s.pth' % (epoch) + save_path = os.path.join(self.save_dir, save_filename) + + save_dict = {} + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, name) + if isinstance(net, torch.nn.DataParallel) or isinstance(net, + torch.nn.parallel.DistributedDataParallel): + net = net.module + save_dict[name] = net.state_dict() + + + for i, optim in enumerate(self.optimizers): + save_dict['opt_%02d'%i] = optim.state_dict() + + for i, sched in enumerate(self.schedulers): + save_dict['sched_%02d'%i] = sched.state_dict() + + torch.save(save_dict, save_path) + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def load_networks(self, epoch): + """Load all the networks from the disk. 
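
save_networks() bundles the network, optimizer and scheduler state dicts into a single epoch_N.pth file, and load_networks() reads them back by the same keys. A minimal sketch of that round trip; the key names and epoch number here are illustrative placeholders, not values taken from this diff:

import torch

net = torch.nn.Linear(4, 2)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=10)

torch.save({"net_recon": net.state_dict(),        # one entry per name in model_names
            "opt_00": opt.state_dict(),
            "sched_00": sched.state_dict()}, "epoch_20.pth")

ckpt = torch.load("epoch_20.pth", map_location="cpu")
net.load_state_dict(ckpt["net_recon"])
opt.load_state_dict(ckpt["opt_00"])
sched.load_state_dict(ckpt["sched_00"])
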
+ + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + if self.opt.isTrain and self.opt.pretrained_name is not None: + load_dir = os.path.join(self.opt.checkpoints_dir, self.opt.pretrained_name) + else: + load_dir = self.save_dir + load_filename = 'epoch_%s.pth' % (epoch) + load_path = os.path.join(load_dir, load_filename) + state_dict = torch.load(load_path, map_location=self.device) + print('loading the model from %s' % load_path) + + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, name) + if isinstance(net, torch.nn.DataParallel): + net = net.module + net.load_state_dict(state_dict[name]) + + if self.opt.phase != 'test': + if self.opt.continue_train: + print('loading the optim from %s' % load_path) + for i, optim in enumerate(self.optimizers): + optim.load_state_dict(state_dict['opt_%02d'%i]) + + try: + print('loading the sched from %s' % load_path) + for i, sched in enumerate(self.schedulers): + sched.load_state_dict(state_dict['sched_%02d'%i]) + except: + print('Failed to load schedulers, set schedulers according to epoch count manually') + for i, sched in enumerate(self.schedulers): + sched.last_epoch = self.opt.epoch_count - 1 + + + + + def print_networks(self, verbose): + """Print the total number of parameters in the network and (if verbose) network architecture + + Parameters: + verbose (bool) -- if verbose: print the network architecture + """ + print('---------- Networks initialized -------------') + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, name) + num_params = 0 + for param in net.parameters(): + num_params += param.numel() + if verbose: + print(net) + print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) + print('-----------------------------------------------') + + def set_requires_grad(self, nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary computations + Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad + + def generate_visuals_for_evaluation(self, data, mode): + return {} diff --git a/third_part/face3d/models/bfm.py b/third_part/face3d/models/bfm.py new file mode 100644 index 0000000000000000000000000000000000000000..376641e19ccbb0c1c9df316d9792b9930f1a7df1 --- /dev/null +++ b/third_part/face3d/models/bfm.py @@ -0,0 +1,303 @@ +"""This script defines the parametric 3d face model for Deep3DFaceRecon_pytorch +""" + +import numpy as np +import torch +import torch.nn.functional as F +from scipy.io import loadmat +from face3d.util.load_mats import transferBFM09 +import os + +def perspective_projection(focal, center): + # return p.T (N, 3) @ (3, 3) + return np.array([ + focal, 0, center, + 0, focal, center, + 0, 0, 1 + ]).reshape([3, 3]).astype(np.float32).transpose() + +class SH: + def __init__(self): + self.a = [np.pi, 2 * np.pi / np.sqrt(3.), 2 * np.pi / np.sqrt(8.)] + self.c = [1/np.sqrt(4 * np.pi), np.sqrt(3.) / np.sqrt(4 * np.pi), 3 * np.sqrt(5.) 
/ np.sqrt(12 * np.pi)] + + + +class ParametricFaceModel: + def __init__(self, + bfm_folder='./BFM', + recenter=True, + camera_distance=10., + init_lit=np.array([ + 0.8, 0, 0, 0, 0, 0, 0, 0, 0 + ]), + focal=1015., + center=112., + is_train=True, + default_name='BFM_model_front.mat'): + + if not os.path.isfile(os.path.join(bfm_folder, default_name)): + transferBFM09(bfm_folder) + model = loadmat(os.path.join(bfm_folder, default_name)) + # mean face shape. [3*N,1] + self.mean_shape = model['meanshape'].astype(np.float32) + # identity basis. [3*N,80] + self.id_base = model['idBase'].astype(np.float32) + # expression basis. [3*N,64] + self.exp_base = model['exBase'].astype(np.float32) + # mean face texture. [3*N,1] (0-255) + self.mean_tex = model['meantex'].astype(np.float32) + # texture basis. [3*N,80] + self.tex_base = model['texBase'].astype(np.float32) + # face indices for each vertex that lies in. starts from 0. [N,8] + self.point_buf = model['point_buf'].astype(np.int64) - 1 + # vertex indices for each face. starts from 0. [F,3] + self.face_buf = model['tri'].astype(np.int64) - 1 + # vertex indices for 68 landmarks. starts from 0. [68,1] + self.keypoints = np.squeeze(model['keypoints']).astype(np.int64) - 1 + + if is_train: + # vertex indices for small face region to compute photometric error. starts from 0. + self.front_mask = np.squeeze(model['frontmask2_idx']).astype(np.int64) - 1 + # vertex indices for each face from small face region. starts from 0. [f,3] + self.front_face_buf = model['tri_mask2'].astype(np.int64) - 1 + # vertex indices for pre-defined skin region to compute reflectance loss + self.skin_mask = np.squeeze(model['skinmask']) + + if recenter: + mean_shape = self.mean_shape.reshape([-1, 3]) + mean_shape = mean_shape - np.mean(mean_shape, axis=0, keepdims=True) + self.mean_shape = mean_shape.reshape([-1, 1]) + + self.persc_proj = perspective_projection(focal, center) + self.device = 'cpu' + self.camera_distance = camera_distance + self.SH = SH() + self.init_lit = init_lit.reshape([1, 1, -1]).astype(np.float32) + + + def to(self, device): + self.device = device + for key, value in self.__dict__.items(): + if type(value).__module__ == np.__name__: + setattr(self, key, torch.tensor(value).to(device)) + + + def compute_shape(self, id_coeff, exp_coeff): + """ + Return: + face_shape -- torch.tensor, size (B, N, 3) + + Parameters: + id_coeff -- torch.tensor, size (B, 80), identity coeffs + exp_coeff -- torch.tensor, size (B, 64), expression coeffs + """ + batch_size = id_coeff.shape[0] + id_part = torch.einsum('ij,aj->ai', self.id_base, id_coeff) + exp_part = torch.einsum('ij,aj->ai', self.exp_base, exp_coeff) + face_shape = id_part + exp_part + self.mean_shape.reshape([1, -1]) + return face_shape.reshape([batch_size, -1, 3]) + + + def compute_texture(self, tex_coeff, normalize=True): + """ + Return: + face_texture -- torch.tensor, size (B, N, 3), in RGB order, range (0, 1.) + + Parameters: + tex_coeff -- torch.tensor, size (B, 80) + """ + batch_size = tex_coeff.shape[0] + face_texture = torch.einsum('ij,aj->ai', self.tex_base, tex_coeff) + self.mean_tex + if normalize: + face_texture = face_texture / 255. 
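The identity/expression combination in compute_shape (and the analogous texture combination above) is a plain linear 3DMM: shape = mean_shape + id_base @ id_coeff + exp_base @ exp_coeff. The following toy-sized sketch reproduces the einsum and reshape it uses; the bases are random and the vertex count is illustrative.

# Toy linear 3DMM combination, mirroring compute_shape(); N=5 vertices is illustrative.
import torch

B, N = 2, 5
mean_shape = torch.zeros(3 * N, 1)          # stands in for model['meanshape']
id_base = torch.randn(3 * N, 80)            # identity basis
exp_base = torch.randn(3 * N, 64)           # expression basis
id_coeff = torch.randn(B, 80)
exp_coeff = torch.randn(B, 64)

id_part = torch.einsum('ij,aj->ai', id_base, id_coeff)     # (B, 3N)
exp_part = torch.einsum('ij,aj->ai', exp_base, exp_coeff)  # (B, 3N)
face_shape = (id_part + exp_part + mean_shape.reshape(1, -1)).reshape(B, -1, 3)
print(face_shape.shape)                     # torch.Size([2, 5, 3])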
+ return face_texture.reshape([batch_size, -1, 3]) + + + def compute_norm(self, face_shape): + """ + Return: + vertex_norm -- torch.tensor, size (B, N, 3) + + Parameters: + face_shape -- torch.tensor, size (B, N, 3) + """ + + v1 = face_shape[:, self.face_buf[:, 0]] + v2 = face_shape[:, self.face_buf[:, 1]] + v3 = face_shape[:, self.face_buf[:, 2]] + e1 = v1 - v2 + e2 = v2 - v3 + face_norm = torch.cross(e1, e2, dim=-1) + face_norm = F.normalize(face_norm, dim=-1, p=2) + face_norm = torch.cat([face_norm, torch.zeros(face_norm.shape[0], 1, 3).to(self.device)], dim=1) + + vertex_norm = torch.sum(face_norm[:, self.point_buf], dim=2) + vertex_norm = F.normalize(vertex_norm, dim=-1, p=2) + return vertex_norm + + + def compute_color(self, face_texture, face_norm, gamma): + """ + Return: + face_color -- torch.tensor, size (B, N, 3), range (0, 1.) + + Parameters: + face_texture -- torch.tensor, size (B, N, 3), from texture model, range (0, 1.) + face_norm -- torch.tensor, size (B, N, 3), rotated face normal + gamma -- torch.tensor, size (B, 27), SH coeffs + """ + batch_size = gamma.shape[0] + v_num = face_texture.shape[1] + a, c = self.SH.a, self.SH.c + gamma = gamma.reshape([batch_size, 3, 9]) + gamma = gamma + self.init_lit + gamma = gamma.permute(0, 2, 1) + Y = torch.cat([ + a[0] * c[0] * torch.ones_like(face_norm[..., :1]).to(self.device), + -a[1] * c[1] * face_norm[..., 1:2], + a[1] * c[1] * face_norm[..., 2:], + -a[1] * c[1] * face_norm[..., :1], + a[2] * c[2] * face_norm[..., :1] * face_norm[..., 1:2], + -a[2] * c[2] * face_norm[..., 1:2] * face_norm[..., 2:], + 0.5 * a[2] * c[2] / np.sqrt(3.) * (3 * face_norm[..., 2:] ** 2 - 1), + -a[2] * c[2] * face_norm[..., :1] * face_norm[..., 2:], + 0.5 * a[2] * c[2] * (face_norm[..., :1] ** 2 - face_norm[..., 1:2] ** 2) + ], dim=-1) + r = Y @ gamma[..., :1] + g = Y @ gamma[..., 1:2] + b = Y @ gamma[..., 2:] + face_color = torch.cat([r, g, b], dim=-1) * face_texture + return face_color + + + def compute_rotation(self, angles): + """ + Return: + rot -- torch.tensor, size (B, 3, 3) pts @ trans_mat + + Parameters: + angles -- torch.tensor, size (B, 3), radian + """ + + batch_size = angles.shape[0] + ones = torch.ones([batch_size, 1]).to(self.device) + zeros = torch.zeros([batch_size, 1]).to(self.device) + x, y, z = angles[:, :1], angles[:, 1:2], angles[:, 2:], + + rot_x = torch.cat([ + ones, zeros, zeros, + zeros, torch.cos(x), -torch.sin(x), + zeros, torch.sin(x), torch.cos(x) + ], dim=1).reshape([batch_size, 3, 3]) + + rot_y = torch.cat([ + torch.cos(y), zeros, torch.sin(y), + zeros, ones, zeros, + -torch.sin(y), zeros, torch.cos(y) + ], dim=1).reshape([batch_size, 3, 3]) + + rot_z = torch.cat([ + torch.cos(z), -torch.sin(z), zeros, + torch.sin(z), torch.cos(z), zeros, + zeros, zeros, ones + ], dim=1).reshape([batch_size, 3, 3]) + + rot = rot_z @ rot_y @ rot_x + return rot.permute(0, 2, 1) + + + def to_camera(self, face_shape): + face_shape[..., -1] = self.camera_distance - face_shape[..., -1] + return face_shape + + def to_image(self, face_shape): + """ + Return: + face_proj -- torch.tensor, size (B, N, 2), y direction is opposite to v direction + + Parameters: + face_shape -- torch.tensor, size (B, N, 3) + """ + # to image_plane + face_proj = face_shape @ self.persc_proj + face_proj = face_proj[..., :2] / face_proj[..., 2:] + + return face_proj + + + def transform(self, face_shape, rot, trans): + """ + Return: + face_shape -- torch.tensor, size (B, N, 3) pts @ rot + trans + + Parameters: + face_shape -- torch.tensor, size (B, N, 3) + rot -- 
torch.tensor, size (B, 3, 3) + trans -- torch.tensor, size (B, 3) + """ + return face_shape @ rot + trans.unsqueeze(1) + + + def get_landmarks(self, face_proj): + """ + Return: + face_lms -- torch.tensor, size (B, 68, 2) + + Parameters: + face_proj -- torch.tensor, size (B, N, 2) + """ + return face_proj[:, self.keypoints] + + def split_coeff(self, coeffs): + """ + Return: + coeffs_dict -- a dict of torch.tensors + + Parameters: + coeffs -- torch.tensor, size (B, 256) + """ + id_coeffs = coeffs[:, :80] + exp_coeffs = coeffs[:, 80: 144] + tex_coeffs = coeffs[:, 144: 224] + angles = coeffs[:, 224: 227] + gammas = coeffs[:, 227: 254] + translations = coeffs[:, 254:] + return { + 'id': id_coeffs, + 'exp': exp_coeffs, + 'tex': tex_coeffs, + 'angle': angles, + 'gamma': gammas, + 'trans': translations + } + def compute_for_render(self, coeffs): + """ + Return: + face_vertex -- torch.tensor, size (B, N, 3), in camera coordinate + face_color -- torch.tensor, size (B, N, 3), in RGB order + landmark -- torch.tensor, size (B, 68, 2), y direction is opposite to v direction + Parameters: + coeffs -- torch.tensor, size (B, 257) + """ + coef_dict = self.split_coeff(coeffs) + face_shape = self.compute_shape(coef_dict['id'], coef_dict['exp']) + rotation = self.compute_rotation(coef_dict['angle']) + + + face_shape_transformed = self.transform(face_shape, rotation, coef_dict['trans']) + face_vertex = self.to_camera(face_shape_transformed) + + face_proj = self.to_image(face_vertex) + landmark = self.get_landmarks(face_proj) + + face_texture = self.compute_texture(coef_dict['tex']) + face_norm = self.compute_norm(face_shape) + face_norm_roted = face_norm @ rotation + face_color = self.compute_color(face_texture, face_norm_roted, coef_dict['gamma']) + + return face_vertex, face_texture, face_color, landmark + + +if __name__ == '__main__': + transferBFM09() \ No newline at end of file diff --git a/third_part/face3d/models/facerecon_model.py b/third_part/face3d/models/facerecon_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e51cab7190eeaf7f7681223a5c4cc485d5df867d --- /dev/null +++ b/third_part/face3d/models/facerecon_model.py @@ -0,0 +1,227 @@ +"""This script defines the face reconstruction model for Deep3DFaceRecon_pytorch +""" + +import numpy as np +import torch +from face3d.models.base_model import BaseModel +from face3d.models import networks +from face3d.models.bfm import ParametricFaceModel +from face3d.models.losses import perceptual_loss, photo_loss, reg_loss, reflectance_loss, landmark_loss +from face3d.util import util +from face3d.util.nvdiffrast import MeshRenderer +from face3d.util.preprocess import estimate_norm_torch + +import trimesh +from scipy.io import savemat + +class FaceReconModel(BaseModel): + + @staticmethod + def modify_commandline_options(parser, is_train=True): + """ Configures options specific for CUT model + """ + # net structure and parameters + parser.add_argument('--net_recon', type=str, default='resnet50', choices=['resnet18', 'resnet34', 'resnet50'], help='network structure') + parser.add_argument('--init_path', type=str, default='checkpoints/init_model/resnet50-0676ba61.pth') + parser.add_argument('--use_last_fc', type=util.str2bool, nargs='?', const=True, default=False, help='zero initialize the last fc') + parser.add_argument('--bfm_folder', type=str, default='BFM') + parser.add_argument('--bfm_model', type=str, default='BFM_model_front.mat', help='bfm model') + + # renderer parameters + parser.add_argument('--focal', type=float, default=1015.) 
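For reference, the coefficient vector sliced by split_coeff above is 257-dimensional: 80 identity + 64 expression + 80 texture + 3 angle + 27 gamma + 3 translation. A minimal sketch of that layout (batch size 4 is illustrative):

# Slice a predicted coefficient tensor exactly as split_coeff() does.
import torch

coeffs = torch.randn(4, 257)
slices = {'id': (0, 80), 'exp': (80, 144), 'tex': (144, 224),
          'angle': (224, 227), 'gamma': (227, 254), 'trans': (254, 257)}
coef_dict = {k: coeffs[:, a:b] for k, (a, b) in slices.items()}
assert sum(v.shape[1] for v in coef_dict.values()) == 257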
+ parser.add_argument('--center', type=float, default=112.) + parser.add_argument('--camera_d', type=float, default=10.) + parser.add_argument('--z_near', type=float, default=5.) + parser.add_argument('--z_far', type=float, default=15.) + + if is_train: + # training parameters + parser.add_argument('--net_recog', type=str, default='r50', choices=['r18', 'r43', 'r50'], help='face recog network structure') + parser.add_argument('--net_recog_path', type=str, default='checkpoints/recog_model/ms1mv3_arcface_r50_fp16/backbone.pth') + parser.add_argument('--use_crop_face', type=util.str2bool, nargs='?', const=True, default=False, help='use crop mask for photo loss') + parser.add_argument('--use_predef_M', type=util.str2bool, nargs='?', const=True, default=False, help='use predefined M for predicted face') + + + # augmentation parameters + parser.add_argument('--shift_pixs', type=float, default=10., help='shift pixels') + parser.add_argument('--scale_delta', type=float, default=0.1, help='delta scale factor') + parser.add_argument('--rot_angle', type=float, default=10., help='rot angles, degree') + + # loss weights + parser.add_argument('--w_feat', type=float, default=0.2, help='weight for feat loss') + parser.add_argument('--w_color', type=float, default=1.92, help='weight for loss loss') + parser.add_argument('--w_reg', type=float, default=3.0e-4, help='weight for reg loss') + parser.add_argument('--w_id', type=float, default=1.0, help='weight for id_reg loss') + parser.add_argument('--w_exp', type=float, default=0.8, help='weight for exp_reg loss') + parser.add_argument('--w_tex', type=float, default=1.7e-2, help='weight for tex_reg loss') + parser.add_argument('--w_gamma', type=float, default=10.0, help='weight for gamma loss') + parser.add_argument('--w_lm', type=float, default=1.6e-3, help='weight for lm loss') + parser.add_argument('--w_reflc', type=float, default=5.0, help='weight for reflc loss') + + + + opt, _ = parser.parse_known_args() + parser.set_defaults( + focal=1015., center=112., camera_d=10., use_last_fc=False, z_near=5., z_far=15. + ) + if is_train: + parser.set_defaults( + use_crop_face=True, use_predef_M=False + ) + return parser + + def __init__(self, opt): + """Initialize this model class. + + Parameters: + opt -- training/test options + + A few things can be done here. 
+ - (required) call the initialization function of BaseModel + - define loss function, visualization images, model names, and optimizers + """ + BaseModel.__init__(self, opt) # call the initialization method of BaseModel + + self.visual_names = ['output_vis'] + self.model_names = ['net_recon'] + self.parallel_names = self.model_names + ['renderer'] + + self.net_recon = networks.define_net_recon( + net_recon=opt.net_recon, use_last_fc=opt.use_last_fc, init_path=opt.init_path + ) + + self.facemodel = ParametricFaceModel( + bfm_folder=opt.bfm_folder, camera_distance=opt.camera_d, focal=opt.focal, center=opt.center, + is_train=self.isTrain, default_name=opt.bfm_model + ) + + fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi + self.renderer = MeshRenderer( + rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center) + ) + + if self.isTrain: + self.loss_names = ['all', 'feat', 'color', 'lm', 'reg', 'gamma', 'reflc'] + + self.net_recog = networks.define_net_recog( + net_recog=opt.net_recog, pretrained_path=opt.net_recog_path + ) + # loss func name: (compute_%s_loss) % loss_name + self.compute_feat_loss = perceptual_loss + self.comupte_color_loss = photo_loss + self.compute_lm_loss = landmark_loss + self.compute_reg_loss = reg_loss + self.compute_reflc_loss = reflectance_loss + + self.optimizer = torch.optim.Adam(self.net_recon.parameters(), lr=opt.lr) + self.optimizers = [self.optimizer] + self.parallel_names += ['net_recog'] + # Our program will automatically call to define schedulers, load networks, and print networks + + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input: a dictionary that contains the data itself and its metadata information. 
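The rasterization field of view handed to MeshRenderer in __init__ above follows directly from the virtual camera's focal length and principal point. A short sketch with the default option values (focal=1015, center=112):

# FOV used for the 224x224 render, computed as in FaceReconModel.__init__.
import numpy as np

focal, center = 1015.0, 112.0
fov = 2 * np.arctan(center / focal) * 180 / np.pi
print(round(fov, 2))        # ~12.59 degrees; rasterize_size = int(2 * center) = 224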
+ """ + self.input_img = input['imgs'].to(self.device) + self.atten_mask = input['msks'].to(self.device) if 'msks' in input else None + self.gt_lm = input['lms'].to(self.device) if 'lms' in input else None + self.trans_m = input['M'].to(self.device) if 'M' in input else None + self.image_paths = input['im_paths'] if 'im_paths' in input else None + + def forward(self): + output_coeff = self.net_recon(self.input_img) + self.facemodel.to(self.device) + self.pred_vertex, self.pred_tex, self.pred_color, self.pred_lm = \ + self.facemodel.compute_for_render(output_coeff) + self.pred_mask, _, self.pred_face = self.renderer( + self.pred_vertex, self.facemodel.face_buf, feat=self.pred_color) + + self.pred_coeffs_dict = self.facemodel.split_coeff(output_coeff) + + + def compute_losses(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + + assert self.net_recog.training == False + trans_m = self.trans_m + if not self.opt.use_predef_M: + trans_m = estimate_norm_torch(self.pred_lm, self.input_img.shape[-2]) + + pred_feat = self.net_recog(self.pred_face, trans_m) + gt_feat = self.net_recog(self.input_img, self.trans_m) + self.loss_feat = self.opt.w_feat * self.compute_feat_loss(pred_feat, gt_feat) + + face_mask = self.pred_mask + if self.opt.use_crop_face: + face_mask, _, _ = self.renderer(self.pred_vertex, self.facemodel.front_face_buf) + + face_mask = face_mask.detach() + self.loss_color = self.opt.w_color * self.comupte_color_loss( + self.pred_face, self.input_img, self.atten_mask * face_mask) + + loss_reg, loss_gamma = self.compute_reg_loss(self.pred_coeffs_dict, self.opt) + self.loss_reg = self.opt.w_reg * loss_reg + self.loss_gamma = self.opt.w_gamma * loss_gamma + + self.loss_lm = self.opt.w_lm * self.compute_lm_loss(self.pred_lm, self.gt_lm) + + self.loss_reflc = self.opt.w_reflc * self.compute_reflc_loss(self.pred_tex, self.facemodel.skin_mask) + + self.loss_all = self.loss_feat + self.loss_color + self.loss_reg + self.loss_gamma \ + + self.loss_lm + self.loss_reflc + + + def optimize_parameters(self, isTrain=True): + self.forward() + self.compute_losses() + """Update network weights; it will be called in every training iteration.""" + if isTrain: + self.optimizer.zero_grad() + self.loss_all.backward() + self.optimizer.step() + + def compute_visuals(self): + with torch.no_grad(): + input_img_numpy = 255. * self.input_img.detach().cpu().permute(0, 2, 3, 1).numpy() + output_vis = self.pred_face * self.pred_mask + (1 - self.pred_mask) * self.input_img + output_vis_numpy_raw = 255. 
* output_vis.detach().cpu().permute(0, 2, 3, 1).numpy() + + if self.gt_lm is not None: + gt_lm_numpy = self.gt_lm.cpu().numpy() + pred_lm_numpy = self.pred_lm.detach().cpu().numpy() + output_vis_numpy = util.draw_landmarks(output_vis_numpy_raw, gt_lm_numpy, 'b') + output_vis_numpy = util.draw_landmarks(output_vis_numpy, pred_lm_numpy, 'r') + + output_vis_numpy = np.concatenate((input_img_numpy, + output_vis_numpy_raw, output_vis_numpy), axis=-2) + else: + output_vis_numpy = np.concatenate((input_img_numpy, + output_vis_numpy_raw), axis=-2) + + self.output_vis = torch.tensor( + output_vis_numpy / 255., dtype=torch.float32 + ).permute(0, 3, 1, 2).to(self.device) + + def save_mesh(self, name): + + recon_shape = self.pred_vertex # get reconstructed shape + recon_shape[..., -1] = 10 - recon_shape[..., -1] # from camera space to world space + recon_shape = recon_shape.cpu().numpy()[0] + recon_color = self.pred_color + recon_color = recon_color.cpu().numpy()[0] + tri = self.facemodel.face_buf.cpu().numpy() + mesh = trimesh.Trimesh(vertices=recon_shape, faces=tri, vertex_colors=np.clip(255. * recon_color, 0, 255).astype(np.uint8)) + mesh.export(name) + + def save_coeff(self,name): + + pred_coeffs = {key:self.pred_coeffs_dict[key].cpu().numpy() for key in self.pred_coeffs_dict} + pred_lm = self.pred_lm.cpu().numpy() + pred_lm = np.stack([pred_lm[:,:,0],self.input_img.shape[2]-1-pred_lm[:,:,1]],axis=2) # transfer to image coordinate + pred_coeffs['lm68'] = pred_lm + savemat(name,pred_coeffs) + + + diff --git a/third_part/face3d/models/losses.py b/third_part/face3d/models/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..fbacb63b6110f3dbe7256eb4d5eb781a41e87b8f --- /dev/null +++ b/third_part/face3d/models/losses.py @@ -0,0 +1,113 @@ +import numpy as np +import torch +import torch.nn as nn +from kornia.geometry import warp_affine +import torch.nn.functional as F + +def resize_n_crop(image, M, dsize=112): + # image: (b, c, h, w) + # M : (b, 2, 3) + return warp_affine(image, M, dsize=(dsize, dsize)) + +### perceptual level loss +class PerceptualLoss(nn.Module): + def __init__(self, recog_net, input_size=112): + super(PerceptualLoss, self).__init__() + self.recog_net = recog_net + self.preprocess = lambda x: 2 * x - 1 + self.input_size=input_size + def forward(imageA, imageB, M): + """ + 1 - cosine distance + Parameters: + imageA --torch.tensor (B, 3, H, W), range (0, 1) , RGB order + imageB --same as imageA + """ + + imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) + imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) + + # freeze bn + self.recog_net.eval() + + id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) + id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) + cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) + # assert torch.sum((cosine_d > 1).float()) == 0 + return torch.sum(1 - cosine_d) / cosine_d.shape[0] + +def perceptual_loss(id_featureA, id_featureB): + cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) + # assert torch.sum((cosine_d > 1).float()) == 0 + return torch.sum(1 - cosine_d) / cosine_d.shape[0] + +### image level loss +def photo_loss(imageA, imageB, mask, eps=1e-6): + """ + l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur) + Parameters: + imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order + imageB --same as imageA + """ + loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdims=True)) * mask + loss = torch.sum(loss) / 
torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) + return loss + +def landmark_loss(predict_lm, gt_lm, weight=None): + """ + weighted mse loss + Parameters: + predict_lm --torch.tensor (B, 68, 2) + gt_lm --torch.tensor (B, 68, 2) + weight --numpy.array (1, 68) + """ + if not weight: + weight = np.ones([68]) + weight[28:31] = 20 + weight[-8:] = 20 + weight = np.expand_dims(weight, 0) + weight = torch.tensor(weight).to(predict_lm.device) + loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight + loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) + return loss + + +### regulization +def reg_loss(coeffs_dict, opt=None): + """ + l2 norm without the sqrt, from yu's implementation (mse) + tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss + Parameters: + coeffs_dict -- a dict of torch.tensors , keys: id, exp, tex, angle, gamma, trans + + """ + # coefficient regularization to ensure plausible 3d faces + if opt: + w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex + else: + w_id, w_exp, w_tex = 1, 1, 1, 1 + creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ + w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ + w_tex * torch.sum(coeffs_dict['tex'] ** 2) + creg_loss = creg_loss / coeffs_dict['id'].shape[0] + + # gamma regularization to ensure a nearly-monochromatic light + gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) + gamma_mean = torch.mean(gamma, dim=1, keepdims=True) + gamma_loss = torch.mean((gamma - gamma_mean) ** 2) + + return creg_loss, gamma_loss + +def reflectance_loss(texture, mask): + """ + minimize texture variance (mse), albedo regularization to ensure an uniform skin albedo + Parameters: + texture --torch.tensor, (B, N, 3) + mask --torch.tensor, (N), 1 or 0 + + """ + mask = mask.reshape([1, mask.shape[0], 1]) + texture_mean = torch.sum(mask * texture, dim=1, keepdims=True) / torch.sum(mask) + loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) + return loss + diff --git a/third_part/face3d/models/networks.py b/third_part/face3d/models/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..40ce9f9974267da87505b9c0a5e929b12c644801 --- /dev/null +++ b/third_part/face3d/models/networks.py @@ -0,0 +1,521 @@ +"""This script defines deep neural networks for Deep3DFaceRecon_pytorch +""" + +import os +import numpy as np +import torch.nn.functional as F +from torch.nn import init +import functools +from torch.optim import lr_scheduler +import torch +from torch import Tensor +import torch.nn as nn +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url +from typing import Type, Any, Callable, Union, List, Optional +from .arcface_torch.backbones import get_model +from kornia.geometry import warp_affine + +def resize_n_crop(image, M, dsize=112): + # image: (b, c, h, w) + # M : (b, 2, 3) + return warp_affine(image, M, dsize=(dsize, dsize)) + +def filter_state_dict(state_dict, remove_name='fc'): + new_state_dict = {} + for key in state_dict: + if remove_name in key: + continue + new_state_dict[key] = state_dict[key] + return new_state_dict + +def get_scheduler(optimizer, opt): + """Return a learning rate scheduler + + Parameters: + optimizer -- the optimizer of the network + opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.  
+ opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine + + For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers. + See https://pytorch.org/docs/stable/optim.html for more details. + """ + if opt.lr_policy == 'linear': + def lambda_rule(epoch): + lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs + 1) + return lr_l + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) + elif opt.lr_policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_epochs, gamma=0.2) + elif opt.lr_policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) + elif opt.lr_policy == 'cosine': + scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0) + else: + return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy) + return scheduler + + +def define_net_recon(net_recon, use_last_fc=False, init_path=None): + return ReconNetWrapper(net_recon, use_last_fc=use_last_fc, init_path=init_path) + +def define_net_recog(net_recog, pretrained_path=None): + net = RecogNetWrapper(net_recog=net_recog, pretrained_path=pretrained_path) + net.eval() + return net + +class ReconNetWrapper(nn.Module): + fc_dim=257 + def __init__(self, net_recon, use_last_fc=False, init_path=None): + super(ReconNetWrapper, self).__init__() + self.use_last_fc = use_last_fc + if net_recon not in func_dict: + return NotImplementedError('network [%s] is not implemented', net_recon) + func, last_dim = func_dict[net_recon] + backbone = func(use_last_fc=use_last_fc, num_classes=self.fc_dim) + if init_path and os.path.isfile(init_path): + state_dict = filter_state_dict(torch.load(init_path, map_location='cpu')) + backbone.load_state_dict(state_dict) + print("loading init net_recon %s from %s" %(net_recon, init_path)) + self.backbone = backbone + if not use_last_fc: + self.final_layers = nn.ModuleList([ + conv1x1(last_dim, 80, bias=True), # id layer + conv1x1(last_dim, 64, bias=True), # exp layer + conv1x1(last_dim, 80, bias=True), # tex layer + conv1x1(last_dim, 3, bias=True), # angle layer + conv1x1(last_dim, 27, bias=True), # gamma layer + conv1x1(last_dim, 2, bias=True), # tx, ty + conv1x1(last_dim, 1, bias=True) # tz + ]) + for m in self.final_layers: + nn.init.constant_(m.weight, 0.) + nn.init.constant_(m.bias, 0.) 
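When use_last_fc is False, ReconNetWrapper predicts the 257 coefficients through seven 1x1-conv heads applied to the pooled backbone feature, as initialized above. A toy sketch of that head structure (the stand-in feature tensor is illustrative; last_dim=2048 matches the resnet50 entry in func_dict):

# Seven 1x1-conv heads producing the 257-dim coefficient vector.
import torch
import torch.nn as nn

last_dim = 2048
heads = nn.ModuleList([nn.Conv2d(last_dim, c, kernel_size=1, bias=True)
                       for c in (80, 64, 80, 3, 27, 2, 1)])  # id, exp, tex, angle, gamma, tx/ty, tz
feat = torch.randn(4, last_dim, 1, 1)                         # pooled backbone output
coeffs = torch.flatten(torch.cat([h(feat) for h in heads], dim=1), 1)
print(coeffs.shape)                                           # torch.Size([4, 257])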
+ + def forward(self, x): + x = self.backbone(x) + if not self.use_last_fc: + output = [] + for layer in self.final_layers: + output.append(layer(x)) + x = torch.flatten(torch.cat(output, dim=1), 1) + return x + + +class RecogNetWrapper(nn.Module): + def __init__(self, net_recog, pretrained_path=None, input_size=112): + super(RecogNetWrapper, self).__init__() + net = get_model(name=net_recog, fp16=False) + if pretrained_path: + state_dict = torch.load(pretrained_path, map_location='cpu') + net.load_state_dict(state_dict) + print("loading pretrained net_recog %s from %s" %(net_recog, pretrained_path)) + for param in net.parameters(): + param.requires_grad = False + self.net = net + self.preprocess = lambda x: 2 * x - 1 + self.input_size=input_size + + def forward(self, image, M): + image = self.preprocess(resize_n_crop(image, M, self.input_size)) + id_feature = F.normalize(self.net(image), dim=-1, p=2) + return id_feature + + +# adapted from https://github.com/pytorch/vision/edit/master/torchvision/models/resnet.py +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-b627a593.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-0676ba61.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-63fe2227.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-394f9c45.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d: + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes: int, out_planes: int, stride: int = 1, bias: bool = False) -> nn.Conv2d: + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=bias) + + +class BasicBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = 
self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion: int = 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], + num_classes: int = 1000, + zero_init_residual: bool = False, + use_last_fc: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: Optional[List[bool]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.use_last_fc = use_last_fc + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = 
nn.AdaptiveAvgPool2d((1, 1)) + + if self.use_last_fc: + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] + + def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int, + stride: int = 1, dilate: bool = False) -> nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x: Tensor) -> Tensor: + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + if self.use_last_fc: + x = torch.flatten(x, 1) + x = self.fc(x) + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _resnet( + arch: str, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], + pretrained: bool, + progress: bool, + **kwargs: Any +) -> ResNet: + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, + **kwargs) + + +def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNet-101 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, + **kwargs) + + +def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +func_dict = { + 'resnet18': (resnet18, 512), + 'resnet50': (resnet50, 2048) +} diff --git a/third_part/face3d/models/template_model.py b/third_part/face3d/models/template_model.py new file mode 100644 index 0000000000000000000000000000000000000000..dac7b33d5889777eb63c9882a3b9fa094dcab293 --- /dev/null +++ b/third_part/face3d/models/template_model.py @@ -0,0 +1,100 @@ +"""Model class template + +This module provides a template for users to implement custom models. +You can specify '--model template' to use this model. +The class name should be consistent with both the filename and its model option. +The filename should be _dataset.py +The class name should be Dataset.py +It implements a simple image-to-image translation baseline based on regression loss. +Given input-output pairs (data_A, data_B), it learns a network netG that can minimize the following L1 loss: + min_ ||netG(data_A) - data_B||_1 +You need to implement the following functions: + : Add model-specific options and rewrite default values for existing options. + <__init__>: Initialize this model class. + : Unpack input data and perform data pre-processing. + : Run forward pass. This will be called by both and . + : Update network weights; it will be called in every training iteration. +""" +import numpy as np +import torch +from .base_model import BaseModel +from . import networks + + +class TemplateModel(BaseModel): + @staticmethod + def modify_commandline_options(parser, is_train=True): + """Add new model-specific options and rewrite default values for existing options. + + Parameters: + parser -- the option parser + is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + parser.set_defaults(dataset_mode='aligned') # You can rewrite default values for this model. For example, this model usually uses aligned dataset as its dataset. + if is_train: + parser.add_argument('--lambda_regression', type=float, default=1.0, help='weight for the regression loss') # You can define new arguments for this model. + + return parser + + def __init__(self, opt): + """Initialize this model class. + + Parameters: + opt -- training/test options + + A few things can be done here. + - (required) call the initialization function of BaseModel + - define loss function, visualization images, model names, and optimizers + """ + BaseModel.__init__(self, opt) # call the initialization method of BaseModel + # specify the training losses you want to print out. The program will call base_model.get_current_losses to plot the losses to the console and save them to the disk. + self.loss_names = ['loss_G'] + # specify the images you want to save and display. The program will call base_model.get_current_visuals to save and display these images. + self.visual_names = ['data_A', 'data_B', 'output'] + # specify the models you want to save to the disk. The program will call base_model.save_networks and base_model.load_networks to save and load networks. + # you can use opt.isTrain to specify different behaviors for training and test. For example, some networks will not be used during test, and you don't need to load them. 
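As a usage note for networks.py above: define_net_recon resolves its backbone through func_dict, which maps a name to the constructor and its final feature width. A short sketch, assuming third_part is on sys.path so the face3d package added in this diff is importable, and building the network without pretrained weights:

# Resolve and run a backbone via func_dict, as define_net_recon() does internally.
import torch
from face3d.models.networks import func_dict

func, last_dim = func_dict['resnet50']
backbone = func(use_last_fc=False, num_classes=257)
feat = backbone(torch.randn(1, 3, 224, 224))
print(last_dim, feat.shape)      # 2048, torch.Size([1, 2048, 1, 1])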
+ self.model_names = ['G'] + # define networks; you can use opt.isTrain to specify different behaviors for training and test. + self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, gpu_ids=self.gpu_ids) + if self.isTrain: # only defined during training time + # define your loss functions. You can use losses provided by torch.nn such as torch.nn.L1Loss. + # We also provide a GANLoss class "networks.GANLoss". self.criterionGAN = networks.GANLoss().to(self.device) + self.criterionLoss = torch.nn.L1Loss() + # define and initialize optimizers. You can define one optimizer for each network. + # If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. + self.optimizer = torch.optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) + self.optimizers = [self.optimizer] + + # Our program will automatically call to define schedulers, load networks, and print networks + + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input: a dictionary that contains the data itself and its metadata information. + """ + AtoB = self.opt.direction == 'AtoB' # use to swap data_A and data_B + self.data_A = input['A' if AtoB else 'B'].to(self.device) # get image data A + self.data_B = input['B' if AtoB else 'A'].to(self.device) # get image data B + self.image_paths = input['A_paths' if AtoB else 'B_paths'] # get image paths + + def forward(self): + """Run forward pass. This will be called by both functions and .""" + self.output = self.netG(self.data_A) # generate output image given the input data_A + + def backward(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + # caculate the intermediate results if necessary; here self.output has been computed during function + # calculate loss given the input and intermediate results + self.loss_G = self.criterionLoss(self.output, self.data_B) * self.opt.lambda_regression + self.loss_G.backward() # calculate gradients of network G w.r.t. loss_G + + def optimize_parameters(self): + """Update network weights; it will be called in every training iteration.""" + self.forward() # first call forward to calculate intermediate results + self.optimizer.zero_grad() # clear network G's existing gradients + self.backward() # calculate gradients for network G + self.optimizer.step() # update gradients for network G diff --git a/third_part/face3d/options/__init__.py b/third_part/face3d/options/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7eedebe54aa70169fd25951b3034d819e396c90 --- /dev/null +++ b/third_part/face3d/options/__init__.py @@ -0,0 +1 @@ +"""This package options includes option modules: training options, test options, and basic options (used in both training and test).""" diff --git a/third_part/face3d/options/base_options.py b/third_part/face3d/options/base_options.py new file mode 100644 index 0000000000000000000000000000000000000000..d8f921d5a43434ae802a55a0fa3889c4b7ab9f6d --- /dev/null +++ b/third_part/face3d/options/base_options.py @@ -0,0 +1,169 @@ +"""This script contains base options for Deep3DFaceRecon_pytorch +""" + +import argparse +import os +from util import util +import numpy as np +import torch +import face3d.models as models +import face3d.data as data + + +class BaseOptions(): + """This class defines options used during both training and test time. 
+ + It also implements several helper functions such as parsing, printing, and saving the options. + It also gathers additional options defined in functions in both dataset class and model class. + """ + + def __init__(self, cmd_line=None): + """Reset the class; indicates the class hasn't been initailized""" + self.initialized = False + self.cmd_line = None + if cmd_line is not None: + self.cmd_line = cmd_line.split() + + def initialize(self, parser): + """Define the common options that are used in both training and test.""" + # basic parameters + parser.add_argument('--name', type=str, default='face_recon', help='name of the experiment. It decides where to store samples and models') + parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') + parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') + parser.add_argument('--vis_batch_nums', type=float, default=1, help='batch nums of images for visulization') + parser.add_argument('--eval_batch_nums', type=float, default=float('inf'), help='batch nums of images for evaluation') + parser.add_argument('--use_ddp', type=util.str2bool, nargs='?', const=True, default=True, help='whether use distributed data parallel') + parser.add_argument('--ddp_port', type=str, default='12355', help='ddp port') + parser.add_argument('--display_per_batch', type=util.str2bool, nargs='?', const=True, default=True, help='whether use batch to show losses') + parser.add_argument('--add_image', type=util.str2bool, nargs='?', const=True, default=True, help='whether add image to tensorboard') + parser.add_argument('--world_size', type=int, default=1, help='batch nums of images for evaluation') + + # model parameters + parser.add_argument('--model', type=str, default='facerecon', help='chooses which model to use.') + + # additional parameters + parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') + parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') + parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}') + + self.initialized = True + return parser + + def gather_options(self): + """Initialize our parser with basic options(only once). + Add additional model-specific and dataset-specific options. + These options are defined in the function + in model and dataset classes. 
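The two-stage parsing that gather_options performs can be illustrated in isolation: a first parse_known_args pass recovers --model, the chosen model's option setter then adds its own flags, and a final parse produces the full options. The setter below is a hypothetical stand-in for models.get_option_setter(...):

# Minimal sketch of the two-pass argparse pattern used by gather_options().
import argparse

def fake_option_setter(parser, is_train):     # hypothetical; the real setter comes from the model class
    parser.add_argument('--net_recon', type=str, default='resnet50')
    return parser

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='facerecon')
opt, _ = parser.parse_known_args(['--model', 'facerecon', '--net_recon', 'resnet18'])
parser = fake_option_setter(parser, is_train=False)
opt = parser.parse_args(['--model', 'facerecon', '--net_recon', 'resnet18'])
print(opt.model, opt.net_recon)               # facerecon resnet18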
+ """ + if not self.initialized: # check if it has been initialized + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = self.initialize(parser) + + # get the basic options + if self.cmd_line is None: + opt, _ = parser.parse_known_args() + else: + opt, _ = parser.parse_known_args(self.cmd_line) + + # set cuda visible devices + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids + + # modify model-related parser options + model_name = opt.model + model_option_setter = models.get_option_setter(model_name) + parser = model_option_setter(parser, self.isTrain) + if self.cmd_line is None: + opt, _ = parser.parse_known_args() # parse again with new defaults + else: + opt, _ = parser.parse_known_args(self.cmd_line) # parse again with new defaults + + # modify dataset-related parser options + if opt.dataset_mode: + dataset_name = opt.dataset_mode + dataset_option_setter = data.get_option_setter(dataset_name) + parser = dataset_option_setter(parser, self.isTrain) + + # save and return the parser + self.parser = parser + if self.cmd_line is None: + return parser.parse_args() + else: + return parser.parse_args(self.cmd_line) + + def print_options(self, opt): + """Print and save options + + It will print both current options and default values(if different). + It will save options into a text file / [checkpoints_dir] / opt.txt + """ + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(opt).items()): + comment = '' + default = self.parser.get_default(k) + if v != default: + comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) + + # save to the disk + expr_dir = os.path.join(opt.checkpoints_dir, opt.name) + util.mkdirs(expr_dir) + file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase)) + try: + with open(file_name, 'wt') as opt_file: + opt_file.write(message) + opt_file.write('\n') + except PermissionError as error: + print("permission error {}".format(error)) + pass + + def parse(self): + """Parse our options, create checkpoints directory suffix, and set up gpu device.""" + opt = self.gather_options() + opt.isTrain = self.isTrain # train or test + + # process opt.suffix + if opt.suffix: + suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else '' + opt.name = opt.name + suffix + + + # set gpu ids + str_ids = opt.gpu_ids.split(',') + gpu_ids = [] + for str_id in str_ids: + id = int(str_id) + if id >= 0: + gpu_ids.append(id) + opt.world_size = len(gpu_ids) + # if len(opt.gpu_ids) > 0: + # torch.cuda.set_device(gpu_ids[0]) + if opt.world_size == 1: + opt.use_ddp = False + + if opt.phase != 'test': + # set continue_train automatically + if opt.pretrained_name is None: + model_dir = os.path.join(opt.checkpoints_dir, opt.name) + else: + model_dir = os.path.join(opt.checkpoints_dir, opt.pretrained_name) + if os.path.isdir(model_dir): + model_pths = [i for i in os.listdir(model_dir) if i.endswith('pth')] + if os.path.isdir(model_dir) and len(model_pths) != 0: + opt.continue_train= True + + # update the latest epoch count + if opt.continue_train: + if opt.epoch == 'latest': + epoch_counts = [int(i.split('.')[0].split('_')[-1]) for i in model_pths if 'latest' not in i] + if len(epoch_counts) != 0: + opt.epoch_count = max(epoch_counts) + 1 + else: + opt.epoch_count = int(opt.epoch) + 1 + + + self.print_options(opt) + self.opt = opt + return self.opt diff --git 
a/third_part/face3d/options/inference_options.py b/third_part/face3d/options/inference_options.py new file mode 100644 index 0000000000000000000000000000000000000000..c453965959ab4cfb31acbc424f994db68c3d4df5 --- /dev/null +++ b/third_part/face3d/options/inference_options.py @@ -0,0 +1,23 @@ +from face3d.options.base_options import BaseOptions + + +class InferenceOptions(BaseOptions): + """This class includes test options. + + It also includes shared options defined in BaseOptions. + """ + + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) # define shared options + parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') + parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') + + parser.add_argument('--input_dir', type=str, help='the folder of the input files') + parser.add_argument('--keypoint_dir', type=str, help='the folder of the keypoint files') + parser.add_argument('--output_dir', type=str, default='mp4', help='the output dir to save the extracted coefficients') + parser.add_argument('--save_split_files', action='store_true', help='save split files or not') + parser.add_argument('--inference_batch_size', type=int, default=8) + + # Dropout and Batchnorm has different behavior during training and test. + self.isTrain = False + return parser diff --git a/third_part/face3d/options/test_options.py b/third_part/face3d/options/test_options.py new file mode 100644 index 0000000000000000000000000000000000000000..4ff3ad142779850d1d5a1640bc00f70d34d4a862 --- /dev/null +++ b/third_part/face3d/options/test_options.py @@ -0,0 +1,21 @@ +"""This script contains the test options for Deep3DFaceRecon_pytorch +""" + +from .base_options import BaseOptions + + +class TestOptions(BaseOptions): + """This class includes test options. + + It also includes shared options defined in BaseOptions. + """ + + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) # define shared options + parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') + parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') + parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.') + + # Dropout and Batchnorm has different behavior during training and test. + self.isTrain = False + return parser diff --git a/third_part/face3d/options/train_options.py b/third_part/face3d/options/train_options.py new file mode 100644 index 0000000000000000000000000000000000000000..1337bfdd5f372b5c686a91b394a2aadbe5741f44 --- /dev/null +++ b/third_part/face3d/options/train_options.py @@ -0,0 +1,53 @@ +"""This script contains the training options for Deep3DFaceRecon_pytorch +""" + +from .base_options import BaseOptions +from util import util + +class TrainOptions(BaseOptions): + """This class includes training options. + + It also includes shared options defined in BaseOptions. + """ + + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) + # dataset parameters + # for train + parser.add_argument('--data_root', type=str, default='./', help='dataset root') + parser.add_argument('--flist', type=str, default='datalist/train/masks.txt', help='list of mask names of training set') + parser.add_argument('--batch_size', type=int, default=32) + parser.add_argument('--dataset_mode', type=str, default='flist', help='chooses how datasets are loaded. 
[None | flist]') + parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') + parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data') + parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') + parser.add_argument('--preprocess', type=str, default='shift_scale_rot_flip', help='scaling and cropping of images at load time [shift_scale_rot_flip | shift_scale | shift | shift_rot_flip ]') + parser.add_argument('--use_aug', type=util.str2bool, nargs='?', const=True, default=True, help='whether use data augmentation') + + # for val + parser.add_argument('--flist_val', type=str, default='datalist/val/masks.txt', help='list of mask names of val set') + parser.add_argument('--batch_size_val', type=int, default=32) + + + # visualization parameters + parser.add_argument('--display_freq', type=int, default=1000, help='frequency of showing training results on screen') + parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') + + # network saving and loading parameters + parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results') + parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') + parser.add_argument('--evaluation_freq', type=int, default=5000, help='evaluation freq') + parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') + parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') + parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') + parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') + parser.add_argument('--pretrained_name', type=str, default=None, help='resume training from another checkpoint') + + # training parameters + parser.add_argument('--n_epochs', type=int, default=20, help='number of epochs with the initial learning rate') + parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') + parser.add_argument('--lr_policy', type=str, default='step', help='learning rate policy. 
[linear | step | plateau | cosine]') + parser.add_argument('--lr_decay_epochs', type=int, default=10, help='multiply by a gamma every lr_decay_epochs epoches') + + self.isTrain = True + return parser diff --git a/third_part/face3d/util/BBRegressorParam_r.mat b/third_part/face3d/util/BBRegressorParam_r.mat new file mode 100644 index 0000000000000000000000000000000000000000..a0da99af145c400a5216d9f6fb251d9412565921 --- /dev/null +++ b/third_part/face3d/util/BBRegressorParam_r.mat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5a07b8ce75a39d96b918dc0fc6e110a72e090da16f5f056a0ef7bfbc3f4560 +size 22019 diff --git a/third_part/face3d/util/__init__.py b/third_part/face3d/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b1e5fdd10b2c89b5f75d8fe6b500f248844bb10d --- /dev/null +++ b/third_part/face3d/util/__init__.py @@ -0,0 +1,2 @@ +"""This package includes a miscellaneous collection of useful helper functions.""" +from face3d.util import * diff --git a/third_part/face3d/util/detect_lm68.py b/third_part/face3d/util/detect_lm68.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e40997289e17405e1fb6c408d21adce7b626ce --- /dev/null +++ b/third_part/face3d/util/detect_lm68.py @@ -0,0 +1,106 @@ +import os +import cv2 +import numpy as np +from scipy.io import loadmat +import tensorflow as tf +from util.preprocess import align_for_lm +from shutil import move + +mean_face = np.loadtxt('util/test_mean_face.txt') +mean_face = mean_face.reshape([68, 2]) + +def save_label(labels, save_path): + np.savetxt(save_path, labels) + +def draw_landmarks(img, landmark, save_name): + landmark = landmark + lm_img = np.zeros([img.shape[0], img.shape[1], 3]) + lm_img[:] = img.astype(np.float32) + landmark = np.round(landmark).astype(np.int32) + + for i in range(len(landmark)): + for j in range(-1, 1): + for k in range(-1, 1): + if img.shape[0] - 1 - landmark[i, 1]+j > 0 and \ + img.shape[0] - 1 - landmark[i, 1]+j < img.shape[0] and \ + landmark[i, 0]+k > 0 and \ + landmark[i, 0]+k < img.shape[1]: + lm_img[img.shape[0] - 1 - landmark[i, 1]+j, landmark[i, 0]+k, + :] = np.array([0, 0, 255]) + lm_img = lm_img.astype(np.uint8) + + cv2.imwrite(save_name, lm_img) + + +def load_data(img_name, txt_name): + return cv2.imread(img_name), np.loadtxt(txt_name) + +# create tensorflow graph for landmark detector +def load_lm_graph(graph_filename): + with tf.gfile.GFile(graph_filename, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name='net') + img_224 = graph.get_tensor_by_name('net/input_imgs:0') + output_lm = graph.get_tensor_by_name('net/lm:0') + lm_sess = tf.Session(graph=graph) + + return lm_sess,img_224,output_lm + +# landmark detection +def detect_68p(img_path,sess,input_op,output_op): + print('detecting landmarks......') + names = [i for i in sorted(os.listdir( + img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] + vis_path = os.path.join(img_path, 'vis') + remove_path = os.path.join(img_path, 'remove') + save_path = os.path.join(img_path, 'landmarks') + if not os.path.isdir(vis_path): + os.makedirs(vis_path) + if not os.path.isdir(remove_path): + os.makedirs(remove_path) + if not os.path.isdir(save_path): + os.makedirs(save_path) + + for i in range(0, len(names)): + name = names[i] + print('%05d' % (i), ' ', name) + full_image_name = os.path.join(img_path, name) + txt_name = '.'.join(name.split('.')[:-1]) + '.txt' + 
full_txt_name = os.path.join(img_path, 'detections', txt_name) # 5 facial landmark path for each image + + # if an image does not have detected 5 facial landmarks, remove it from the training list + if not os.path.isfile(full_txt_name): + move(full_image_name, os.path.join(remove_path, name)) + continue + + # load data + img, five_points = load_data(full_image_name, full_txt_name) + input_img, scale, bbox = align_for_lm(img, five_points) # align for 68 landmark detection + + # if the alignment fails, remove corresponding image from the training list + if scale == 0: + move(full_txt_name, os.path.join( + remove_path, txt_name)) + move(full_image_name, os.path.join(remove_path, name)) + continue + + # detect landmarks + input_img = np.reshape( + input_img, [1, 224, 224, 3]).astype(np.float32) + landmark = sess.run( + output_op, feed_dict={input_op: input_img}) + + # transform back to original image coordinate + landmark = landmark.reshape([68, 2]) + mean_face + landmark[:, 1] = 223 - landmark[:, 1] + landmark = landmark / scale + landmark[:, 0] = landmark[:, 0] + bbox[0] + landmark[:, 1] = landmark[:, 1] + bbox[1] + landmark[:, 1] = img.shape[0] - 1 - landmark[:, 1] + + if i % 100 == 0: + draw_landmarks(img, landmark, os.path.join(vis_path, name)) + save_label(landmark, os.path.join(save_path, txt_name)) diff --git a/third_part/face3d/util/generate_list.py b/third_part/face3d/util/generate_list.py new file mode 100644 index 0000000000000000000000000000000000000000..943d906781063c3584a7e5b5c784f8aac0694985 --- /dev/null +++ b/third_part/face3d/util/generate_list.py @@ -0,0 +1,34 @@ +"""This script is to generate training list files for Deep3DFaceRecon_pytorch +""" + +import os + +# save path to training data +def write_list(lms_list, imgs_list, msks_list, mode='train',save_folder='datalist', save_name=''): + save_path = os.path.join(save_folder, mode) + if not os.path.isdir(save_path): + os.makedirs(save_path) + with open(os.path.join(save_path, save_name + 'landmarks.txt'), 'w') as fd: + fd.writelines([i + '\n' for i in lms_list]) + + with open(os.path.join(save_path, save_name + 'images.txt'), 'w') as fd: + fd.writelines([i + '\n' for i in imgs_list]) + + with open(os.path.join(save_path, save_name + 'masks.txt'), 'w') as fd: + fd.writelines([i + '\n' for i in msks_list]) + +# check if the path is valid +def check_list(rlms_list, rimgs_list, rmsks_list): + lms_list, imgs_list, msks_list = [], [], [] + for i in range(len(rlms_list)): + flag = 'false' + lm_path = rlms_list[i] + im_path = rimgs_list[i] + msk_path = rmsks_list[i] + if os.path.isfile(lm_path) and os.path.isfile(im_path) and os.path.isfile(msk_path): + flag = 'true' + lms_list.append(rlms_list[i]) + imgs_list.append(rimgs_list[i]) + msks_list.append(rmsks_list[i]) + print(i, rlms_list[i], flag) + return lms_list, imgs_list, msks_list diff --git a/third_part/face3d/util/html.py b/third_part/face3d/util/html.py new file mode 100644 index 0000000000000000000000000000000000000000..cc3262a1eafda34842e4dbad47bb6ba72f0c5a68 --- /dev/null +++ b/third_part/face3d/util/html.py @@ -0,0 +1,86 @@ +import dominate +from dominate.tags import meta, h3, table, tr, td, p, a, img, br +import os + + +class HTML: + """This HTML class allows us to save images and write texts into a single HTML file. + + It consists of functions such as (add a text header to the HTML file), + (add a row of images to the HTML file), and (save the HTML to the disk). 
+ It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API. + """ + + def __init__(self, web_dir, title, refresh=0): + """Initialize the HTML classes + + Parameters: + web_dir (str) -- a directory that stores the webpage. HTML file will be created at /index.html; images will be saved at 0: + with self.doc.head: + meta(http_equiv="refresh", content=str(refresh)) + + def get_image_dir(self): + """Return the directory that stores images""" + return self.img_dir + + def add_header(self, text): + """Insert a header to the HTML file + + Parameters: + text (str) -- the header text + """ + with self.doc: + h3(text) + + def add_images(self, ims, txts, links, width=400): + """add images to the HTML file + + Parameters: + ims (str list) -- a list of image paths + txts (str list) -- a list of image names shown on the website + links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page + """ + self.t = table(border=1, style="table-layout: fixed;") # Insert a table + self.doc.add(self.t) + with self.t: + with tr(): + for im, txt, link in zip(ims, txts, links): + with td(style="word-wrap: break-word;", halign="center", valign="top"): + with p(): + with a(href=os.path.join('images', link)): + img(style="width:%dpx" % width, src=os.path.join('images', im)) + br() + p(txt) + + def save(self): + """save the current content to the HMTL file""" + html_file = '%s/index.html' % self.web_dir + f = open(html_file, 'wt') + f.write(self.doc.render()) + f.close() + + +if __name__ == '__main__': # we show an example usage here. + html = HTML('web/', 'test_html') + html.add_header('hello world') + + ims, txts, links = [], [], [] + for n in range(4): + ims.append('image_%d.png' % n) + txts.append('text_%d' % n) + links.append('image_%d.png' % n) + html.add_images(ims, txts, links) + html.save() diff --git a/third_part/face3d/util/load_mats.py b/third_part/face3d/util/load_mats.py new file mode 100644 index 0000000000000000000000000000000000000000..52c613a82bae597e52ec5e3523d9098f007806e9 --- /dev/null +++ b/third_part/face3d/util/load_mats.py @@ -0,0 +1,120 @@ +"""This script is to load 3D face model for Deep3DFaceRecon_pytorch +""" + +import numpy as np +from PIL import Image +from scipy.io import loadmat, savemat +from array import array +import os.path as osp + +# load expression basis +def LoadExpBasis(bfm_folder='BFM'): + n_vertex = 53215 + Expbin = open(osp.join(bfm_folder, 'Exp_Pca.bin'), 'rb') + exp_dim = array('i') + exp_dim.fromfile(Expbin, 1) + expMU = array('f') + expPC = array('f') + expMU.fromfile(Expbin, 3*n_vertex) + expPC.fromfile(Expbin, 3*exp_dim[0]*n_vertex) + Expbin.close() + + expPC = np.array(expPC) + expPC = np.reshape(expPC, [exp_dim[0], -1]) + expPC = np.transpose(expPC) + + expEV = np.loadtxt(osp.join(bfm_folder, 'std_exp.txt')) + + return expPC, expEV + + +# transfer original BFM09 to our face model +def transferBFM09(bfm_folder='BFM'): + print('Transfer BFM09 to BFM_model_front......') + original_BFM = loadmat(osp.join(bfm_folder, '01_MorphableModel.mat')) + shapePC = original_BFM['shapePC'] # shape basis + shapeEV = original_BFM['shapeEV'] # corresponding eigen value + shapeMU = original_BFM['shapeMU'] # mean face + texPC = original_BFM['texPC'] # texture basis + texEV = original_BFM['texEV'] # eigen value + texMU = original_BFM['texMU'] # mean texture + + expPC, expEV = LoadExpBasis() + + # transfer BFM09 to our face model + + idBase = shapePC*np.reshape(shapeEV, [-1, 199]) + 
idBase = idBase/1e5 # unify the scale to decimeter + idBase = idBase[:, :80] # use only first 80 basis + + exBase = expPC*np.reshape(expEV, [-1, 79]) + exBase = exBase/1e5 # unify the scale to decimeter + exBase = exBase[:, :64] # use only first 64 basis + + texBase = texPC*np.reshape(texEV, [-1, 199]) + texBase = texBase[:, :80] # use only first 80 basis + + # our face model is cropped along face landmarks and contains only 35709 vertex. + # original BFM09 contains 53490 vertex, and expression basis provided by Guo et al. contains 53215 vertex. + # thus we select corresponding vertex to get our face model. + + index_exp = loadmat(osp.join(bfm_folder, 'BFM_front_idx.mat')) + index_exp = index_exp['idx'].astype(np.int32) - 1 # starts from 0 (to 53215) + + index_shape = loadmat(osp.join(bfm_folder, 'BFM_exp_idx.mat')) + index_shape = index_shape['trimIndex'].astype( + np.int32) - 1 # starts from 0 (to 53490) + index_shape = index_shape[index_exp] + + idBase = np.reshape(idBase, [-1, 3, 80]) + idBase = idBase[index_shape, :, :] + idBase = np.reshape(idBase, [-1, 80]) + + texBase = np.reshape(texBase, [-1, 3, 80]) + texBase = texBase[index_shape, :, :] + texBase = np.reshape(texBase, [-1, 80]) + + exBase = np.reshape(exBase, [-1, 3, 64]) + exBase = exBase[index_exp, :, :] + exBase = np.reshape(exBase, [-1, 64]) + + meanshape = np.reshape(shapeMU, [-1, 3])/1e5 + meanshape = meanshape[index_shape, :] + meanshape = np.reshape(meanshape, [1, -1]) + + meantex = np.reshape(texMU, [-1, 3]) + meantex = meantex[index_shape, :] + meantex = np.reshape(meantex, [1, -1]) + + # other info contains triangles, region used for computing photometric loss, + # region used for skin texture regularization, and 68 landmarks index etc. + other_info = loadmat(osp.join(bfm_folder, 'facemodel_info.mat')) + frontmask2_idx = other_info['frontmask2_idx'] + skinmask = other_info['skinmask'] + keypoints = other_info['keypoints'] + point_buf = other_info['point_buf'] + tri = other_info['tri'] + tri_mask2 = other_info['tri_mask2'] + + # save our face model + savemat(osp.join(bfm_folder, 'BFM_model_front.mat'), {'meanshape': meanshape, 'meantex': meantex, 'idBase': idBase, 'exBase': exBase, 'texBase': texBase, + 'tri': tri, 'point_buf': point_buf, 'tri_mask2': tri_mask2, 'keypoints': keypoints, 'frontmask2_idx': frontmask2_idx, 'skinmask': skinmask}) + + +# load landmarks for standard face, which is used for image preprocessing +def load_lm3d(bfm_folder): + + Lm3D = loadmat(osp.join(bfm_folder, 'similarity_Lm3D_all.mat')) + Lm3D = Lm3D['lm'] + + # calculate 5 facial landmarks using 68 landmarks + lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 + Lm3D = np.stack([Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean( + Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], Lm3D[lm_idx[6], :]], axis=0) + Lm3D = Lm3D[[1, 2, 0, 3, 4], :] + + return Lm3D + + +if __name__ == '__main__': + transferBFM09() \ No newline at end of file diff --git a/third_part/face3d/util/nvdiffrast.py b/third_part/face3d/util/nvdiffrast.py new file mode 100644 index 0000000000000000000000000000000000000000..08490cd190734489406e6f61810bd34629294ef9 --- /dev/null +++ b/third_part/face3d/util/nvdiffrast.py @@ -0,0 +1,89 @@ +"""This script is the differentiable renderer for Deep3DFaceRecon_pytorch + Attention, antialiasing step is missing in current version. 
+""" + +import torch +import torch.nn.functional as F +import kornia +from kornia.geometry.camera import pixel2cam +import numpy as np +from typing import List +import nvdiffrast.torch as dr +from scipy.io import loadmat +from torch import nn + +def ndc_projection(x=0.1, n=1.0, f=50.0): + return np.array([[n/x, 0, 0, 0], + [ 0, n/-x, 0, 0], + [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], + [ 0, 0, -1, 0]]).astype(np.float32) + +class MeshRenderer(nn.Module): + def __init__(self, + rasterize_fov, + znear=0.1, + zfar=10, + rasterize_size=224): + super(MeshRenderer, self).__init__() + + x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear + self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( + torch.diag(torch.tensor([1., -1, -1, 1]))) + self.rasterize_size = rasterize_size + self.glctx = None + + def forward(self, vertex, tri, feat=None): + """ + Return: + mask -- torch.tensor, size (B, 1, H, W) + depth -- torch.tensor, size (B, 1, H, W) + features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None + + Parameters: + vertex -- torch.tensor, size (B, N, 3) + tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles + feat(optional) -- torch.tensor, size (B, C), features + """ + device = vertex.device + rsize = int(self.rasterize_size) + ndc_proj = self.ndc_proj.to(device) + # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v + if vertex.shape[-1] == 3: + vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) + vertex[..., 1] = -vertex[..., 1] + + + vertex_ndc = vertex @ ndc_proj.t() + if self.glctx is None: + self.glctx = dr.RasterizeGLContext(device=device) + print("create glctx on device cuda:%d"%device.index) + + ranges = None + if isinstance(tri, List) or len(tri.shape) == 3: + vum = vertex_ndc.shape[1] + fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) + fstartidx = torch.cumsum(fnum, dim=0) - fnum + ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() + for i in range(tri.shape[0]): + tri[i] = tri[i] + i*vum + vertex_ndc = torch.cat(vertex_ndc, dim=0) + tri = torch.cat(tri, dim=0) + + # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] + tri = tri.type(torch.int32).contiguous() + rast_out, _ = dr.rasterize(self.glctx, vertex_ndc.contiguous(), tri, resolution=[rsize, rsize], ranges=ranges) + + depth, _ = dr.interpolate(vertex.reshape([-1,4])[...,2].unsqueeze(1).contiguous(), rast_out, tri) + depth = depth.permute(0, 3, 1, 2) + mask = (rast_out[..., 3] > 0).float().unsqueeze(1) + depth = mask * depth + + + image = None + if feat is not None: + image, _ = dr.interpolate(feat, rast_out, tri) + image = image.permute(0, 3, 1, 2) + image = mask * image + + return mask, depth, image + diff --git a/third_part/face3d/util/preprocess.py b/third_part/face3d/util/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..43d1ed7c914a025f56d8dc40420d593750591b31 --- /dev/null +++ b/third_part/face3d/util/preprocess.py @@ -0,0 +1,230 @@ +""" +This script contains the image preprocessing code for Deep3DFaceRecon_pytorch +""" + +import numpy as np +from scipy.io import loadmat +from PIL import Image +import cv2 +import os +from skimage import transform as trans +import torch +import warnings +warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) +warnings.filterwarnings("ignore", category=FutureWarning) + + +# calculating least square problem for image alignment +def POS(xp, x): + npts = 
xp.shape[1] + + A = np.zeros([2*npts, 8]) + + A[0:2*npts-1:2, 0:3] = x.transpose() + A[0:2*npts-1:2, 3] = 1 + + A[1:2*npts:2, 4:7] = x.transpose() + A[1:2*npts:2, 7] = 1 + + b = np.reshape(xp.transpose(), [2*npts, 1]) + + k, _, _, _ = np.linalg.lstsq(A, b) + + R1 = k[0:3] + R2 = k[4:7] + sTx = k[3] + sTy = k[7] + s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2 + t = np.stack([sTx, sTy], axis=0) + + return t, s + +# bounding box for 68 landmark detection +def BBRegression(points, params): + + w1 = params['W1'] + b1 = params['B1'] + w2 = params['W2'] + b2 = params['B2'] + data = points.copy() + data = data.reshape([5, 2]) + data_mean = np.mean(data, axis=0) + x_mean = data_mean[0] + y_mean = data_mean[1] + data[:, 0] = data[:, 0] - x_mean + data[:, 1] = data[:, 1] - y_mean + + rms = np.sqrt(np.sum(data ** 2)/5) + data = data / rms + data = data.reshape([1, 10]) + data = np.transpose(data) + inputs = np.matmul(w1, data) + b1 + inputs = 2 / (1 + np.exp(-2 * inputs)) - 1 + inputs = np.matmul(w2, inputs) + b2 + inputs = np.transpose(inputs) + x = inputs[:, 0] * rms + x_mean + y = inputs[:, 1] * rms + y_mean + w = 224/inputs[:, 2] * rms + rects = [x, y, w, w] + return np.array(rects).reshape([4]) + +# utils for landmark detection +def img_padding(img, box): + success = True + bbox = box.copy() + res = np.zeros([2*img.shape[0], 2*img.shape[1], 3]) + res[img.shape[0] // 2: img.shape[0] + img.shape[0] // + 2, img.shape[1] // 2: img.shape[1] + img.shape[1]//2] = img + + bbox[0] = bbox[0] + img.shape[1] // 2 + bbox[1] = bbox[1] + img.shape[0] // 2 + if bbox[0] < 0 or bbox[1] < 0: + success = False + return res, bbox, success + +# utils for landmark detection +def crop(img, bbox): + padded_img, padded_bbox, flag = img_padding(img, bbox) + if flag: + crop_img = padded_img[padded_bbox[1]: padded_bbox[1] + + padded_bbox[3], padded_bbox[0]: padded_bbox[0] + padded_bbox[2]] + crop_img = cv2.resize(crop_img.astype(np.uint8), + (224, 224), interpolation=cv2.INTER_CUBIC) + scale = 224 / padded_bbox[3] + return crop_img, scale + else: + return padded_img, 0 + +# utils for landmark detection +def scale_trans(img, lm, t, s): + imgw = img.shape[1] + imgh = img.shape[0] + M_s = np.array([[1, 0, -t[0] + imgw//2 + 0.5], [0, 1, -imgh//2 + t[1]]], + dtype=np.float32) + img = cv2.warpAffine(img, M_s, (imgw, imgh)) + w = int(imgw / s * 100) + h = int(imgh / s * 100) + img = cv2.resize(img, (w, h)) + lm = np.stack([lm[:, 0] - t[0] + imgw // 2, lm[:, 1] - + t[1] + imgh // 2], axis=1) / s * 100 + + left = w//2 - 112 + up = h//2 - 112 + bbox = [left, up, 224, 224] + cropped_img, scale2 = crop(img, bbox) + assert(scale2!=0) + t1 = np.array([bbox[0], bbox[1]]) + + # back to raw img s * crop + s * t1 + t2 + t1 = np.array([w//2 - 112, h//2 - 112]) + scale = s / 100 + t2 = np.array([t[0] - imgw/2, t[1] - imgh / 2]) + inv = (scale/scale2, scale * t1 + t2.reshape([2])) + return cropped_img, inv + +# utils for landmark detection +def align_for_lm(img, five_points): + five_points = np.array(five_points).reshape([1, 10]) + params = loadmat('util/BBRegressorParam_r.mat') + bbox = BBRegression(five_points, params) + assert(bbox[2] != 0) + bbox = np.round(bbox).astype(np.int32) + crop_img, scale = crop(img, bbox) + return crop_img, scale, bbox + + +# resize and crop images for face reconstruction +def resize_n_crop_img(img, lm, t, s, target_size=224., mask=None): + w0, h0 = img.size + w = (w0*s).astype(np.int32) + h = (h0*s).astype(np.int32) + left = (w/2 - target_size/2 + float((t[0] - w0/2)*s)).astype(np.int32) + right = left + 
target_size + up = (h/2 - target_size/2 + float((h0/2 - t[1])*s)).astype(np.int32) + below = up + target_size + + img = img.resize((w, h), resample=Image.BICUBIC) + img = img.crop((left, up, right, below)) + + if mask is not None: + mask = mask.resize((w, h), resample=Image.BICUBIC) + mask = mask.crop((left, up, right, below)) + + lm = np.stack([lm[:, 0] - t[0] + w0/2, lm[:, 1] - + t[1] + h0/2], axis=1)*s + lm = lm - np.reshape( + np.array([(w/2 - target_size/2), (h/2-target_size/2)]), [1, 2]) + + return img, lm, mask + +# utils for face reconstruction +def extract_5p(lm): + lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 + lm5p = np.stack([lm[lm_idx[0], :], np.mean(lm[lm_idx[[1, 2]], :], 0), np.mean( + lm[lm_idx[[3, 4]], :], 0), lm[lm_idx[5], :], lm[lm_idx[6], :]], axis=0) + lm5p = lm5p[[1, 2, 0, 3, 4], :] + return lm5p + +# utils for face reconstruction +def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): + """ + Return: + transparams --numpy.array (raw_W, raw_H, scale, tx, ty) + img_new --PIL.Image (target_size, target_size, 3) + lm_new --numpy.array (68, 2), y direction is opposite to v direction + mask_new --PIL.Image (target_size, target_size) + + Parameters: + img --PIL.Image (raw_H, raw_W, 3) + lm --numpy.array (68, 2), y direction is opposite to v direction + lm3D --numpy.array (5, 3) + mask --PIL.Image (raw_H, raw_W, 3) + """ + + w0, h0 = img.size + if lm.shape[0] != 5: + lm5p = extract_5p(lm) + else: + lm5p = lm + + # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face + t, s = POS(lm5p.transpose(), lm3D.transpose()) + s = rescale_factor/s + + # processing the image + img_new, lm_new, mask_new = resize_n_crop_img(img, lm, t, s, target_size=target_size, mask=mask) + trans_params = np.array([w0, h0, s, t[0], t[1]]) + + return trans_params, img_new, lm_new, mask_new + +# utils for face recognition model +def estimate_norm(lm_68p, H): + # from https://github.com/deepinsight/insightface/blob/c61d3cd208a603dfa4a338bd743b320ce3e94730/recognition/common/face_align.py#L68 + """ + Return: + trans_m --numpy.array (2, 3) + Parameters: + lm --numpy.array (68, 2), y direction is opposite to v direction + H --int/float , image height + """ + lm = extract_5p(lm_68p) + lm[:, -1] = H - 1 - lm[:, -1] + tform = trans.SimilarityTransform() + src = np.array( + [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], + [41.5493, 92.3655], [70.7299, 92.2041]], + dtype=np.float32) + tform.estimate(lm, src) + M = tform.params + if np.linalg.det(M) == 0: + M = np.eye(3) + + return M[0:2, :] + +def estimate_norm_torch(lm_68p, H): + lm_68p_ = lm_68p.detach().cpu().numpy() + M = [] + for i in range(lm_68p_.shape[0]): + M.append(estimate_norm(lm_68p_[i], H)) + M = torch.tensor(np.array(M), dtype=torch.float32).to(lm_68p.device) + return M diff --git a/third_part/face3d/util/skin_mask.py b/third_part/face3d/util/skin_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..a8a74e4c3b40d13b0258b83a12f56321a85bb179 --- /dev/null +++ b/third_part/face3d/util/skin_mask.py @@ -0,0 +1,125 @@ +"""This script is to generate skin attention mask for Deep3DFaceRecon_pytorch +""" + +import math +import numpy as np +import os +import cv2 + +class GMM: + def __init__(self, dim, num, w, mu, cov, cov_det, cov_inv): + self.dim = dim # feature dimension + self.num = num # number of Gaussian components + self.w = w # weights of Gaussian components (a list of scalars) + self.mu= mu # mean of Gaussian components (a list of 
1xdim vectors) + self.cov = cov # covariance matrix of Gaussian components (a list of dimxdim matrices) + self.cov_det = cov_det # pre-computed determinet of covariance matrices (a list of scalars) + self.cov_inv = cov_inv # pre-computed inverse covariance matrices (a list of dimxdim matrices) + + self.factor = [0]*num + for i in range(self.num): + self.factor[i] = (2*math.pi)**(self.dim/2) * self.cov_det[i]**0.5 + + def likelihood(self, data): + assert(data.shape[1] == self.dim) + N = data.shape[0] + lh = np.zeros(N) + + for i in range(self.num): + data_ = data - self.mu[i] + + tmp = np.matmul(data_,self.cov_inv[i]) * data_ + tmp = np.sum(tmp,axis=1) + power = -0.5 * tmp + + p = np.array([math.exp(power[j]) for j in range(N)]) + p = p/self.factor[i] + lh += p*self.w[i] + + return lh + + +def _rgb2ycbcr(rgb): + m = np.array([[65.481, 128.553, 24.966], + [-37.797, -74.203, 112], + [112, -93.786, -18.214]]) + shape = rgb.shape + rgb = rgb.reshape((shape[0] * shape[1], 3)) + ycbcr = np.dot(rgb, m.transpose() / 255.) + ycbcr[:, 0] += 16. + ycbcr[:, 1:] += 128. + return ycbcr.reshape(shape) + + +def _bgr2ycbcr(bgr): + rgb = bgr[..., ::-1] + return _rgb2ycbcr(rgb) + + +gmm_skin_w = [0.24063933, 0.16365987, 0.26034665, 0.33535415] +gmm_skin_mu = [np.array([113.71862, 103.39613, 164.08226]), + np.array([150.19858, 105.18467, 155.51428]), + np.array([183.92976, 107.62468, 152.71820]), + np.array([114.90524, 113.59782, 151.38217])] +gmm_skin_cov_det = [5692842.5, 5851930.5, 2329131., 1585971.] +gmm_skin_cov_inv = [np.array([[0.0019472069, 0.0020450759, -0.00060243998],[0.0020450759, 0.017700525, 0.0051420014],[-0.00060243998, 0.0051420014, 0.0081308950]]), + np.array([[0.0027110141, 0.0011036990, 0.0023122299],[0.0011036990, 0.010707724, 0.010742856],[0.0023122299, 0.010742856, 0.017481629]]), + np.array([[0.0048026871, 0.00022935172, 0.0077668377],[0.00022935172, 0.011729696, 0.0081661865],[0.0077668377, 0.0081661865, 0.025374353]]), + np.array([[0.0011989699, 0.0022453172, -0.0010748957],[0.0022453172, 0.047758564, 0.020332102],[-0.0010748957, 0.020332102, 0.024502251]])] + +gmm_skin = GMM(3, 4, gmm_skin_w, gmm_skin_mu, [], gmm_skin_cov_det, gmm_skin_cov_inv) + +gmm_nonskin_w = [0.12791070, 0.31130761, 0.34245777, 0.21832393] +gmm_nonskin_mu = [np.array([99.200851, 112.07533, 140.20602]), + np.array([110.91392, 125.52969, 130.19237]), + np.array([129.75864, 129.96107, 126.96808]), + np.array([112.29587, 128.85121, 129.05431])] +gmm_nonskin_cov_det = [458703648., 6466488., 90611376., 133097.63] +gmm_nonskin_cov_inv = [np.array([[0.00085371657, 0.00071197288, 0.00023958916],[0.00071197288, 0.0025935620, 0.00076557708],[0.00023958916, 0.00076557708, 0.0015042332]]), + np.array([[0.00024650150, 0.00045542428, 0.00015019422],[0.00045542428, 0.026412144, 0.018419769],[0.00015019422, 0.018419769, 0.037497383]]), + np.array([[0.00037054974, 0.00038146760, 0.00040408765],[0.00038146760, 0.0085505722, 0.0079136286],[0.00040408765, 0.0079136286, 0.010982352]]), + np.array([[0.00013709733, 0.00051228428, 0.00012777430],[0.00051228428, 0.28237113, 0.10528370],[0.00012777430, 0.10528370, 0.23468947]])] + +gmm_nonskin = GMM(3, 4, gmm_nonskin_w, gmm_nonskin_mu, [], gmm_nonskin_cov_det, gmm_nonskin_cov_inv) + +prior_skin = 0.8 +prior_nonskin = 1 - prior_skin + + +# calculate skin attention mask +def skinmask(imbgr): + im = _bgr2ycbcr(imbgr) + + data = im.reshape((-1,3)) + + lh_skin = gmm_skin.likelihood(data) + lh_nonskin = gmm_nonskin.likelihood(data) + + tmp1 = prior_skin * lh_skin + tmp2 = prior_nonskin * 
lh_nonskin + post_skin = tmp1 / (tmp1+tmp2) # posterior probability + + post_skin = post_skin.reshape((im.shape[0],im.shape[1])) + + post_skin = np.round(post_skin*255) + post_skin = post_skin.astype(np.uint8) + post_skin = np.tile(np.expand_dims(post_skin,2),[1,1,3]) # reshape to H*W*3 + + return post_skin + + +def get_skin_mask(img_path): + print('generating skin masks......') + names = [i for i in sorted(os.listdir( + img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] + save_path = os.path.join(img_path, 'mask') + if not os.path.isdir(save_path): + os.makedirs(save_path) + + for i in range(0, len(names)): + name = names[i] + print('%05d' % (i), ' ', name) + full_image_name = os.path.join(img_path, name) + img = cv2.imread(full_image_name).astype(np.float32) + skin_img = skinmask(img) + cv2.imwrite(os.path.join(save_path, name), skin_img.astype(np.uint8)) diff --git a/third_part/face3d/util/test_mean_face.txt b/third_part/face3d/util/test_mean_face.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a46d4db7699ffed8f898fcee64099631509946d --- /dev/null +++ b/third_part/face3d/util/test_mean_face.txt @@ -0,0 +1,136 @@ +-5.228591537475585938e+01 +2.078247070312500000e-01 +-5.064269638061523438e+01 +-1.315765380859375000e+01 +-4.952939224243164062e+01 +-2.592591094970703125e+01 +-4.793047332763671875e+01 +-3.832135772705078125e+01 +-4.512159729003906250e+01 +-5.059623336791992188e+01 +-3.917720794677734375e+01 +-6.043736648559570312e+01 +-2.929953765869140625e+01 +-6.861183166503906250e+01 +-1.719801330566406250e+01 +-7.572736358642578125e+01 +-1.961936950683593750e+00 +-7.862001037597656250e+01 +1.467941284179687500e+01 +-7.607844543457031250e+01 +2.744073486328125000e+01 +-6.915261840820312500e+01 +3.855677795410156250e+01 +-5.950350570678710938e+01 +4.478240966796875000e+01 +-4.867547225952148438e+01 +4.714337158203125000e+01 +-3.800830078125000000e+01 +4.940315246582031250e+01 +-2.496297454833984375e+01 +5.117234802246093750e+01 +-1.241538238525390625e+01 +5.190507507324218750e+01 +8.244247436523437500e-01 +-4.150688934326171875e+01 +2.386329650878906250e+01 +-3.570307159423828125e+01 +3.017010498046875000e+01 +-2.790358734130859375e+01 +3.212951660156250000e+01 +-1.941773223876953125e+01 +3.156523132324218750e+01 +-1.138106536865234375e+01 +2.841992187500000000e+01 +5.993263244628906250e+00 +2.895182800292968750e+01 +1.343590545654296875e+01 +3.189880371093750000e+01 +2.203153991699218750e+01 +3.302221679687500000e+01 +2.992478942871093750e+01 +3.099150085449218750e+01 +3.628388977050781250e+01 +2.765748596191406250e+01 +-1.933914184570312500e+00 +1.405374145507812500e+01 +-2.153038024902343750e+00 +5.772636413574218750e+00 +-2.270050048828125000e+00 +-2.121643066406250000e+00 +-2.218330383300781250e+00 +-1.068978118896484375e+01 +-1.187252044677734375e+01 +-1.997912597656250000e+01 +-6.879402160644531250e+00 +-2.143579864501953125e+01 +-1.227821350097656250e+00 +-2.193494415283203125e+01 +4.623237609863281250e+00 +-2.152721405029296875e+01 +9.721397399902343750e+00 +-1.953671264648437500e+01 +-3.648714447021484375e+01 +9.811126708984375000e+00 +-3.130242919921875000e+01 +1.422447967529296875e+01 +-2.212834930419921875e+01 +1.493019866943359375e+01 +-1.500880432128906250e+01 +1.073588562011718750e+01 +-2.095037078857421875e+01 +9.054298400878906250e+00 +-3.050099182128906250e+01 +8.704177856445312500e+00 +1.173237609863281250e+01 +1.054329681396484375e+01 +1.856353759765625000e+01 +1.535009765625000000e+01 +2.893331909179687500e+01 
+1.451992797851562500e+01 +3.452944946289062500e+01 +1.065280151367187500e+01 +2.875990295410156250e+01 +8.654792785644531250e+00 +1.942100524902343750e+01 +9.422447204589843750e+00 +-2.204488372802734375e+01 +-3.983994293212890625e+01 +-1.324458312988281250e+01 +-3.467377471923828125e+01 +-6.749649047851562500e+00 +-3.092894744873046875e+01 +-9.183349609375000000e-01 +-3.196458435058593750e+01 +4.220649719238281250e+00 +-3.090406036376953125e+01 +1.089889526367187500e+01 +-3.497008514404296875e+01 +1.874589538574218750e+01 +-4.065438079833984375e+01 +1.124106597900390625e+01 +-4.438417816162109375e+01 +5.181709289550781250e+00 +-4.649170684814453125e+01 +-1.158607482910156250e+00 +-4.680406951904296875e+01 +-7.918922424316406250e+00 +-4.671575164794921875e+01 +-1.452505493164062500e+01 +-4.416526031494140625e+01 +-2.005007171630859375e+01 +-3.997841644287109375e+01 +-1.054919433593750000e+01 +-3.849683380126953125e+01 +-1.051826477050781250e+00 +-3.794863128662109375e+01 +6.412681579589843750e+00 +-3.804645538330078125e+01 +1.627674865722656250e+01 +-4.039697265625000000e+01 +6.373878479003906250e+00 +-4.087213897705078125e+01 +-8.551712036132812500e-01 +-4.157129669189453125e+01 +-1.014953613281250000e+01 +-4.128469085693359375e+01 diff --git a/third_part/face3d/util/util.py b/third_part/face3d/util/util.py new file mode 100644 index 0000000000000000000000000000000000000000..0d689ca138fc0fbf5bec794511ea0f9e638f9ea9 --- /dev/null +++ b/third_part/face3d/util/util.py @@ -0,0 +1,208 @@ +"""This script contains basic utilities for Deep3DFaceRecon_pytorch +""" +from __future__ import print_function +import numpy as np +import torch +from PIL import Image +import os +import importlib +import argparse +from argparse import Namespace +import torchvision + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + +def copyconf(default_opt, **kwargs): + conf = Namespace(**vars(default_opt)) + for key in kwargs: + setattr(conf, key, kwargs[key]) + return conf + +def genvalconf(train_opt, **kwargs): + conf = Namespace(**vars(train_opt)) + attr_dict = train_opt.__dict__ + for key, value in attr_dict.items(): + if 'val' in key and key.split('_')[0] in attr_dict: + setattr(conf, key.split('_')[0], value) + + for key in kwargs: + setattr(conf, key, kwargs[key]) + + return conf + +def find_class_in_module(target_cls_name, module): + target_cls_name = target_cls_name.replace('_', '').lower() + clslib = importlib.import_module(module) + cls = None + for name, clsobj in clslib.__dict__.items(): + if name.lower() == target_cls_name: + cls = clsobj + + assert cls is not None, "In %s, there should be a class whose name matches %s in lowercase without underscore(_)" % (module, target_cls_name) + + return cls + + +def tensor2im(input_image, imtype=np.uint8): + """"Converts a Tensor array into a numpy image array. 
+ + Parameters: + input_image (tensor) -- the input image tensor array, range(0, 1) + imtype (type) -- the desired type of the converted numpy array + """ + if not isinstance(input_image, np.ndarray): + if isinstance(input_image, torch.Tensor): # get the data from a variable + image_tensor = input_image.data + else: + return input_image + image_numpy = image_tensor.clamp(0.0, 1.0).cpu().float().numpy() # convert it into a numpy array + if image_numpy.shape[0] == 1: # grayscale to RGB + image_numpy = np.tile(image_numpy, (3, 1, 1)) + image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 # post-processing: tranpose and scaling + else: # if it is a numpy array, do nothing + image_numpy = input_image + return image_numpy.astype(imtype) + + +def diagnose_network(net, name='network'): + """Calculate and print the mean of average absolute(gradients) + + Parameters: + net (torch network) -- Torch network + name (str) -- the name of the network + """ + mean = 0.0 + count = 0 + for param in net.parameters(): + if param.grad is not None: + mean += torch.mean(torch.abs(param.grad.data)) + count += 1 + if count > 0: + mean = mean / count + print(name) + print(mean) + + +def save_image(image_numpy, image_path, aspect_ratio=1.0): + """Save a numpy image to the disk + + Parameters: + image_numpy (numpy array) -- input numpy array + image_path (str) -- the path of the image + """ + + image_pil = Image.fromarray(image_numpy) + h, w, _ = image_numpy.shape + + if aspect_ratio is None: + pass + elif aspect_ratio > 1.0: + image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC) + elif aspect_ratio < 1.0: + image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC) + image_pil.save(image_path) + + +def print_numpy(x, val=True, shp=False): + """Print the mean, min, max, median, std, and size of a numpy array + + Parameters: + val (bool) -- if print the values of the numpy array + shp (bool) -- if print the shape of the numpy array + """ + x = x.astype(np.float64) + if shp: + print('shape,', x.shape) + if val: + x = x.flatten() + print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( + np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) + + +def mkdirs(paths): + """create empty directories if they don't exist + + Parameters: + paths (str list) -- a list of directory paths + """ + if isinstance(paths, list) and not isinstance(paths, str): + for path in paths: + mkdir(path) + else: + mkdir(paths) + + +def mkdir(path): + """create a single empty directory if it didn't exist + + Parameters: + path (str) -- a single directory path + """ + if not os.path.exists(path): + os.makedirs(path) + + +def correct_resize_label(t, size): + device = t.device + t = t.detach().cpu() + resized = [] + for i in range(t.size(0)): + one_t = t[i, :1] + one_np = np.transpose(one_t.numpy().astype(np.uint8), (1, 2, 0)) + one_np = one_np[:, :, 0] + one_image = Image.fromarray(one_np).resize(size, Image.NEAREST) + resized_t = torch.from_numpy(np.array(one_image)).long() + resized.append(resized_t) + return torch.stack(resized, dim=0).to(device) + + +def correct_resize(t, size, mode=Image.BICUBIC): + device = t.device + t = t.detach().cpu() + resized = [] + for i in range(t.size(0)): + one_t = t[i:i + 1] + one_image = Image.fromarray(tensor2im(one_t)).resize(size, Image.BICUBIC) + resized_t = torchvision.transforms.functional.to_tensor(one_image) * 2 - 1.0 + resized.append(resized_t) + return torch.stack(resized, dim=0).to(device) + +def draw_landmarks(img, landmark, color='r', 
step=2): + """ + Return: + img -- numpy.array, (B, H, W, 3) img with landmark, RGB order, range (0, 255) + + + Parameters: + img -- numpy.array, (B, H, W, 3), RGB order, range (0, 255) + landmark -- numpy.array, (B, 68, 2), y direction is opposite to v direction + color -- str, 'r' or 'b' (red or blue) + """ + if color =='r': + c = np.array([255., 0, 0]) + else: + c = np.array([0, 0, 255.]) + + _, H, W, _ = img.shape + img, landmark = img.copy(), landmark.copy() + landmark[..., 1] = H - 1 - landmark[..., 1] + landmark = np.round(landmark).astype(np.int32) + for i in range(landmark.shape[1]): + x, y = landmark[:, i, 0], landmark[:, i, 1] + for j in range(-step, step): + for k in range(-step, step): + u = np.clip(x + j, 0, W - 1) + v = np.clip(y + k, 0, H - 1) + for m in range(landmark.shape[0]): + img[m, v[m], u[m]] = c + return img diff --git a/third_part/face3d/util/visualizer.py b/third_part/face3d/util/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..4023a6d4086acba9bc88e079f625194d324d7c9e --- /dev/null +++ b/third_part/face3d/util/visualizer.py @@ -0,0 +1,227 @@ +"""This script defines the visualizer for Deep3DFaceRecon_pytorch +""" + +import numpy as np +import os +import sys +import ntpath +import time +from . import util, html +from subprocess import Popen, PIPE +from torch.utils.tensorboard import SummaryWriter + +def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256): + """Save images to the disk. + + Parameters: + webpage (the HTML class) -- the HTML webpage class that stores these imaegs (see html.py for more details) + visuals (OrderedDict) -- an ordered dictionary that stores (name, images (either tensor or numpy) ) pairs + image_path (str) -- the string is used to create image paths + aspect_ratio (float) -- the aspect ratio of saved images + width (int) -- the images will be resized to width x width + + This function will save images stored in 'visuals' to the HTML file specified by 'webpage'. + """ + image_dir = webpage.get_image_dir() + short_path = ntpath.basename(image_path[0]) + name = os.path.splitext(short_path)[0] + + webpage.add_header(name) + ims, txts, links = [], [], [] + + for label, im_data in visuals.items(): + im = util.tensor2im(im_data) + image_name = '%s/%s.png' % (label, name) + os.makedirs(os.path.join(image_dir, label), exist_ok=True) + save_path = os.path.join(image_dir, image_name) + util.save_image(im, save_path, aspect_ratio=aspect_ratio) + ims.append(image_name) + txts.append(label) + links.append(image_name) + webpage.add_images(ims, txts, links, width=width) + + +class Visualizer(): + """This class includes several functions that can display/save images and print/save logging information. + + It uses a Python library tensprboardX for display, and a Python library 'dominate' (wrapped in 'HTML') for creating HTML files with images. 
+ """ + + def __init__(self, opt): + """Initialize the Visualizer class + + Parameters: + opt -- stores all the experiment flags; needs to be a subclass of BaseOptions + Step 1: Cache the training/test options + Step 2: create a tensorboard writer + Step 3: create an HTML object for saveing HTML filters + Step 4: create a logging file to store training losses + """ + self.opt = opt # cache the option + self.use_html = opt.isTrain and not opt.no_html + self.writer = SummaryWriter(os.path.join(opt.checkpoints_dir, 'logs', opt.name)) + self.win_size = opt.display_winsize + self.name = opt.name + self.saved = False + if self.use_html: # create an HTML object at /web/; images will be saved under /web/images/ + self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') + self.img_dir = os.path.join(self.web_dir, 'images') + print('create web directory %s...' % self.web_dir) + util.mkdirs([self.web_dir, self.img_dir]) + # create a logging file to store training losses + self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') + with open(self.log_name, "a") as log_file: + now = time.strftime("%c") + log_file.write('================ Training Loss (%s) ================\n' % now) + + def reset(self): + """Reset the self.saved status""" + self.saved = False + + + def display_current_results(self, visuals, total_iters, epoch, save_result): + """Display current results on tensorboad; save current results to an HTML file. + + Parameters: + visuals (OrderedDict) - - dictionary of images to display or save + total_iters (int) -- total iterations + epoch (int) - - the current epoch + save_result (bool) - - if save the current results to an HTML file + """ + for label, image in visuals.items(): + self.writer.add_image(label, util.tensor2im(image), total_iters, dataformats='HWC') + + if self.use_html and (save_result or not self.saved): # save images to an HTML file if they haven't been saved. 
+ self.saved = True + # save images to the disk + for label, image in visuals.items(): + image_numpy = util.tensor2im(image) + img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) + util.save_image(image_numpy, img_path) + + # update website + webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=0) + for n in range(epoch, 0, -1): + webpage.add_header('epoch [%d]' % n) + ims, txts, links = [], [], [] + + for label, image_numpy in visuals.items(): + image_numpy = util.tensor2im(image) + img_path = 'epoch%.3d_%s.png' % (n, label) + ims.append(img_path) + txts.append(label) + links.append(img_path) + webpage.add_images(ims, txts, links, width=self.win_size) + webpage.save() + + def plot_current_losses(self, total_iters, losses): + # G_loss_collection = {} + # D_loss_collection = {} + # for name, value in losses.items(): + # if 'G' in name or 'NCE' in name or 'idt' in name: + # G_loss_collection[name] = value + # else: + # D_loss_collection[name] = value + # self.writer.add_scalars('G_collec', G_loss_collection, total_iters) + # self.writer.add_scalars('D_collec', D_loss_collection, total_iters) + for name, value in losses.items(): + self.writer.add_scalar(name, value, total_iters) + + # losses: same format as |losses| of plot_current_losses + def print_current_losses(self, epoch, iters, losses, t_comp, t_data): + """print current losses on console; also save the losses to the disk + + Parameters: + epoch (int) -- current epoch + iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) + losses (OrderedDict) -- training losses stored in the format of (name, float) pairs + t_comp (float) -- computational time per data point (normalized by batch_size) + t_data (float) -- data loading time per data point (normalized by batch_size) + """ + message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data) + for k, v in losses.items(): + message += '%s: %.3f ' % (k, v) + + print(message) # print the message + with open(self.log_name, "a") as log_file: + log_file.write('%s\n' % message) # save the message + + +class MyVisualizer: + def __init__(self, opt): + """Initialize the Visualizer class + + Parameters: + opt -- stores all the experiment flags; needs to be a subclass of BaseOptions + Step 1: Cache the training/test options + Step 2: create a tensorboard writer + Step 3: create an HTML object for saveing HTML filters + Step 4: create a logging file to store training losses + """ + self.opt = opt # cache the optio + self.name = opt.name + self.img_dir = os.path.join(opt.checkpoints_dir, opt.name, 'results') + + if opt.phase != 'test': + self.writer = SummaryWriter(os.path.join(opt.checkpoints_dir, opt.name, 'logs')) + # create a logging file to store training losses + self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') + with open(self.log_name, "a") as log_file: + now = time.strftime("%c") + log_file.write('================ Training Loss (%s) ================\n' % now) + + + def display_current_results(self, visuals, total_iters, epoch, dataset='train', save_results=False, count=0, name=None, + add_image=True): + """Display current results on tensorboad; save current results to an HTML file. 
+ + Parameters: + visuals (OrderedDict) - - dictionary of images to display or save + total_iters (int) -- total iterations + epoch (int) - - the current epoch + dataset (str) - - 'train' or 'val' or 'test' + """ + # if (not add_image) and (not save_results): return + + for label, image in visuals.items(): + for i in range(image.shape[0]): + image_numpy = util.tensor2im(image[i]) + if add_image: + self.writer.add_image(label + '%s_%02d'%(dataset, i + count), + image_numpy, total_iters, dataformats='HWC') + + if save_results: + save_path = os.path.join(self.img_dir, dataset, 'epoch_%s_%06d'%(epoch, total_iters)) + if not os.path.isdir(save_path): + os.makedirs(save_path) + + if name is not None: + img_path = os.path.join(save_path, '%s.png' % name) + else: + img_path = os.path.join(save_path, '%s_%03d.png' % (label, i + count)) + util.save_image(image_numpy, img_path) + + + def plot_current_losses(self, total_iters, losses, dataset='train'): + for name, value in losses.items(): + self.writer.add_scalar(name + '/%s'%dataset, value, total_iters) + + # losses: same format as |losses| of plot_current_losses + def print_current_losses(self, epoch, iters, losses, t_comp, t_data, dataset='train'): + """print current losses on console; also save the losses to the disk + + Parameters: + epoch (int) -- current epoch + iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) + losses (OrderedDict) -- training losses stored in the format of (name, float) pairs + t_comp (float) -- computational time per data point (normalized by batch_size) + t_data (float) -- data loading time per data point (normalized by batch_size) + """ + message = '(dataset: %s, epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % ( + dataset, epoch, iters, t_comp, t_data) + for k, v in losses.items(): + message += '%s: %.3f ' % (k, v) + + print(message) # print the message + with open(self.log_name, "a") as log_file: + log_file.write('%s\n' % message) # save the message diff --git a/third_part/face_detection/README.md b/third_part/face_detection/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c073376e4eeda6d4b29cc31c50cb7e88ab42bb73 --- /dev/null +++ b/third_part/face_detection/README.md @@ -0,0 +1 @@ +The code for Face Detection in this folder has been taken from the wonderful [face_alignment](https://github.com/1adrianb/face-alignment) repository. This has been modified to take batches of faces at a time. 
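# --- Illustrative usage sketch (not part of the diff) ----------------------
# A rough example of the batched interface mentioned in the README above,
# assuming the third_part directory is on the import path. Only
# FaceAlignment, LandmarksType and get_detections_for_batch() come from the
# code that follows; the frame-loading part is hypothetical.
import cv2
import numpy as np
from face_detection import FaceAlignment, LandmarksType

# hypothetical input: a few equally sized frames read with OpenCV (BGR order)
frames = [cv2.imread(p) for p in ['frame0.jpg', 'frame1.jpg']]
batch = np.asarray(frames)  # shape (N, H, W, 3)

fa = FaceAlignment(LandmarksType._2D, flip_input=False, device='cuda')
rects = fa.get_detections_for_batch(batch)  # [(x1, y1, x2, y2) or None, ...]

for i, rect in enumerate(rects):
    if rect is None:
        print(f'no face found in frame {i}')
    else:
        x1, y1, x2, y2 = rect
        print(f'frame {i}: face at ({x1}, {y1})-({x2}, {y2})')
# ---------------------------------------------------------------------------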
\ No newline at end of file diff --git a/third_part/face_detection/__init__.py b/third_part/face_detection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4bae29fd5f85b41e4669302bd2603bc6924eddc7 --- /dev/null +++ b/third_part/face_detection/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +__author__ = """Adrian Bulat""" +__email__ = 'adrian.bulat@nottingham.ac.uk' +__version__ = '1.0.1' + +from .api import FaceAlignment, LandmarksType, NetworkSize diff --git a/third_part/face_detection/api.py b/third_part/face_detection/api.py new file mode 100644 index 0000000000000000000000000000000000000000..cb02d5252db5362b9985687a992e128a522e5b63 --- /dev/null +++ b/third_part/face_detection/api.py @@ -0,0 +1,79 @@ +from __future__ import print_function +import os +import torch +from torch.utils.model_zoo import load_url +from enum import Enum +import numpy as np +import cv2 +try: + import urllib.request as request_file +except BaseException: + import urllib as request_file + +from .models import FAN, ResNetDepth +from .utils import * + + +class LandmarksType(Enum): + """Enum class defining the type of landmarks to detect. + + ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face + ``_2halfD`` - this points represent the projection of the 3D points into 3D + ``_3D`` - detect the points ``(x,y,z)``` in a 3D space + + """ + _2D = 1 + _2halfD = 2 + _3D = 3 + + +class NetworkSize(Enum): + # TINY = 1 + # SMALL = 2 + # MEDIUM = 3 + LARGE = 4 + + def __new__(cls, value): + member = object.__new__(cls) + member._value_ = value + return member + + def __int__(self): + return self.value + +ROOT = os.path.dirname(os.path.abspath(__file__)) + +class FaceAlignment: + def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, + device='cuda', flip_input=False, face_detector='sfd', verbose=False): + self.device = device + self.flip_input = flip_input + self.landmarks_type = landmarks_type + self.verbose = verbose + + network_size = int(network_size) + + if 'cuda' in device: + torch.backends.cudnn.benchmark = True + + # Get the face detector + face_detector_module = __import__('face_detection.detection.' 
+ face_detector, + globals(), locals(), [face_detector], 0) + self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose) + + def get_detections_for_batch(self, images): + images = images[..., ::-1] + detected_faces = self.face_detector.detect_from_batch(images.copy()) + results = [] + + for i, d in enumerate(detected_faces): + if len(d) == 0: + results.append(None) + continue + d = d[0] + d = np.clip(d, 0, None) + + x1, y1, x2, y2 = map(int, d[:-1]) + results.append((x1, y1, x2, y2)) + + return results \ No newline at end of file diff --git a/third_part/face_detection/detection/__init__.py b/third_part/face_detection/detection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a6b0402dae864a3cc5dc2a90a412fd842a0efc7 --- /dev/null +++ b/third_part/face_detection/detection/__init__.py @@ -0,0 +1 @@ +from .core import FaceDetector \ No newline at end of file diff --git a/third_part/face_detection/detection/core.py b/third_part/face_detection/detection/core.py new file mode 100644 index 0000000000000000000000000000000000000000..0f8275e8e53143f66298f75f0517c234a68778cd --- /dev/null +++ b/third_part/face_detection/detection/core.py @@ -0,0 +1,130 @@ +import logging +import glob +from tqdm import tqdm +import numpy as np +import torch +import cv2 + + +class FaceDetector(object): + """An abstract class representing a face detector. + + Any other face detection implementation must subclass it. All subclasses + must implement ``detect_from_image``, that return a list of detected + bounding boxes. Optionally, for speed considerations detect from path is + recommended. + """ + + def __init__(self, device, verbose): + self.device = device + self.verbose = verbose + + if verbose: + if 'cpu' in device: + logger = logging.getLogger(__name__) + logger.warning("Detection running on CPU, this may be potentially slow.") + + if 'cpu' not in device and 'cuda' not in device: + if verbose: + logger.error("Expected values for device are: {cpu, cuda} but got: %s", device) + raise ValueError + + def detect_from_image(self, tensor_or_path): + """Detects faces in a given image. + + This function detects the faces present in a provided BGR(usually) + image. The input can be either the image itself or the path to it. + + Arguments: + tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path + to an image or the image itself. + + Example:: + + >>> path_to_image = 'data/image_01.jpg' + ... detected_faces = detect_from_image(path_to_image) + [A list of bounding boxes (x1, y1, x2, y2)] + >>> image = cv2.imread(path_to_image) + ... detected_faces = detect_from_image(image) + [A list of bounding boxes (x1, y1, x2, y2)] + + """ + raise NotImplementedError + + def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True): + """Detects faces from all the images present in a given directory. + + Arguments: + path {string} -- a string containing a path that points to the folder containing the images + + Keyword Arguments: + extensions {list} -- list of string containing the extensions to be + consider in the following format: ``.extension_name`` (default: + {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the + folder recursively (default: {False}) show_progress_bar {bool} -- + display a progressbar (default: {True}) + + Example: + >>> directory = 'data' + ... 
detected_faces = detect_from_directory(directory) + {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]} + + """ + if self.verbose: + logger = logging.getLogger(__name__) + + if len(extensions) == 0: + if self.verbose: + logger.error("Expected at list one extension, but none was received.") + raise ValueError + + if self.verbose: + logger.info("Constructing the list of images.") + additional_pattern = '/**/*' if recursive else '/*' + files = [] + for extension in extensions: + files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive)) + + if self.verbose: + logger.info("Finished searching for images. %s images found", len(files)) + logger.info("Preparing to run the detection.") + + predictions = {} + for image_path in tqdm(files, disable=not show_progress_bar): + if self.verbose: + logger.info("Running the face detector on image: %s", image_path) + predictions[image_path] = self.detect_from_image(image_path) + + if self.verbose: + logger.info("The detector was successfully run on all %s images", len(files)) + + return predictions + + @property + def reference_scale(self): + raise NotImplementedError + + @property + def reference_x_shift(self): + raise NotImplementedError + + @property + def reference_y_shift(self): + raise NotImplementedError + + @staticmethod + def tensor_or_path_to_ndarray(tensor_or_path, rgb=True): + """Convert path (represented as a string) or torch.tensor to a numpy.ndarray + + Arguments: + tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself + """ + if isinstance(tensor_or_path, str): + return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1] + elif torch.is_tensor(tensor_or_path): + # Call cpu in case its coming from cuda + return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy() + elif isinstance(tensor_or_path, np.ndarray): + return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path + else: + raise TypeError diff --git a/third_part/face_detection/detection/sfd/__init__.py b/third_part/face_detection/detection/sfd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5a63ecd45658f22e66c171ada751fb33764d4559 --- /dev/null +++ b/third_part/face_detection/detection/sfd/__init__.py @@ -0,0 +1 @@ +from .sfd_detector import SFDDetector as FaceDetector \ No newline at end of file diff --git a/third_part/face_detection/detection/sfd/bbox.py b/third_part/face_detection/detection/sfd/bbox.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd7222e5e5f78a51944cbeed3cccbacddc46bed --- /dev/null +++ b/third_part/face_detection/detection/sfd/bbox.py @@ -0,0 +1,129 @@ +from __future__ import print_function +import os +import sys +import cv2 +import random +import datetime +import time +import math +import argparse +import numpy as np +import torch + +try: + from iou import IOU +except BaseException: + # IOU cython speedup 10x + def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2): + sa = abs((ax2 - ax1) * (ay2 - ay1)) + sb = abs((bx2 - bx1) * (by2 - by1)) + x1, y1 = max(ax1, bx1), max(ay1, by1) + x2, y2 = min(ax2, bx2), min(ay2, by2) + w = x2 - x1 + h = y2 - y1 + if w < 0 or h < 0: + return 0.0 + else: + return 1.0 * w * h / (sa + sb - w * h) + + +def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh): + xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1 + dx, dy = (xc - axc) / aww, (yc - ayc) / ahh + dw, dh = math.log(ww / aww), math.log(hh / ahh) + return dx, dy, 
dw, dh + + +def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh): + xc, yc = dx * aww + axc, dy * ahh + ayc + ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh + x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2 + return x1, y1, x2, y2 + + +def nms(dets, thresh): + if 0 == len(dets): + return [] + x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]]) + xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]]) + + w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1) + ovr = w * h / (areas[i] + areas[order[1:]] - w * h) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def encode(matched, priors, variances): + """Encode the variances from the priorbox layers into the ground truth boxes + we have matched (based on jaccard overlap) with the prior boxes. + Args: + matched: (tensor) Coords of ground truth for each prior in point-form + Shape: [num_priors, 4]. + priors: (tensor) Prior boxes in center-offset form + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + encoded boxes (tensor), Shape: [num_priors, 4] + """ + + # dist b/t match center and prior's center + g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, 2:]) + # match wh / prior wh + g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] + g_wh = torch.log(g_wh) / variances[1] + # return target for smooth_l1_loss + return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] + + +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat(( + priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes + +def batch_decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. 
+ variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat(( + priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], + priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2) + boxes[:, :, :2] -= boxes[:, :, 2:] / 2 + boxes[:, :, 2:] += boxes[:, :, :2] + return boxes diff --git a/third_part/face_detection/detection/sfd/detect.py b/third_part/face_detection/detection/sfd/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..2143d6effa737eba01df70ff4573c2fad7919992 --- /dev/null +++ b/third_part/face_detection/detection/sfd/detect.py @@ -0,0 +1,115 @@ +import torch +import torch.nn.functional as F + +import os +import sys +import cv2 +import random +import datetime +import math +import argparse +import numpy as np + +import scipy.io as sio +import zipfile +from .net_s3fd import s3fd +from .bbox import * + + +def detect(net, img, device): + img = img - np.array([104, 117, 123]) + img = img.transpose(2, 0, 1) + img = img.reshape((1,) + img.shape) + + if 'cuda' in device: + torch.backends.cudnn.benchmark = True + + img = torch.from_numpy(img).float().to(device) + BB, CC, HH, WW = img.size() + with torch.no_grad(): + olist = net(img) + + bboxlist = [] + for i in range(len(olist) // 2): + olist[i * 2] = F.softmax(olist[i * 2], dim=1) + olist = [oelem.data.cpu() for oelem in olist] + for i in range(len(olist) // 2): + ocls, oreg = olist[i * 2], olist[i * 2 + 1] + FB, FC, FH, FW = ocls.size() # feature map size + stride = 2**(i + 2) # 4,8,16,32,64,128 + anchor = stride * 4 + poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) + for Iindex, hindex, windex in poss: + axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride + score = ocls[0, 1, hindex, windex] + loc = oreg[0, :, hindex, windex].contiguous().view(1, 4) + priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]) + variances = [0.1, 0.2] + box = decode(loc, priors, variances) + x1, y1, x2, y2 = box[0] * 1.0 + # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) + bboxlist.append([x1, y1, x2, y2, score]) + bboxlist = np.array(bboxlist) + if 0 == len(bboxlist): + bboxlist = np.zeros((1, 5)) + + return bboxlist + +def batch_detect(net, imgs, device): + imgs = imgs - np.array([104, 117, 123]) + imgs = imgs.transpose(0, 3, 1, 2) + + if 'cuda' in device: + torch.backends.cudnn.benchmark = True + + imgs = torch.from_numpy(imgs).float().to(device) + BB, CC, HH, WW = imgs.size() + with torch.no_grad(): + # print(type(net),type(imgs), device) + olist = net(imgs) + + bboxlist = [] + for i in range(len(olist) // 2): + olist[i * 2] = F.softmax(olist[i * 2], dim=1) + # print(olist) + # import pdb; pdb.set_trace() + olist = [oelem.cpu() for oelem in olist] + for i in range(len(olist) // 2): + ocls, oreg = olist[i * 2], olist[i * 2 + 1] + FB, FC, FH, FW = ocls.size() # feature map size + stride = 2**(i + 2) # 4,8,16,32,64,128 + anchor = stride * 4 + poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) + for Iindex, hindex, windex in poss: + axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride + score = ocls[:, 1, hindex, windex] + loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4) + priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4) + variances = [0.1, 0.2] + box = batch_decode(loc, priors, variances) + box = box[:, 0] * 1.0 + # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) + 
bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy()) + bboxlist = np.array(bboxlist) + if 0 == len(bboxlist): + bboxlist = np.zeros((1, BB, 5)) + + return bboxlist + +def flip_detect(net, img, device): + img = cv2.flip(img, 1) + b = detect(net, img, device) + + bboxlist = np.zeros(b.shape) + bboxlist[:, 0] = img.shape[1] - b[:, 2] + bboxlist[:, 1] = b[:, 1] + bboxlist[:, 2] = img.shape[1] - b[:, 0] + bboxlist[:, 3] = b[:, 3] + bboxlist[:, 4] = b[:, 4] + return bboxlist + + +def pts_to_bb(pts): + min_x, min_y = np.min(pts, axis=0) + max_x, max_y = np.max(pts, axis=0) + return np.array([min_x, min_y, max_x, max_y]) diff --git a/third_part/face_detection/detection/sfd/net_s3fd.py b/third_part/face_detection/detection/sfd/net_s3fd.py new file mode 100644 index 0000000000000000000000000000000000000000..fc64313c277ab594d0257585c70f147606693452 --- /dev/null +++ b/third_part/face_detection/detection/sfd/net_s3fd.py @@ -0,0 +1,129 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class L2Norm(nn.Module): + def __init__(self, n_channels, scale=1.0): + super(L2Norm, self).__init__() + self.n_channels = n_channels + self.scale = scale + self.eps = 1e-10 + self.weight = nn.Parameter(torch.Tensor(self.n_channels)) + self.weight.data *= 0.0 + self.weight.data += self.scale + + def forward(self, x): + norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps + x = x / norm * self.weight.view(1, -1, 1, 1) + return x + + +class s3fd(nn.Module): + def __init__(self): + super(s3fd, self).__init__() + self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) + self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) + + self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) + self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) + + self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) + self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + + self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1) + self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + + self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + + self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3) + self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) + + self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) + self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) + + self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) + self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) + + self.conv3_3_norm = L2Norm(256, scale=10) + self.conv4_3_norm = L2Norm(512, scale=8) + self.conv5_3_norm = L2Norm(512, scale=5) + + self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, 
kernel_size=3, stride=1, padding=1) + + self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, padding=1) + self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + h = F.relu(self.conv1_1(x)) + h = F.relu(self.conv1_2(h)) + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv2_1(h)) + h = F.relu(self.conv2_2(h)) + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv3_1(h)) + h = F.relu(self.conv3_2(h)) + h = F.relu(self.conv3_3(h)) + f3_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv4_1(h)) + h = F.relu(self.conv4_2(h)) + h = F.relu(self.conv4_3(h)) + f4_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv5_1(h)) + h = F.relu(self.conv5_2(h)) + h = F.relu(self.conv5_3(h)) + f5_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.fc6(h)) + h = F.relu(self.fc7(h)) + ffc7 = h + h = F.relu(self.conv6_1(h)) + h = F.relu(self.conv6_2(h)) + f6_2 = h + h = F.relu(self.conv7_1(h)) + h = F.relu(self.conv7_2(h)) + f7_2 = h + + f3_3 = self.conv3_3_norm(f3_3) + f4_3 = self.conv4_3_norm(f4_3) + f5_3 = self.conv5_3_norm(f5_3) + + cls1 = self.conv3_3_norm_mbox_conf(f3_3) + reg1 = self.conv3_3_norm_mbox_loc(f3_3) + cls2 = self.conv4_3_norm_mbox_conf(f4_3) + reg2 = self.conv4_3_norm_mbox_loc(f4_3) + cls3 = self.conv5_3_norm_mbox_conf(f5_3) + reg3 = self.conv5_3_norm_mbox_loc(f5_3) + cls4 = self.fc7_mbox_conf(ffc7) + reg4 = self.fc7_mbox_loc(ffc7) + cls5 = self.conv6_2_mbox_conf(f6_2) + reg5 = self.conv6_2_mbox_loc(f6_2) + cls6 = self.conv7_2_mbox_conf(f7_2) + reg6 = self.conv7_2_mbox_loc(f7_2) + + # max-out background label + chunk = torch.chunk(cls1, 4, 1) + bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2]) + cls1 = torch.cat([bmax, chunk[3]], dim=1) + + return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] diff --git a/third_part/face_detection/detection/sfd/sfd_detector.py b/third_part/face_detection/detection/sfd/sfd_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..576f404433243082696abe14161182ac430c80e2 --- /dev/null +++ b/third_part/face_detection/detection/sfd/sfd_detector.py @@ -0,0 +1,59 @@ +import os +import cv2 +from torch.utils.model_zoo import load_url + +from ..core import FaceDetector + +from .net_s3fd import s3fd +from .bbox import * +from .detect import * + +models_urls = { + 's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth', +} + + +class SFDDetector(FaceDetector): + def __init__(self, device, path_to_detector='/apdcephfs/share_1290939/shadowcun/pretrained/s3fd.pth', verbose=False): + super(SFDDetector, self).__init__(device, verbose) + + # Initialise the face detector + if not os.path.isfile(path_to_detector): + model_weights = load_url(models_urls['s3fd']) + else: + model_weights = torch.load(path_to_detector) + + self.face_detector = s3fd() + self.face_detector.load_state_dict(model_weights) + self.face_detector.to(device) + self.face_detector.eval() + + def detect_from_image(self, tensor_or_path): + image = self.tensor_or_path_to_ndarray(tensor_or_path) + + bboxlist = detect(self.face_detector, image, device=self.device) + keep = nms(bboxlist, 0.3) + bboxlist = bboxlist[keep, :] + bboxlist = [x for x in 
bboxlist if x[-1] > 0.5] + + return bboxlist + + def detect_from_batch(self, images): + bboxlists = batch_detect(self.face_detector, images, device=self.device) + keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])] + bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)] + bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists] + + return bboxlists + + @property + def reference_scale(self): + return 195 + + @property + def reference_x_shift(self): + return 0 + + @property + def reference_y_shift(self): + return 0 diff --git a/third_part/face_detection/models.py b/third_part/face_detection/models.py new file mode 100644 index 0000000000000000000000000000000000000000..ee2dde32bdf72c25a4600e48efa73ffc0d4a3893 --- /dev/null +++ b/third_part/face_detection/models.py @@ -0,0 +1,261 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + + +def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, + stride=strd, padding=padding, bias=bias) + + +class ConvBlock(nn.Module): + def __init__(self, in_planes, out_planes): + super(ConvBlock, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = conv3x3(in_planes, int(out_planes / 2)) + self.bn2 = nn.BatchNorm2d(int(out_planes / 2)) + self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) + self.bn3 = nn.BatchNorm2d(int(out_planes / 4)) + self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) + + if in_planes != out_planes: + self.downsample = nn.Sequential( + nn.BatchNorm2d(in_planes), + nn.ReLU(True), + nn.Conv2d(in_planes, out_planes, + kernel_size=1, stride=1, bias=False), + ) + else: + self.downsample = None + + def forward(self, x): + residual = x + + out1 = self.bn1(x) + out1 = F.relu(out1, True) + out1 = self.conv1(out1) + + out2 = self.bn2(out1) + out2 = F.relu(out2, True) + out2 = self.conv2(out2) + + out3 = self.bn3(out2) + out3 = F.relu(out3, True) + out3 = self.conv3(out3) + + out3 = torch.cat((out1, out2, out3), 1) + + if self.downsample is not None: + residual = self.downsample(residual) + + out3 += residual + + return out3 + + +class Bottleneck(nn.Module): + + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HourGlass(nn.Module): + def __init__(self, num_modules, depth, num_features): + super(HourGlass, self).__init__() + self.num_modules = num_modules + self.depth = depth + self.features = num_features + + self._generate_network(self.depth) + + def _generate_network(self, level): + self.add_module('b1_' + str(level), ConvBlock(self.features, self.features)) + + 
self.add_module('b2_' + str(level), ConvBlock(self.features, self.features)) + + if level > 1: + self._generate_network(level - 1) + else: + self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features)) + + self.add_module('b3_' + str(level), ConvBlock(self.features, self.features)) + + def _forward(self, level, inp): + # Upper branch + up1 = inp + up1 = self._modules['b1_' + str(level)](up1) + + # Lower branch + low1 = F.avg_pool2d(inp, 2, stride=2) + low1 = self._modules['b2_' + str(level)](low1) + + if level > 1: + low2 = self._forward(level - 1, low1) + else: + low2 = low1 + low2 = self._modules['b2_plus_' + str(level)](low2) + + low3 = low2 + low3 = self._modules['b3_' + str(level)](low3) + + up2 = F.interpolate(low3, scale_factor=2, mode='nearest') + + return up1 + up2 + + def forward(self, x): + return self._forward(self.depth, x) + + +class FAN(nn.Module): + + def __init__(self, num_modules=1): + super(FAN, self).__init__() + self.num_modules = num_modules + + # Base part + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + self.bn1 = nn.BatchNorm2d(64) + self.conv2 = ConvBlock(64, 128) + self.conv3 = ConvBlock(128, 128) + self.conv4 = ConvBlock(128, 256) + + # Stacking part + for hg_module in range(self.num_modules): + self.add_module('m' + str(hg_module), HourGlass(1, 4, 256)) + self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256)) + self.add_module('conv_last' + str(hg_module), + nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) + self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256)) + self.add_module('l' + str(hg_module), nn.Conv2d(256, + 68, kernel_size=1, stride=1, padding=0)) + + if hg_module < self.num_modules - 1: + self.add_module( + 'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) + self.add_module('al' + str(hg_module), nn.Conv2d(68, + 256, kernel_size=1, stride=1, padding=0)) + + def forward(self, x): + x = F.relu(self.bn1(self.conv1(x)), True) + x = F.avg_pool2d(self.conv2(x), 2, stride=2) + x = self.conv3(x) + x = self.conv4(x) + + previous = x + + outputs = [] + for i in range(self.num_modules): + hg = self._modules['m' + str(i)](previous) + + ll = hg + ll = self._modules['top_m_' + str(i)](ll) + + ll = F.relu(self._modules['bn_end' + str(i)] + (self._modules['conv_last' + str(i)](ll)), True) + + # Predict heatmaps + tmp_out = self._modules['l' + str(i)](ll) + outputs.append(tmp_out) + + if i < self.num_modules - 1: + ll = self._modules['bl' + str(i)](ll) + tmp_out_ = self._modules['al' + str(i)](tmp_out) + previous = previous + ll + tmp_out_ + + return outputs + + +class ResNetDepth(nn.Module): + + def __init__(self, block=Bottleneck, layers=[3, 8, 36, 3], num_classes=68): + self.inplanes = 64 + super(ResNetDepth, self).__init__() + self.conv1 = nn.Conv2d(3 + 68, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x diff --git a/third_part/face_detection/utils.py b/third_part/face_detection/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3dc4cf3e328efaa227cbcfdd969e1056688adad5 --- /dev/null +++ b/third_part/face_detection/utils.py @@ -0,0 +1,313 @@ +from __future__ import print_function +import os +import sys +import time +import torch +import math +import numpy as np +import cv2 + + +def _gaussian( + size=3, sigma=0.25, amplitude=1, normalize=False, width=None, + height=None, sigma_horz=None, sigma_vert=None, mean_horz=0.5, + mean_vert=0.5): + # handle some defaults + if width is None: + width = size + if height is None: + height = size + if sigma_horz is None: + sigma_horz = sigma + if sigma_vert is None: + sigma_vert = sigma + center_x = mean_horz * width + 0.5 + center_y = mean_vert * height + 0.5 + gauss = np.empty((height, width), dtype=np.float32) + # generate kernel + for i in range(height): + for j in range(width): + gauss[i][j] = amplitude * math.exp(-(math.pow((j + 1 - center_x) / ( + sigma_horz * width), 2) / 2.0 + math.pow((i + 1 - center_y) / (sigma_vert * height), 2) / 2.0)) + if normalize: + gauss = gauss / np.sum(gauss) + return gauss + + +def draw_gaussian(image, point, sigma): + # Check if the gaussian is inside + ul = [math.floor(point[0] - 3 * sigma), math.floor(point[1] - 3 * sigma)] + br = [math.floor(point[0] + 3 * sigma), math.floor(point[1] + 3 * sigma)] + if (ul[0] > image.shape[1] or ul[1] > image.shape[0] or br[0] < 1 or br[1] < 1): + return image + size = 6 * sigma + 1 + g = _gaussian(size) + g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) - int(max(1, ul[0])) + int(max(1, -ul[0]))] + g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) - int(max(1, ul[1])) + int(max(1, -ul[1]))] + img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))] + img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))] + assert (g_x[0] > 0 and g_y[1] > 0) + image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1] + ] = image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]] + g[g_y[0] - 1:g_y[1], g_x[0] - 1:g_x[1]] + image[image > 1] = 1 + return image + + +def transform(point, center, scale, resolution, invert=False): + """Generate and affine transformation matrix. + + Given a set of points, a center, a scale and a targer resolution, the + function generates and affine transformation matrix. If invert is ``True`` + it will produce the inverse transformation. 
+ + Arguments: + point {torch.tensor} -- the input 2D point + center {torch.tensor or numpy.array} -- the center around which to perform the transformations + scale {float} -- the scale of the face/object + resolution {float} -- the output resolution + + Keyword Arguments: + invert {bool} -- define wherever the function should produce the direct or the + inverse transformation matrix (default: {False}) + """ + _pt = torch.ones(3) + _pt[0] = point[0] + _pt[1] = point[1] + + h = 200.0 * scale + t = torch.eye(3) + t[0, 0] = resolution / h + t[1, 1] = resolution / h + t[0, 2] = resolution * (-center[0] / h + 0.5) + t[1, 2] = resolution * (-center[1] / h + 0.5) + + if invert: + t = torch.inverse(t) + + new_point = (torch.matmul(t, _pt))[0:2] + + return new_point.int() + + +def crop(image, center, scale, resolution=256.0): + """Center crops an image or set of heatmaps + + Arguments: + image {numpy.array} -- an rgb image + center {numpy.array} -- the center of the object, usually the same as of the bounding box + scale {float} -- scale of the face + + Keyword Arguments: + resolution {float} -- the size of the output cropped image (default: {256.0}) + + Returns: + [type] -- [description] + """ # Crop around the center point + """ Crops the image around the center. Input is expected to be an np.ndarray """ + ul = transform([1, 1], center, scale, resolution, True) + br = transform([resolution, resolution], center, scale, resolution, True) + # pad = math.ceil(torch.norm((ul - br).float()) / 2.0 - (br[0] - ul[0]) / 2.0) + if image.ndim > 2: + newDim = np.array([br[1] - ul[1], br[0] - ul[0], + image.shape[2]], dtype=np.int32) + newImg = np.zeros(newDim, dtype=np.uint8) + else: + newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) + newImg = np.zeros(newDim, dtype=np.uint8) + ht = image.shape[0] + wd = image.shape[1] + newX = np.array( + [max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) + newY = np.array( + [max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) + oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) + oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] + ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] + newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), + interpolation=cv2.INTER_LINEAR) + return newImg + + +def get_preds_fromhm(hm, center=None, scale=None): + """Obtain (x,y) coordinates given a set of N heatmaps. If the center + and the scale is provided the function will return the points also in + the original coordinate frame. 
+ + Arguments: + hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] + + Keyword Arguments: + center {torch.tensor} -- the center of the bounding box (default: {None}) + scale {float} -- face scale (default: {None}) + """ + max, idx = torch.max( + hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) + idx += 1 + preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() + preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) + preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) + + for i in range(preds.size(0)): + for j in range(preds.size(1)): + hm_ = hm[i, j, :] + pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = torch.FloatTensor( + [hm_[pY, pX + 1] - hm_[pY, pX - 1], + hm_[pY + 1, pX] - hm_[pY - 1, pX]]) + preds[i, j].add_(diff.sign_().mul_(.25)) + + preds.add_(-.5) + + preds_orig = torch.zeros(preds.size()) + if center is not None and scale is not None: + for i in range(hm.size(0)): + for j in range(hm.size(1)): + preds_orig[i, j] = transform( + preds[i, j], center, scale, hm.size(2), True) + + return preds, preds_orig + +def get_preds_fromhm_batch(hm, centers=None, scales=None): + """Obtain (x,y) coordinates given a set of N heatmaps. If the centers + and the scales is provided the function will return the points also in + the original coordinate frame. + + Arguments: + hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] + + Keyword Arguments: + centers {torch.tensor} -- the centers of the bounding box (default: {None}) + scales {float} -- face scales (default: {None}) + """ + max, idx = torch.max( + hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) + idx += 1 + preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() + preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) + preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) + + for i in range(preds.size(0)): + for j in range(preds.size(1)): + hm_ = hm[i, j, :] + pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = torch.FloatTensor( + [hm_[pY, pX + 1] - hm_[pY, pX - 1], + hm_[pY + 1, pX] - hm_[pY - 1, pX]]) + preds[i, j].add_(diff.sign_().mul_(.25)) + + preds.add_(-.5) + + preds_orig = torch.zeros(preds.size()) + if centers is not None and scales is not None: + for i in range(hm.size(0)): + for j in range(hm.size(1)): + preds_orig[i, j] = transform( + preds[i, j], centers[i], scales[i], hm.size(2), True) + + return preds, preds_orig + +def shuffle_lr(parts, pairs=None): + """Shuffle the points left-right according to the axis of symmetry + of the object. + + Arguments: + parts {torch.tensor} -- a 3D or 4D object containing the + heatmaps. + + Keyword Arguments: + pairs {list of integers} -- [order of the flipped points] (default: {None}) + """ + if pairs is None: + pairs = [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, + 34, 33, 32, 31, 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, + 40, 54, 53, 52, 51, 50, 49, 48, 59, 58, 57, 56, 55, 64, 63, + 62, 61, 60, 67, 66, 65] + if parts.ndimension() == 3: + parts = parts[pairs, ...] + else: + parts = parts[:, pairs, ...] 
+ + return parts + + +def flip(tensor, is_label=False): + """Flip an image or a set of heatmaps left-right + + Arguments: + tensor {numpy.array or torch.tensor} -- [the input image or heatmaps] + + Keyword Arguments: + is_label {bool} -- [denote wherever the input is an image or a set of heatmaps ] (default: {False}) + """ + if not torch.is_tensor(tensor): + tensor = torch.from_numpy(tensor) + + if is_label: + tensor = shuffle_lr(tensor).flip(tensor.ndimension() - 1) + else: + tensor = tensor.flip(tensor.ndimension() - 1) + + return tensor + +# From pyzolib/paths.py (https://bitbucket.org/pyzo/pyzolib/src/tip/paths.py) + + +def appdata_dir(appname=None, roaming=False): + """ appdata_dir(appname=None, roaming=False) + + Get the path to the application directory, where applications are allowed + to write user specific files (e.g. configurations). For non-user specific + data, consider using common_appdata_dir(). + If appname is given, a subdir is appended (and created if necessary). + If roaming is True, will prefer a roaming directory (Windows Vista/7). + """ + + # Define default user directory + userDir = os.getenv('FACEALIGNMENT_USERDIR', None) + if userDir is None: + userDir = os.path.expanduser('~') + if not os.path.isdir(userDir): # pragma: no cover + userDir = '/var/tmp' # issue #54 + + # Get system app data dir + path = None + if sys.platform.startswith('win'): + path1, path2 = os.getenv('LOCALAPPDATA'), os.getenv('APPDATA') + path = (path2 or path1) if roaming else (path1 or path2) + elif sys.platform.startswith('darwin'): + path = os.path.join(userDir, 'Library', 'Application Support') + # On Linux and as fallback + if not (path and os.path.isdir(path)): + path = userDir + + # Maybe we should store things local to the executable (in case of a + # portable distro or a frozen application that wants to be portable) + prefix = sys.prefix + if getattr(sys, 'frozen', None): + prefix = os.path.abspath(os.path.dirname(sys.executable)) + for reldir in ('settings', '../settings'): + localpath = os.path.abspath(os.path.join(prefix, reldir)) + if os.path.isdir(localpath): # pragma: no cover + try: + open(os.path.join(localpath, 'test.write'), 'wb').close() + os.remove(os.path.join(localpath, 'test.write')) + except IOError: + pass # We cannot write in this directory + else: + path = localpath + break + + # Get path specific for this app + if appname: + if path == userDir: + appname = '.' + appname.lstrip('.') # Make it a hidden directory + path = os.path.join(path, appname) + if not os.path.isdir(path): # pragma: no cover + os.mkdir(path) + + # Done + return path diff --git a/third_part/ganimation_replicate/LICENSE b/third_part/ganimation_replicate/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9c7fb6c6bec6b279683384de16310a37ba078c8b --- /dev/null +++ b/third_part/ganimation_replicate/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Yuedong Chen (Donald) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_part/ganimation_replicate/checkpoints/opt.txt b/third_part/ganimation_replicate/checkpoints/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..46b5e742cc4bc6f06bd89005194d06428ddd1661 --- /dev/null +++ b/third_part/ganimation_replicate/checkpoints/opt.txt @@ -0,0 +1,1403 @@ +------------------- [ test][220417_224012]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints [default: ./ckpts] + data_root: datasets/celebA [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650206412 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220417_224012 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/celebA_ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220417_224012]End ---------------------- + + +------------------- [ test][220419_184832]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365312 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184832 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184832]End ---------------------- + + +------------------- [ test][220419_185232]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365552 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_185232 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_185232]End ---------------------- + + +------------------- [ test][220419_185252]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365572 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_185252 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_185252]End ---------------------- + + +------------------- [ test][220419_185305]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365585 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_185305 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_185305]End ---------------------- + + +------------------- [ test][220419_185320]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365600 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_185320 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_185320]End ---------------------- + + +------------------- [ test][220419_185810]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365890 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_185810 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_185810]End ---------------------- + + +------------------- [ test][220419_190338]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650366218 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_190338 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_190338]End ---------------------- + + +------------------- [ test][220419_190445]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650366285 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_190445 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_190445]End ---------------------- + + +------------------- [ test][220419_190628]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650366388 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_190628 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_190628]End ---------------------- + + +------------------- [ test][220419_195037]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369037 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_195037 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_195037]End ---------------------- + + +------------------- [ test][220419_200348]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369828 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200348 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200348]End ---------------------- + + +------------------- [ test][220419_200512]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369912 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200512 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200512]End ---------------------- + + +------------------- [ test][220419_200529]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369929 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200529 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200529]End ---------------------- + + +------------------- [ test][220419_200554]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369954 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200554 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200554]End ---------------------- + + +------------------- [ test][220419_200622]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650369982 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200622 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200622]End ---------------------- + + +------------------- [ test][220419_200641]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650370001 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200641 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200641]End ---------------------- + + +------------------- [ test][220419_200658]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650370018 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200658 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200658]End ---------------------- + + +------------------- [ test][220419_200717]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650370037 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200717 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200717]End ---------------------- + + +------------------- [ test][220419_200740]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650370060 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200740 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200740]End ---------------------- + + +------------------- [ test][220419_200807]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650370087 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_200807 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_200807]End ---------------------- + + +------------------- [ test][220419_213236]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650375156 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_213236 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_213236]End ---------------------- + + +------------------- [ test][220419_213329]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: checkpoints/ [default: ./ckpts] + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 30 [default: 0] + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650375209 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_213329 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_30 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_213329]End ---------------------- + + diff --git a/third_part/ganimation_replicate/checkpoints/run_script.sh b/third_part/ganimation_replicate/checkpoints/run_script.sh new file mode 100644 index 0000000000000000000000000000000000000000..538f3510ca4f558e6925e8acfd9cf434305528c7 --- /dev/null +++ b/third_part/ganimation_replicate/checkpoints/run_script.sh @@ -0,0 +1,23 @@ +[ test][220417_224012]python main.py --mode test --data_root datasets/celebA --ckpt_dir checkpoints --load_epoch 30 +[ test][220419_184832]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_185232]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_185252]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_185305]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_185320]python test.py --data_root . 
--mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_185810]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_190338]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_190445]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_190628]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_195037]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200348]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200512]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200529]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200554]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200622]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200641]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200658]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200717]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200740]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_200807]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_213236]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ +[ test][220419_213329]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ diff --git a/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/opt.txt b/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..3581b40cf9bbe9efb61e2e9e3f254b44dd4da102 --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/opt.txt @@ -0,0 +1,61 @@ +------------------- [train][220419_183211]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts/./ganimation/220419_183211 [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364331 [default: 0] + max_dataset_size: inf + mode: train + model: ganimation + n_threads: 6 + name: 220419_183211 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 1 + visdom_env: main + visdom_port: 8097 +--------------------- [train][220419_183211]End ---------------------- + + diff --git a/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/run_script.sh b/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/run_script.sh new file mode 100644 index 0000000000000000000000000000000000000000..0ecd4f150062be94b635f5649f771d7cd02d84b4 --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/ganimation/220419_183211/run_script.sh @@ -0,0 +1 @@ +[train][220419_183211]python test.py --data_root . diff --git a/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/opt.txt b/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..175dd6ff97580d9b48a373740a1d405afc168eca --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/opt.txt @@ -0,0 +1,61 @@ +------------------- [train][220419_183229]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts/./ganimation/220419_183229 [default: ./ckpts] + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364349 [default: 0] + max_dataset_size: inf + mode: train + model: ganimation + n_threads: 6 + name: 220419_183229 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 1 + visdom_env: main + visdom_port: 8097 +--------------------- [train][220419_183229]End ---------------------- + + diff --git a/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/run_script.sh b/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/run_script.sh new file mode 100644 index 0000000000000000000000000000000000000000..646c1969f9039607dce7e7133e60995a38730d83 --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/ganimation/220419_183229/run_script.sh @@ -0,0 +1 @@ +[train][220419_183229]python test.py --data_root . diff --git a/third_part/ganimation_replicate/ckpts/opt.txt b/third_part/ganimation_replicate/ckpts/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..9597268726ba626cecf0103087ebd48020fa64f2 --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/opt.txt @@ -0,0 +1,732 @@ +------------------- [ test][220419_183311]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364391 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183311 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183311]End ---------------------- + + +------------------- [ test][220419_183356]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364436 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183356 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183356]End ---------------------- + + +------------------- [ test][220419_183456]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364496 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183456 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183456]End ---------------------- + + +------------------- [ test][220419_183528]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364528 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183528 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183528]End ---------------------- + + +------------------- [ test][220419_183711]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364631 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183711 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183711]End ---------------------- + + +------------------- [ test][220419_183837]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650364717 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_183837 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_183837]End ---------------------- + + +------------------- [ test][220419_184333]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365013 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184333 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184333]End ---------------------- + + +------------------- [ test][220419_184442]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365082 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184442 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184442]End ---------------------- + + +------------------- [ test][220419_184500]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365100 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184500 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184500]End ---------------------- + + +------------------- [ test][220419_184533]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365133 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184533 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184533]End ---------------------- + + +------------------- [ test][220419_184603]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . [default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365163 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184603 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184603]End ---------------------- + + +------------------- [ test][220419_184714]Options -------------------- + aus_nc: 17 + aus_pkl: aus_openface.pkl + batch_size: 25 + beta1: 0.5 + ckpt_dir: ./ckpts + data_root: . 
[default: None] + epoch_count: 1 + final_size: 128 + gan_type: wgan-gp + gpu_ids: [0] [default: 0] + img_nc: 3 + imgs_dir: imgs + init_gain: 0.02 + init_type: normal + interpolate_len: 5 + lambda_aus: 160.0 + lambda_dis: 1.0 + lambda_mask: 0 + lambda_rec: 10.0 + lambda_tv: 0 + lambda_wgan_gp: 10.0 + load_epoch: 0 + load_size: 148 + log_file: logs.txt + lr: 0.0001 + lr_decay_iters: 50 + lr_policy: lambda + lucky_seed: 1650365234 [default: 0] + max_dataset_size: inf + mode: test [default: train] + model: ganimation + n_threads: 6 + name: 220419_184714 + ndf: 64 + ngf: 64 + niter: 20 + niter_decay: 10 + no_aus_noise: False + no_flip: False + no_test_eval: False + norm: instance + opt_file: opt.txt + plot_losses_freq: 20000 + print_losses_freq: 100 + resize_or_crop: none + results: results/._ganimation_0 [default: results] + sample_img_freq: 2000 + save_epoch_freq: 2 + save_test_gif: False + serial_batches: False + test_csv: test_ids.csv + train_csv: train_ids.csv + train_gen_iter: 5 + use_dropout: False + visdom_display_id: 0 [default: 1] + visdom_env: main + visdom_port: 8097 +--------------------- [ test][220419_184714]End ---------------------- + + diff --git a/third_part/ganimation_replicate/ckpts/run_script.sh b/third_part/ganimation_replicate/ckpts/run_script.sh new file mode 100644 index 0000000000000000000000000000000000000000..5313b28437075f799b3ad9bdf5b84209734b2776 --- /dev/null +++ b/third_part/ganimation_replicate/ckpts/run_script.sh @@ -0,0 +1,12 @@ +[ test][220419_183311]python test.py --data_root . --mode test +[ test][220419_183356]python test.py --data_root . --mode test +[ test][220419_183456]python test.py --data_root . --mode test +[ test][220419_183528]python test.py --data_root . --mode test +[ test][220419_183711]python test.py --data_root . --mode test +[ test][220419_183837]python test.py --data_root . --mode test +[ test][220419_184333]python test.py --data_root . --mode test +[ test][220419_184442]python test.py --data_root . --mode test +[ test][220419_184500]python test.py --data_root . --mode test +[ test][220419_184533]python test.py --data_root . --mode test +[ test][220419_184603]python test.py --data_root . --mode test +[ test][220419_184714]python test.py --data_root . 
--mode test diff --git a/third_part/ganimation_replicate/data/__init__.py b/third_part/ganimation_replicate/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed66ab698e31f4cc64753178f8fe12ef21489793 --- /dev/null +++ b/third_part/ganimation_replicate/data/__init__.py @@ -0,0 +1 @@ +from .data_loader import create_dataloader \ No newline at end of file diff --git a/third_part/ganimation_replicate/data/base_dataset.py b/third_part/ganimation_replicate/data/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..aa01cf615c55fca7a59234554d03644371805608 --- /dev/null +++ b/third_part/ganimation_replicate/data/base_dataset.py @@ -0,0 +1,89 @@ +import torch +import os +from PIL import Image +import random +import numpy as np +import pickle +import torchvision.transforms as transforms + + + +class BaseDataset(torch.utils.data.Dataset): + """docstring for BaseDataset""" + def __init__(self): + super(BaseDataset, self).__init__() + + def name(self): + return os.path.basename(self.opt.data_root.strip('/')) + + def initialize(self, opt): + self.opt = opt + self.imgs_dir = os.path.join(self.opt.data_root, self.opt.imgs_dir) + self.is_train = self.opt.mode == "train" + + # load images path + filename = self.opt.train_csv if self.is_train else self.opt.test_csv + self.imgs_name_file = os.path.join(self.opt.data_root, filename) + self.imgs_path = self.make_dataset() + + # load AUs dicitionary + aus_pkl = os.path.join(self.opt.data_root, self.opt.aus_pkl) + self.aus_dict = self.load_dict(aus_pkl) + + # load image to tensor transformer + self.img2tensor = self.img_transformer() + + def make_dataset(self): + return None + + def load_dict(self, pkl_path): + saved_dict = {} + with open(pkl_path, 'rb') as f: + saved_dict = pickle.load(f, encoding='latin1') + return saved_dict + + def get_img_by_path(self, img_path): + assert os.path.isfile(img_path), "Cannot find image file: %s" % img_path + img_type = 'L' if self.opt.img_nc == 1 else 'RGB' + return Image.open(img_path).convert(img_type) + + def get_aus_by_path(self, img_path): + return None + + def img_transformer(self): + transform_list = [] + if self.opt.resize_or_crop == 'resize_and_crop': + transform_list.append(transforms.Resize([self.opt.load_size, self.opt.load_size], Image.BICUBIC)) + transform_list.append(transforms.RandomCrop(self.opt.final_size)) + elif self.opt.resize_or_crop == 'crop': + transform_list.append(transforms.RandomCrop(self.opt.final_size)) + elif self.opt.resize_or_crop == 'none': + transform_list.append(transforms.Lambda(lambda image: image)) + else: + raise ValueError("--resize_or_crop %s is not a valid option." 
% self.opt.resize_or_crop) + + if self.is_train and not self.opt.no_flip: + transform_list.append(transforms.RandomHorizontalFlip()) + + transform_list.append(transforms.ToTensor()) + transform_list.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))) + + img2tensor = transforms.Compose(transform_list) + + return img2tensor + + def __len__(self): + return len(self.imgs_path) + + + + + + + + + + + + + diff --git a/third_part/ganimation_replicate/data/celeba.py b/third_part/ganimation_replicate/data/celeba.py new file mode 100644 index 0000000000000000000000000000000000000000..35461c0ccd6533c009369f41ba03bc2a92f1e56a --- /dev/null +++ b/third_part/ganimation_replicate/data/celeba.py @@ -0,0 +1,50 @@ +from .base_dataset import BaseDataset +import os +import random +import numpy as np + + +class CelebADataset(BaseDataset): + """docstring for CelebADataset""" + def __init__(self): + super(CelebADataset, self).__init__() + + def initialize(self, opt): + super(CelebADataset, self).initialize(opt) + + def get_aus_by_path(self, img_path): + assert os.path.isfile(img_path), "Cannot find image file: %s" % img_path + img_id = str(os.path.splitext(os.path.basename(img_path))[0]) + return self.aus_dict[img_id] / 5.0 # norm to [0, 1] + + def make_dataset(self): + # return all image full path in a list + imgs_path = [] + assert os.path.isfile(self.imgs_name_file), "%s does not exist." % self.imgs_name_file + with open(self.imgs_name_file, 'r') as f: + lines = f.readlines() + imgs_path = [os.path.join(self.imgs_dir, line.strip()) for line in lines] + imgs_path = sorted(imgs_path) + return imgs_path + + def __getitem__(self, index): + img_path = self.imgs_path[index] + + # load source image + src_img = self.get_img_by_path(img_path) + src_img_tensor = self.img2tensor(src_img) + src_aus = self.get_aus_by_path(img_path) + + # load target image + tar_img_path = random.choice(self.imgs_path) + tar_img = self.get_img_by_path(tar_img_path) + tar_img_tensor = self.img2tensor(tar_img) + tar_aus = self.get_aus_by_path(tar_img_path) + if self.is_train and not self.opt.no_aus_noise: + tar_aus = tar_aus + np.random.uniform(-0.1, 0.1, tar_aus.shape) + + # record paths for debug and test usage + data_dict = {'src_img':src_img_tensor, 'src_aus':src_aus, 'tar_img':tar_img_tensor, 'tar_aus':tar_aus, \ + 'src_path':img_path, 'tar_path':tar_img_path} + + return data_dict diff --git a/third_part/ganimation_replicate/data/data_loader.py b/third_part/ganimation_replicate/data/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..4847b0165c0a6a8e585de6e0b7f13dff88cb9920 --- /dev/null +++ b/third_part/ganimation_replicate/data/data_loader.py @@ -0,0 +1,49 @@ +import torch +import os +from PIL import Image +import random +import numpy as np +import pickle +import torchvision.transforms as transforms + +from .celeba import CelebADataset + + +def create_dataloader(opt): + data_loader = DataLoader() + data_loader.initialize(opt) + return data_loader + + +class DataLoader: + def name(self): + return self.dataset.name() + "_Loader" + + def create_datase(self): + # specify which dataset to load here + loaded_dataset = os.path.basename(self.opt.data_root.strip('/')).lower() + if 'celeba' in loaded_dataset or 'emotion' in loaded_dataset: + dataset = CelebADataset() + else: + dataset = BaseDataset() + dataset.initialize(self.opt) + return dataset + + def initialize(self, opt): + self.opt = opt + self.dataset = self.create_datase() + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + 
batch_size=opt.batch_size, + shuffle=not opt.serial_batches, + num_workers=int(opt.n_threads) + ) + + def __len__(self): + return min(len(self.dataset), self.opt.max_dataset_size) + + def __iter__(self): + for i, data in enumerate(self.dataloader): + if i * self.opt.batch_size >= self.opt.max_dataset_size: + break + yield data diff --git a/third_part/ganimation_replicate/main.py b/third_part/ganimation_replicate/main.py new file mode 100644 index 0000000000000000000000000000000000000000..a3127912eb872a30d352ed6ae016b1e15581fe2e --- /dev/null +++ b/third_part/ganimation_replicate/main.py @@ -0,0 +1,18 @@ +""" +Created on Dec 13, 2018 +@author: Yuedong Chen +""" + +from options import Options +from solvers import create_solver + + + + +if __name__ == '__main__': + opt = Options().parse() + + solver = create_solver(opt) + solver.run_solver() + + print('[THE END]') \ No newline at end of file diff --git a/third_part/ganimation_replicate/model/__init__.py b/third_part/ganimation_replicate/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dadc48d7de125c6abfc8bb1146c0a593a2fba5d1 --- /dev/null +++ b/third_part/ganimation_replicate/model/__init__.py @@ -0,0 +1,18 @@ +from .base_model import BaseModel +from .ganimation import GANimationModel +from .stargan import StarGANModel + + + +def create_model(opt): + # specify model name here + if opt.model == "ganimation": + instance = GANimationModel() + elif opt.model == "stargan": + instance = StarGANModel() + else: + instance = BaseModel() + instance.initialize(opt) + instance.setup() + return instance + diff --git a/third_part/ganimation_replicate/model/base_model.py b/third_part/ganimation_replicate/model/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..4561051423ce8ecf1dd253154cace208e0c27736 --- /dev/null +++ b/third_part/ganimation_replicate/model/base_model.py @@ -0,0 +1,167 @@ +import torch +import os +from collections import OrderedDict +import random +from . import model_utils + + +class BaseModel: + """docstring for BaseModel""" + def __init__(self): + super(BaseModel, self).__init__() + self.name = "Base" + + def initialize(self, opt): + self.opt = opt + self.gpu_ids = self.opt.gpu_ids + self.device = torch.device('cuda:%d' % self.gpu_ids[0] if self.gpu_ids else 'cpu') + self.is_train = self.opt.mode == "train" + # inherit to define network model + self.models_name = [] + + def setup(self): + # print("%s with Model [%s]" % (self.opt.mode.capitalize(), self.name)) + if self.is_train: + self.set_train() + # define loss function + self.criterionGAN = model_utils.GANLoss(gan_type=self.opt.gan_type).to(self.device) + self.criterionL1 = torch.nn.L1Loss().to(self.device) + self.criterionMSE = torch.nn.MSELoss().to(self.device) + self.criterionTV = model_utils.TVLoss().to(self.device) + torch.nn.DataParallel(self.criterionGAN, self.gpu_ids) + torch.nn.DataParallel(self.criterionL1, self.gpu_ids) + torch.nn.DataParallel(self.criterionMSE, self.gpu_ids) + torch.nn.DataParallel(self.criterionTV, self.gpu_ids) + # inherit to set up train/val/test status + self.losses_name = [] + self.optims = [] + self.schedulers = [] + else: + self.set_eval() + + def set_eval(self): + print("Set model to Test state.") + for name in self.models_name: + if isinstance(name, str): + net = getattr(self, 'net_' + name) + if True: + net.eval() + print("Set net_%s to EVAL." 
% name) + else: + net.train() + self.is_train = False + + def set_train(self): + print("Set model to Train state.") + for name in self.models_name: + if isinstance(name, str): + net = getattr(self, 'net_' + name) + net.train() + print("Set net_%s to TRAIN." % name) + self.is_train = True + + def set_requires_grad(self, parameters, requires_grad=False): + if not isinstance(parameters, list): + parameters = [parameters] + for param in parameters: + if param is not None: + param.requires_grad = requires_grad + + def get_latest_visuals(self, visuals_name): + visual_ret = OrderedDict() + for name in visuals_name: + if isinstance(name, str) and hasattr(self, name): + visual_ret[name] = getattr(self, name) + return visual_ret + + def get_latest_losses(self, losses_name): + errors_ret = OrderedDict() + for name in losses_name: + if isinstance(name, str): + cur_loss = float(getattr(self, 'loss_' + name)) + # cur_loss_lambda = 1. if len(losses_name) == 1 else float(getattr(self.opt, 'lambda_' + name)) + # errors_ret[name] = cur_loss * cur_loss_lambda + errors_ret[name] = cur_loss + return errors_ret + + def feed_batch(self, batch): + pass + + def forward(self): + pass + + def optimize_paras(self): + pass + + def update_learning_rate(self): + for scheduler in self.schedulers: + scheduler.step() + lr = self.optims[0].param_groups[0]['lr'] + return lr + + def save_ckpt(self, epoch, models_name): + for name in models_name: + if isinstance(name, str): + save_filename = '%s_net_%s.pth' % (epoch, name) + save_path = os.path.join(self.opt.ckpt_dir, save_filename) + net = getattr(self, 'net_' + name) + # save cpu params, so that it can be used in other GPU settings + if len(self.gpu_ids) > 0 and torch.cuda.is_available(): + torch.save(net.module.cpu().state_dict(), save_path) + net.to(self.gpu_ids[0]) + net = torch.nn.DataParallel(net, self.gpu_ids) + else: + torch.save(net.cpu().state_dict(), save_path) + + def load_ckpt(self, epoch, models_name): + # print(models_name) + for name in models_name: + if isinstance(name, str): + load_filename = '%s_net_%s.pth' % (epoch, name) + # load_path = os.path.join(self.opt.ckpt_dir, load_filename) + # assert os.path.isfile(load_path), "File '%s' does not exist." % load_path + + # pretrained_state_dict = torch.load(load_path, map_location=str(self.device)) + pretrained_state_dict = torch.load('checkpoints/30_net_gen.pth', map_location=str('cuda:0')) + if hasattr(pretrained_state_dict, '_metadata'): + del pretrained_state_dict._metadata + + net = getattr(self, 'net_' + name) + if isinstance(net, torch.nn.DataParallel): + net = net.module + # load only existing keys + pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if k in net.state_dict()} + # for k, v in pretrained_state_dict.items(): + # print(k) + # assert False + net.load_state_dict(pretrained_dict) + print("[Info] Successfully load trained weights for net_%s." 
% name) + + def clean_ckpt(self, epoch, models_name): + for name in models_name: + if isinstance(name, str): + load_filename = '%s_net_%s.pth' % (epoch, name) + load_path = os.path.join(self.opt.ckpt_dir, load_filename) + if os.path.isfile(load_path): + os.remove(load_path) + + def gradient_penalty(self, input_img, generate_img): + # interpolate sample + alpha = torch.rand(input_img.size(0), 1, 1, 1).to(self.device) + inter_img = (alpha * input_img.data + (1 - alpha) * generate_img.data).requires_grad_(True) + inter_img_prob, _ = self.net_dis(inter_img) + + # computer gradient penalty: x: inter_img, y: inter_img_prob + # (L2_norm(dy/dx) - 1)**2 + dydx = torch.autograd.grad(outputs=inter_img_prob, + inputs=inter_img, + grad_outputs=torch.ones(inter_img_prob.size()).to(self.device), + retain_graph=True, + create_graph=True, + only_inputs=True)[0] + dydx = dydx.view(dydx.size(0), -1) + dydx_l2norm = torch.sqrt(torch.sum(dydx ** 2, dim=1)) + return torch.mean((dydx_l2norm - 1) ** 2) + + + diff --git a/third_part/ganimation_replicate/model/ganimation.py b/third_part/ganimation_replicate/model/ganimation.py new file mode 100644 index 0000000000000000000000000000000000000000..0909f775582f479af83d177b8beeb28bb65c8501 --- /dev/null +++ b/third_part/ganimation_replicate/model/ganimation.py @@ -0,0 +1,144 @@ +import torch +from .base_model import BaseModel +from . import model_utils + + +class GANimationModel(BaseModel): + """docstring for GANimationModel""" + def __init__(self): + super(GANimationModel, self).__init__() + self.name = "GANimation" + + def initialize(self): + # super(GANimationModel, self).initialize(opt) + self.is_train = False + self.models_name = [] + self.net_gen = model_utils.define_splitG(3, 17, 64, use_dropout=False, + norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[0]) + self.models_name.append('gen') + self.device = 'cuda' + + # if self.is_train: + # self.net_dis = model_utils.define_splitD(3, 17, self.opt.final_size, self.opt.ndf, + # norm=self.opt.norm, init_type=self.opt.init_type, init_gain=self.opt.init_gain, gpu_ids=self.gpu_ids) + # self.models_name.append('dis') + + # if self.opt.load_epoch > 0: + self.load_ckpt('30') + + def setup(self): + super(GANimationModel, self).setup() + if self.is_train: + # setup optimizer + self.optim_gen = torch.optim.Adam(self.net_gen.parameters(), + lr=self.opt.lr, betas=(self.opt.beta1, 0.999)) + self.optims.append(self.optim_gen) + self.optim_dis = torch.optim.Adam(self.net_dis.parameters(), + lr=self.opt.lr, betas=(self.opt.beta1, 0.999)) + self.optims.append(self.optim_dis) + + # setup schedulers + self.schedulers = [model_utils.get_scheduler(optim, self.opt) for optim in self.optims] + + def feed_batch(self, batch): + self.src_img = batch['src_img'].to(self.device) + self.tar_aus = batch['tar_aus'].type(torch.FloatTensor).to(self.device) + if self.is_train: + self.src_aus = batch['src_aus'].type(torch.FloatTensor).to(self.device) + self.tar_img = batch['tar_img'].to(self.device) + + def forward(self): + # generate fake image + self.color_mask ,self.aus_mask, self.embed = self.net_gen(self.src_img, self.tar_aus) + self.fake_img = self.aus_mask * self.src_img + (1 - self.aus_mask) * self.color_mask + + # reconstruct real image + if self.is_train: + self.rec_color_mask, self.rec_aus_mask, self.rec_embed = self.net_gen(self.fake_img, self.src_aus) + self.rec_real_img = self.rec_aus_mask * self.fake_img + (1 - self.rec_aus_mask) * self.rec_color_mask + + def backward_dis(self): + # real image + pred_real, self.pred_real_aus 
= self.net_dis(self.src_img) + self.loss_dis_real = self.criterionGAN(pred_real, True) + self.loss_dis_real_aus = self.criterionMSE(self.pred_real_aus, self.src_aus) + + # fake image, detach to stop backward to generator + pred_fake, _ = self.net_dis(self.fake_img.detach()) + self.loss_dis_fake = self.criterionGAN(pred_fake, False) + + # combine dis loss + self.loss_dis = self.opt.lambda_dis * (self.loss_dis_fake + self.loss_dis_real) \ + + self.opt.lambda_aus * self.loss_dis_real_aus + if self.opt.gan_type == 'wgan-gp': + self.loss_dis_gp = self.gradient_penalty(self.src_img, self.fake_img) + self.loss_dis = self.loss_dis + self.opt.lambda_wgan_gp * self.loss_dis_gp + + # backward discriminator loss + self.loss_dis.backward() + + def backward_gen(self): + # original to target domain, should fake the discriminator + pred_fake, self.pred_fake_aus = self.net_dis(self.fake_img) + self.loss_gen_GAN = self.criterionGAN(pred_fake, True) + self.loss_gen_fake_aus = self.criterionMSE(self.pred_fake_aus, self.tar_aus) + + # target to original domain reconstruct, identity loss + self.loss_gen_rec = self.criterionL1(self.rec_real_img, self.src_img) + + # constrain on AUs mask + self.loss_gen_mask_real_aus = torch.mean(self.aus_mask) + self.loss_gen_mask_fake_aus = torch.mean(self.rec_aus_mask) + self.loss_gen_smooth_real_aus = self.criterionTV(self.aus_mask) + self.loss_gen_smooth_fake_aus = self.criterionTV(self.rec_aus_mask) + + # combine and backward G loss + self.loss_gen = self.opt.lambda_dis * self.loss_gen_GAN \ + + self.opt.lambda_aus * self.loss_gen_fake_aus \ + + self.opt.lambda_rec * self.loss_gen_rec \ + + self.opt.lambda_mask * (self.loss_gen_mask_real_aus + self.loss_gen_mask_fake_aus) \ + + self.opt.lambda_tv * (self.loss_gen_smooth_real_aus + self.loss_gen_smooth_fake_aus) + + self.loss_gen.backward() + + def optimize_paras(self, train_gen): + self.forward() + # update discriminator + self.set_requires_grad(self.net_dis, True) + self.optim_dis.zero_grad() + self.backward_dis() + self.optim_dis.step() + + # update G if needed + if train_gen: + self.set_requires_grad(self.net_dis, False) + self.optim_gen.zero_grad() + self.backward_gen() + self.optim_gen.step() + + def save_ckpt(self, epoch): + # save the specific networks + save_models_name = ['gen', 'dis'] + return super(GANimationModel, self).save_ckpt(epoch, save_models_name) + + def load_ckpt(self, epoch): + # load the specific part of networks + load_models_name = ['gen'] + if self.is_train: + load_models_name.extend(['dis']) + return super(GANimationModel, self).load_ckpt(epoch, load_models_name) + + def clean_ckpt(self, epoch): + # load the specific part of networks + load_models_name = ['gen', 'dis'] + return super(GANimationModel, self).clean_ckpt(epoch, load_models_name) + + def get_latest_losses(self): + get_losses_name = ['dis_fake', 'dis_real', 'dis_real_aus', 'gen_rec'] + return super(GANimationModel, self).get_latest_losses(get_losses_name) + + def get_latest_visuals(self): + visuals_name = ['src_img', 'tar_img', 'color_mask', 'aus_mask', 'fake_img'] + if self.is_train: + visuals_name.extend(['rec_color_mask', 'rec_aus_mask', 'rec_real_img']) + return super(GANimationModel, self).get_latest_visuals(visuals_name) diff --git a/third_part/ganimation_replicate/model/model_utils.py b/third_part/ganimation_replicate/model/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..42860f653efaf0df71f5f33db837499e64a7b655 --- /dev/null +++ b/third_part/ganimation_replicate/model/model_utils.py @@ -0,0 
+1,547 @@ +import torch +import torch.nn as nn +from torch.nn import init +import functools +from torch.optim import lr_scheduler +from collections import OrderedDict + + +''' +Helper functions for model +Borrow tons of code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/networks.py +''' + +def get_norm_layer(norm_type='instance'): + """Return a normalization layer + Parameters: + norm_type (str) -- the name of the normalization layer: batch | instance | none + For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). + For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. + """ + if norm_type == 'batch': + norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) + elif norm_type == 'instance': + # change default flag, make sure instance norm behave as the same in both train and eval + # https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/395 + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + elif norm_type == 'none': + norm_layer = None + else: + raise NotImplementedError('normalization layer [%s] is not found' % norm_type) + return norm_layer + + +def get_scheduler(optimizer, opt): + if opt.lr_policy == 'lambda': + def lambda_rule(epoch): + lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1) + return lr_l + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) + elif opt.lr_policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1) + elif opt.lr_policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) + else: + return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy) + return scheduler + + +def init_weights(net, init_type='normal', gain=0.02): + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) + + +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): + if len(gpu_ids) > 0: + # print("gpu_ids,", gpu_ids) + assert(torch.cuda.is_available()) + net.to(gpu_ids[0]) + net = torch.nn.DataParallel(net, gpu_ids) + init_weights(net, init_type, gain=init_gain) + return net + + +def define_G(input_nc, output_nc, ngf, which_model_netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + netG = None + norm_layer = get_norm_layer(norm_type=norm) + + if which_model_netG == 'resnet_9blocks': + netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9) + elif which_model_netG == 'resnet_6blocks': + netG = ResnetGenerator(input_nc, output_nc, ngf, 
norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6) + elif which_model_netG == 'unet_128': + netG = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif which_model_netG == 'unet_256': + netG = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + else: + raise NotImplementedError('Generator model name [%s] is not recognized' % which_model_netG) + return init_net(netG, init_type, init_gain, gpu_ids) + + +def define_D(input_nc, ndf, which_model_netD, + n_layers_D=3, norm='batch', use_sigmoid=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + netD = None + norm_layer = get_norm_layer(norm_type=norm) + + if which_model_netD == 'basic': + netD = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=use_sigmoid) + elif which_model_netD == 'n_layers': + netD = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, use_sigmoid=use_sigmoid) + elif which_model_netD == 'pixel': + netD = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer, use_sigmoid=use_sigmoid) + else: + raise NotImplementedError('Discriminator model name [%s] is not recognized' % + which_model_netD) + return init_net(netD, init_type, init_gain, gpu_ids) + + +############################################################################## +# Classes +############################################################################## + + +# Defines the GAN loss which uses either LSGAN or the regular GAN. +# When LSGAN is used, it is basically same as MSELoss, +# but it abstracts away the need to create the target label tensor +# that has the same size as the input +class GANLoss(nn.Module): + def __init__(self, gan_type='wgan-gp', target_real_label=1.0, target_fake_label=0.0): + super(GANLoss, self).__init__() + self.register_buffer('real_label', torch.tensor(target_real_label)) + self.register_buffer('fake_label', torch.tensor(target_fake_label)) + self.gan_type = gan_type + if self.gan_type == 'wgan-gp': + self.loss = lambda x, y: -torch.mean(x) if y else torch.mean(x) + elif self.gan_type == 'lsgan': + self.loss = nn.MSELoss() + elif self.gan_type == 'gan': + self.loss = nn.BCELoss() + else: + raise NotImplementedError('GAN loss type [%s] is not found' % gan_type) + + def get_target_tensor(self, input, target_is_real): + if target_is_real: + target_tensor = self.real_label + else: + target_tensor = self.fake_label + return target_tensor.expand_as(input) + + def __call__(self, input, target_is_real): + if self.gan_type == 'wgan-gp': + target_tensor = target_is_real + else: + target_tensor = self.get_target_tensor(input, target_is_real) + return self.loss(input, target_tensor) + + +# Defines the generator that consists of Resnet blocks between a few +# downsampling/upsampling operations. +# Code and idea originally from Justin Johnson's architecture. 
+# https://github.com/jcjohnson/fast-neural-style/ +class ResnetGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'): + assert(n_blocks >= 0) + super(ResnetGenerator, self).__init__() + self.input_nc = input_nc + self.output_nc = output_nc + self.ngf = ngf + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, + bias=use_bias), + norm_layer(ngf), + nn.ReLU(True)] + + n_downsampling = 2 + for i in range(n_downsampling): + mult = 2**i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, + stride=2, padding=1, bias=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU(True)] + + mult = 2**n_downsampling + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + + for i in range(n_downsampling): + mult = 2**(n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + model += [nn.Tanh()] + + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +# Define a resnet block +class ResnetBlock(nn.Module): + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + super(ResnetBlock, self).__init__() + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) + + def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim), + nn.ReLU(True)] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + out = x + self.conv_block(x) + return out + + +# Defines the Unet generator. +# |num_downs|: number of downsamplings in UNet. 
For example, +# if |num_downs| == 7, image of size 128x128 will become of size 1x1 +# at the bottleneck +class UnetGenerator(nn.Module): + def __init__(self, input_nc, output_nc, num_downs, ngf=64, + norm_layer=nn.BatchNorm2d, use_dropout=False): + super(UnetGenerator, self).__init__() + + # construct unet structure + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) + for i in range(num_downs - 5): + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) + + self.model = unet_block + + def forward(self, input): + return self.model(input) + + +# Defines the submodule with skip connection. +# X -------------------identity---------------------- X +# |-- downsampling -- |submodule| -- upsampling --| +class UnetSkipConnectionBlock(nn.Module): + def __init__(self, outer_nc, inner_nc, input_nc=None, + submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, + stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: + return torch.cat([x, self.model(x)], 1) + + +# Defines the PatchGAN discriminator with the specified arguments. 
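+# With the default n_layers=3 and 4x4 kernels this is the usual "70x70 PatchGAN":
+# each value in the output map scores a roughly 70x70 patch of the input, so the
+# discriminator produces a 2-D grid of real/fake scores rather than a single scalar.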
+class NLayerDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False): + super(NLayerDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + kw = 4 + padw = 1 + sequence = [ + nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True) + ] + + nf_mult = 1 + nf_mult_prev = 1 + for n in range(1, n_layers): + nf_mult_prev = nf_mult + nf_mult = min(2**n, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, + kernel_size=kw, stride=2, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2**n_layers, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, + kernel_size=kw, stride=1, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] + + if use_sigmoid: + sequence += [nn.Sigmoid()] + + self.model = nn.Sequential(*sequence) + + def forward(self, input): + return self.model(input) + + +class PixelDiscriminator(nn.Module): + def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d, use_sigmoid=False): + super(PixelDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + self.net = [ + nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias), + norm_layer(ndf * 2), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)] + + if use_sigmoid: + self.net.append(nn.Sigmoid()) + + self.net = nn.Sequential(*self.net) + + def forward(self, input): + return self.net(input) + + +############################################################################## +# Basic network model +############################################################################## +def define_splitG(img_nc, aus_nc, ngf, use_dropout=False, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[]): + norm_layer = get_norm_layer(norm_type=norm) + net_img_au = SplitGenerator(img_nc, aus_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6) + return init_net(net_img_au, init_type, init_gain, gpu_ids) + + +def define_splitD(input_nc, aus_nc, image_size, ndf, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[]): + norm_layer = get_norm_layer(norm_type=norm) + net_dis_aus = SplitDiscriminator(input_nc, aus_nc, image_size, ndf, n_layers=6, norm_layer=norm_layer) + return init_net(net_dis_aus, init_type, init_gain, gpu_ids) + + +class SplitGenerator(nn.Module): + def __init__(self, img_nc, aus_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='zero'): + assert(n_blocks >= 0) + super(SplitGenerator, self).__init__() + self.input_nc = img_nc + aus_nc + self.ngf = ngf + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + model = [nn.Conv2d(self.input_nc, ngf, kernel_size=7, stride=1, padding=3, + bias=use_bias), + norm_layer(ngf), + nn.ReLU(True)] + + n_downsampling = 2 + for i in range(n_downsampling): + mult = 2**i + model += [nn.Conv2d(ngf * mult, ngf * mult 
* 2, \ + kernel_size=4, stride=2, padding=1, \ + bias=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU(True)] + + mult = 2**n_downsampling + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + + for i in range(n_downsampling): + mult = 2**(n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), + kernel_size=4, stride=2, padding=1, + bias=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU(True)] + + self.model = nn.Sequential(*model) + # color mask generator top + color_top = [] + color_top += [nn.Conv2d(ngf, img_nc, kernel_size=7, stride=1, padding=3, bias=False), + nn.Tanh()] + self.color_top = nn.Sequential(*color_top) + # AUs mask generator top + au_top = [] + au_top += [nn.Conv2d(ngf, 1, kernel_size=7, stride=1, padding=3, bias=False), + nn.Sigmoid()] + self.au_top = nn.Sequential(*au_top) + + # from torchsummary import summary + # summary(self.model.to("cuda"), (20, 128, 128)) + # summary(self.color_top.to("cuda"), (64, 128, 128)) + # summary(self.au_top.to("cuda"), (64, 128, 128)) + # assert False + + def forward(self, img, au): + # replicate AUs vector to match image shap and concate to construct input + sparse_au = au.unsqueeze(2).unsqueeze(3) + sparse_au = sparse_au.expand(sparse_au.size(0), sparse_au.size(1), img.size(2), img.size(3)) + self.input_img_au = torch.cat([img, sparse_au], dim=1) + + embed_features = self.model(self.input_img_au) + + return self.color_top(embed_features), self.au_top(embed_features), embed_features + + +class SplitDiscriminator(nn.Module): + def __init__(self, input_nc, aus_nc, image_size=128, ndf=64, n_layers=6, norm_layer=nn.BatchNorm2d): + super(SplitDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + kw = 4 + padw = 1 + sequence = [ + nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.01, True) + ] + + cur_dim = ndf + for n in range(1, n_layers): + sequence += [ + nn.Conv2d(cur_dim, 2 * cur_dim, + kernel_size=kw, stride=2, padding=padw, bias=use_bias), + nn.LeakyReLU(0.01, True) + ] + cur_dim = 2 * cur_dim + + self.model = nn.Sequential(*sequence) + # patch discriminator top + self.dis_top = nn.Conv2d(cur_dim, 1, kernel_size=kw-1, stride=1, padding=padw, bias=False) + # AUs classifier top + k_size = int(image_size / (2 ** n_layers)) + self.aus_top = nn.Conv2d(cur_dim, aus_nc, kernel_size=k_size, stride=1, bias=False) + + # from torchsummary import summary + # summary(self.model.to("cuda"), (3, 128, 128)) + + def forward(self, img): + embed_features = self.model(img) + pred_map = self.dis_top(embed_features) + pred_aus = self.aus_top(embed_features) + return pred_map.squeeze(), pred_aus.squeeze() + + +# https://github.com/jxgu1016/Total_Variation_Loss.pytorch/blob/master/TVLoss.py +class TVLoss(nn.Module): + def __init__(self, TVLoss_weight=1): + super(TVLoss,self).__init__() + self.TVLoss_weight = TVLoss_weight + + def forward(self,x): + batch_size = x.size()[0] + h_x = x.size()[2] + w_x = x.size()[3] + count_h = self._tensor_size(x[:,:,1:,:]) + count_w = self._tensor_size(x[:,:,:,1:]) + h_tv = torch.pow((x[:,:,1:,:]-x[:,:,:h_x-1,:]),2).sum() + w_tv = torch.pow((x[:,:,:,1:]-x[:,:,:,:w_x-1]),2).sum() + return self.TVLoss_weight*2*(h_tv/count_h+w_tv/count_w)/batch_size + + def _tensor_size(self,t): + return t.size()[1]*t.size()[2]*t.size()[3] + + + + 
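Taken together, the factory functions and losses above are wired in the usual pix2pix fashion; the following minimal, CPU-only sketch shows one way to exercise them (import path inferred from the repository layout, shapes and hyper-parameters purely illustrative):

import torch
# Import path inferred from the repo layout (stargan.py does "from . import model_utils");
# adjust to match the actual package location.
from third_part.ganimation_replicate.model.model_utils import define_G, define_D, GANLoss, TVLoss

netG = define_G(3, 3, 64, 'resnet_6blocks', norm='instance')  # gpu_ids defaults to [], so this stays on CPU
netD = define_D(3, 64, 'basic', norm='instance')              # 3-layer (70x70) PatchGAN
adv_loss = GANLoss(gan_type='lsgan')                          # MSE against all-ones / all-zeros patch targets
tv_loss = TVLoss(TVLoss_weight=1)

real = torch.randn(2, 3, 128, 128)                            # dummy batch
fake = netG(real)                                             # ResnetGenerator preserves the 128x128 resolution
loss_G = adv_loss(netD(fake), True) + tv_loss(fake)           # fool the discriminator + smoothness term
loss_G.backward()

StarGANModel below uses the split variants (define_splitG / define_splitD) in the same way, with the AU vector broadcast over the spatial dimensions and concatenated onto the image channels.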
diff --git a/third_part/ganimation_replicate/model/stargan.py b/third_part/ganimation_replicate/model/stargan.py new file mode 100644 index 0000000000000000000000000000000000000000..94ae9eba05a1dc4ef4c19335d646c8efe48ff050 --- /dev/null +++ b/third_part/ganimation_replicate/model/stargan.py @@ -0,0 +1,133 @@ +import torch +from .base_model import BaseModel +from . import model_utils + + + +class StarGANModel(BaseModel): + """docstring for StarGANModel""" + def __init__(self): + super(StarGANModel, self).__init__() + self.name = "StarGAN" + + def initialize(self, opt): + super(StarGANModel, self).initialize(opt) + + self.net_gen = model_utils.define_splitG(self.opt.img_nc, self.opt.aus_nc, self.opt.ngf, use_dropout=self.opt.use_dropout, + norm=self.opt.norm, init_type=self.opt.init_type, init_gain=self.opt.init_gain, gpu_ids=self.gpu_ids) + self.models_name.append('gen') + + if self.is_train: + self.net_dis = model_utils.define_splitD(self.opt.img_nc, self.opt.aus_nc, self.opt.final_size, self.opt.ndf, + norm=self.opt.norm, init_type=self.opt.init_type, init_gain=self.opt.init_gain, gpu_ids=self.gpu_ids) + self.models_name.append('dis') + + if self.opt.load_epoch > 0: + self.load_ckpt(self.opt.load_epoch) + + def setup(self): + super(StarGANModel, self).setup() + if self.is_train: + # setup optimizer + self.optim_gen = torch.optim.Adam(self.net_gen.parameters(), + lr=self.opt.lr, betas=(self.opt.beta1, 0.999)) + self.optims.append(self.optim_gen) + self.optim_dis = torch.optim.Adam(self.net_dis.parameters(), + lr=self.opt.lr, betas=(self.opt.beta1, 0.999)) + self.optims.append(self.optim_dis) + + # setup schedulers + self.schedulers = [model_utils.get_scheduler(optim, self.opt) for optim in self.optims] + + def feed_batch(self, batch): + self.src_img = batch['src_img'].to(self.device) + self.tar_aus = batch['tar_aus'].type(torch.FloatTensor).to(self.device) + if self.is_train: + self.src_aus = batch['src_aus'].type(torch.FloatTensor).to(self.device) + self.tar_img = batch['tar_img'].to(self.device) + + def forward(self): + # generate fake image + self.fake_img, _, _ = self.net_gen(self.src_img, self.tar_aus) + + # reconstruct real image + if self.is_train: + self.rec_real_img, _, _ = self.net_gen(self.fake_img, self.src_aus) + + def backward_dis(self): + # real image + pred_real, self.pred_real_aus = self.net_dis(self.src_img) + self.loss_dis_real = self.criterionGAN(pred_real, True) + self.loss_dis_real_aus = self.criterionMSE(self.pred_real_aus, self.src_aus) + + # fake image, detach to stop backward to generator + pred_fake, _ = self.net_dis(self.fake_img.detach()) + self.loss_dis_fake = self.criterionGAN(pred_fake, False) + + # combine dis loss + self.loss_dis = self.opt.lambda_dis * (self.loss_dis_fake + self.loss_dis_real) \ + + self.opt.lambda_aus * self.loss_dis_real_aus + if self.opt.gan_type == 'wgan-gp': + self.loss_dis_gp = self.gradient_penalty(self.src_img, self.fake_img) + self.loss_dis = self.loss_dis + self.opt.lambda_wgan_gp * self.loss_dis_gp + + # backward discriminator loss + self.loss_dis.backward() + + def backward_gen(self): + # original to target domain, should fake the discriminator + pred_fake, self.pred_fake_aus = self.net_dis(self.fake_img) + self.loss_gen_GAN = self.criterionGAN(pred_fake, True) + self.loss_gen_fake_aus = self.criterionMSE(self.pred_fake_aus, self.tar_aus) + + # target to original domain reconstruct, identity loss + self.loss_gen_rec = self.criterionL1(self.rec_real_img, self.src_img) + + # combine and backward G loss + self.loss_gen = 
self.opt.lambda_dis * self.loss_gen_GAN \ + + self.opt.lambda_aus * self.loss_gen_fake_aus \ + + self.opt.lambda_rec * self.loss_gen_rec + + self.loss_gen.backward() + + def optimize_paras(self, train_gen): + self.forward() + # update discriminator + self.set_requires_grad(self.net_dis, True) + self.optim_dis.zero_grad() + self.backward_dis() + self.optim_dis.step() + + # update G if needed + if train_gen: + self.set_requires_grad(self.net_dis, False) + self.optim_gen.zero_grad() + self.backward_gen() + self.optim_gen.step() + + def save_ckpt(self, epoch): + # save the specific networks + save_models_name = ['gen', 'dis'] + return super(StarGANModel, self).save_ckpt(epoch, save_models_name) + + def load_ckpt(self, epoch): + # load the specific part of networks + load_models_name = ['gen'] + if self.is_train: + load_models_name.extend(['dis']) + return super(StarGANModel, self).load_ckpt(epoch, load_models_name) + + def clean_ckpt(self, epoch): + # load the specific part of networks + load_models_name = ['gen', 'dis'] + return super(StarGANModel, self).clean_ckpt(epoch, load_models_name) + + def get_latest_losses(self): + get_losses_name = ['dis_fake', 'dis_real', 'dis_real_aus', 'gen_rec'] + return super(StarGANModel, self).get_latest_losses(get_losses_name) + + def get_latest_visuals(self): + visuals_name = ['src_img', 'tar_img', 'fake_img'] + if self.is_train: + visuals_name.extend(['rec_real_img']) + return super(StarGANModel, self).get_latest_visuals(visuals_name) diff --git a/third_part/ganimation_replicate/options.py b/third_part/ganimation_replicate/options.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9df01bde67bd4a40b90a28351d8535297f40d4 --- /dev/null +++ b/third_part/ganimation_replicate/options.py @@ -0,0 +1,158 @@ +import argparse +import torch +import os +from datetime import datetime +import time +import torch +import random +import numpy as np +import sys + + + +class Options(object): + """docstring for Options""" + def __init__(self): + super(Options, self).__init__() + + def initialize(self): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--mode', type=str, default='train', help='Mode of code. 
[train|test]') + parser.add_argument('--model', type=str, default='ganimation', help='[ganimation|stargan], see model.__init__ from more details.') + parser.add_argument('--lucky_seed', type=int, default=0, help='seed for random initialize, 0 to use current time.') + parser.add_argument('--visdom_env', type=str, default="main", help='visdom env.') + parser.add_argument('--visdom_port', type=int, default=8097, help='visdom port.') + parser.add_argument('--visdom_display_id', type=int, default=1, help='set value larger than 0 to display with visdom.') + + parser.add_argument('--results', type=str, default="results", help='save test results to this path.') + parser.add_argument('--interpolate_len', type=int, default=5, help='interpolate length for test.') + parser.add_argument('--no_test_eval', action='store_true', help='do not use eval mode during test time.') + parser.add_argument('--save_test_gif', action='store_true', help='save gif images instead of the concatenation of static images.') + + parser.add_argument('--data_root', required=False, help='paths to data set.') + parser.add_argument('--imgs_dir', type=str, default="imgs", help='path to image') + parser.add_argument('--aus_pkl', type=str, default="aus_openface.pkl", help='AUs pickle dictionary.') + parser.add_argument('--train_csv', type=str, default="train_ids.csv", help='train images paths') + parser.add_argument('--test_csv', type=str, default="test_ids.csv", help='test images paths') + + parser.add_argument('--batch_size', type=int, default=25, help='input batch size.') + parser.add_argument('--serial_batches', action='store_true', help='if specified, input images in order.') + parser.add_argument('--n_threads', type=int, default=6, help='number of workers to load data.') + parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='maximum number of samples.') + + parser.add_argument('--resize_or_crop', type=str, default='none', help='Preprocessing image, [resize_and_crop|crop|none]') + parser.add_argument('--load_size', type=int, default=148, help='scale image to this size.') + parser.add_argument('--final_size', type=int, default=128, help='crop image to this size.') + parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip image.') + parser.add_argument('--no_aus_noise', action='store_true', help='if specified, add noise to target AUs.') + + parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids, eg. 
0,1,2; -1 for cpu.') + parser.add_argument('--ckpt_dir', type=str, default='./ckpts', help='directory to save check points.') + parser.add_argument('--load_epoch', type=int, default=0, help='load epoch; 0: do not load') + parser.add_argument('--log_file', type=str, default="logs.txt", help='log loss') + parser.add_argument('--opt_file', type=str, default="opt.txt", help='options file') + + # train options + parser.add_argument('--img_nc', type=int, default=3, help='image number of channel') + parser.add_argument('--aus_nc', type=int, default=17, help='aus number of channel') + parser.add_argument('--ngf', type=int, default=64, help='ngf') + parser.add_argument('--ndf', type=int, default=64, help='ndf') + parser.add_argument('--use_dropout', action='store_true', help='if specified, use dropout.') + + parser.add_argument('--gan_type', type=str, default='wgan-gp', help='GAN loss [wgan-gp|lsgan|gan]') + parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal|xavier|kaiming|orthogonal]') + parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.') + parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization [batch|instance|none]') + parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') + parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') + parser.add_argument('--lr_policy', type=str, default='lambda', help='learning rate policy: lambda|step|plateau|cosine') + parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations') + + parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') + parser.add_argument('--niter', type=int, default=20, help='# of iter at starting learning rate') + parser.add_argument('--niter_decay', type=int, default=10, help='# of iter to linearly decay learning rate to zero') + + # loss options + parser.add_argument('--lambda_dis', type=float, default=1.0, help='discriminator weight in loss') + parser.add_argument('--lambda_aus', type=float, default=160.0, help='AUs weight in loss') + parser.add_argument('--lambda_rec', type=float, default=10.0, help='reconstruct loss weight') + parser.add_argument('--lambda_mask', type=float, default=0, help='mse loss weight') + parser.add_argument('--lambda_tv', type=float, default=0, help='total variation loss weight') + parser.add_argument('--lambda_wgan_gp', type=float, default=10., help='wgan gradient penalty weight') + + # frequency options + parser.add_argument('--train_gen_iter', type=int, default=5, help='train G every n interations.') + parser.add_argument('--print_losses_freq', type=int, default=100, help='print log every print_freq step.') + parser.add_argument('--plot_losses_freq', type=int, default=20000, help='plot log every plot_freq step.') + parser.add_argument('--sample_img_freq', type=int, default=2000, help='draw image every sample_img_freq step.') + parser.add_argument('--save_epoch_freq', type=int, default=2, help='save checkpoint every save_epoch_freq epoch.') + + return parser + + def parse(self): + parser = self.initialize() + parser.set_defaults(name=datetime.now().strftime("%y%m%d_%H%M%S")) + opt = parser.parse_args() + + dataset_name = os.path.basename(opt.data_root.strip('/')) + # update checkpoint dir + if opt.mode == 'train' and opt.load_epoch == 0: + 
opt.ckpt_dir = os.path.join(opt.ckpt_dir, dataset_name, opt.model, opt.name) + if not os.path.exists(opt.ckpt_dir): + os.makedirs(opt.ckpt_dir) + + # if test, disable visdom, update results path + if opt.mode == "test": + opt.visdom_display_id = 0 + opt.results = os.path.join(opt.results, "%s_%s_%s" % (dataset_name, opt.model, opt.load_epoch)) + if not os.path.exists(opt.results): + os.makedirs(opt.results) + + # set gpu device + str_ids = opt.gpu_ids.split(',') + opt.gpu_ids = [] + for str_id in str_ids: + cur_id = int(str_id) + if cur_id >= 0: + opt.gpu_ids.append(cur_id) + if len(opt.gpu_ids) > 0: + torch.cuda.set_device(opt.gpu_ids[0]) + + # set seed + if opt.lucky_seed == 0: + opt.lucky_seed = int(time.time()) + random.seed(a=opt.lucky_seed) + np.random.seed(seed=opt.lucky_seed) + torch.manual_seed(opt.lucky_seed) + if len(opt.gpu_ids) > 0: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.cuda.manual_seed(opt.lucky_seed) + torch.cuda.manual_seed_all(opt.lucky_seed) + + # write command to file + script_dir = opt.ckpt_dir + with open(os.path.join(os.path.join(script_dir, "run_script.sh")), 'a+') as f: + f.write("[%5s][%s]python %s\n" % (opt.mode, opt.name, ' '.join(sys.argv))) + + # print and write options file + msg = '' + msg += '------------------- [%5s][%s]Options --------------------\n' % (opt.mode, opt.name) + for k, v in sorted(vars(opt).items()): + comment = '' + default_v = parser.get_default(k) + if v != default_v: + comment = '\t[default: %s]' % str(default_v) + msg += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + msg += '--------------------- [%5s][%s]End ----------------------\n' % (opt.mode, opt.name) + print(msg) + with open(os.path.join(os.path.join(script_dir, "opt.txt")), 'a+') as f: + f.write(msg + '\n\n') + + return opt + + + + + + diff --git a/third_part/ganimation_replicate/solvers.py b/third_part/ganimation_replicate/solvers.py new file mode 100644 index 0000000000000000000000000000000000000000..f432536724e8a20dfe7b29c023cf1f71765e83ec --- /dev/null +++ b/third_part/ganimation_replicate/solvers.py @@ -0,0 +1,162 @@ +""" +Created on Dec 13, 2018 +@author: Yuedong Chen +""" + +from data import create_dataloader +from model import create_model +from visualizer import Visualizer +import copy +import time +import os +import torch +import numpy as np +from PIL import Image + + +def create_solver(opt): + instance = Solver() + instance.initialize(opt) + return instance + + + +class Solver(object): + """docstring for Solver""" + def __init__(self): + super(Solver, self).__init__() + + def initialize(self, opt): + self.opt = opt + self.visual = Visualizer() + self.visual.initialize(self.opt) + + def run_solver(self): + if self.opt.mode == "train": + self.train_networks() + else: + self.test_networks(self.opt) + + def train_networks(self): + # init train setting + self.init_train_setting() + + # for every epoch + for epoch in range(self.opt.epoch_count, self.epoch_len + 1): + # train network + self.train_epoch(epoch) + # update learning rate + self.cur_lr = self.train_model.update_learning_rate() + # save checkpoint if needed + if epoch % self.opt.save_epoch_freq == 0: + self.train_model.save_ckpt(epoch) + + # save the last epoch + self.train_model.save_ckpt(self.epoch_len) + + def init_train_setting(self): + self.train_dataset = create_dataloader(self.opt) + self.train_model = create_model(self.opt) + + self.train_total_steps = 0 + self.epoch_len = self.opt.niter + self.opt.niter_decay + self.cur_lr = self.opt.lr + + def 
train_epoch(self, epoch): + epoch_start_time = time.time() + epoch_steps = 0 + + last_print_step_t = time.time() + for idx, batch in enumerate(self.train_dataset): + + self.train_total_steps += self.opt.batch_size + epoch_steps += self.opt.batch_size + # train network + self.train_model.feed_batch(batch) + self.train_model.optimize_paras(train_gen=(idx % self.opt.train_gen_iter == 0)) + # print losses + if self.train_total_steps % self.opt.print_losses_freq == 0: + cur_losses = self.train_model.get_latest_losses() + avg_step_t = (time.time() - last_print_step_t) / self.opt.print_losses_freq + last_print_step_t = time.time() + # print loss info to command line + info_dict = {'epoch': epoch, 'epoch_len': self.epoch_len, + 'epoch_steps': idx * self.opt.batch_size, 'epoch_steps_len': len(self.train_dataset), + 'step_time': avg_step_t, 'cur_lr': self.cur_lr, + 'log_path': os.path.join(self.opt.ckpt_dir, self.opt.log_file), + 'losses': cur_losses + } + self.visual.print_losses_info(info_dict) + + # plot loss map to visdom + if self.train_total_steps % self.opt.plot_losses_freq == 0 and self.visual.display_id > 0: + cur_losses = self.train_model.get_latest_losses() + epoch_steps = idx * self.opt.batch_size + self.visual.display_current_losses(epoch - 1, epoch_steps / len(self.train_dataset), cur_losses) + + # display image on visdom + if self.train_total_steps % self.opt.sample_img_freq == 0 and self.visual.display_id > 0: + cur_vis = self.train_model.get_latest_visuals() + self.visual.display_online_results(cur_vis, epoch) + # latest_aus = model.get_latest_aus() + # visual.log_aus(epoch, epoch_steps, latest_aus, opt.ckpt_dir) + + def test_networks(self, opt): + self.init_test_setting(opt) + self.test_ops() + + def init_test_setting(self, opt): + self.test_dataset = create_dataloader(opt) + self.test_model = create_model(opt) + + def test_ops(self): + for batch_idx, batch in enumerate(self.test_dataset): + with torch.no_grad(): + # interpolate several times + faces_list = [batch['src_img'].float().numpy()] + paths_list = [batch['src_path'], batch['tar_path']] + for idx in range(self.opt.interpolate_len): + cur_alpha = (idx + 1.) 
/ float(self.opt.interpolate_len) + cur_tar_aus = cur_alpha * batch['tar_aus'] + (1 - cur_alpha) * batch['src_aus'] + # print(batch['src_aus']) + # print(cur_tar_aus) + test_batch = {'src_img': batch['src_img'], 'tar_aus': cur_tar_aus, 'src_aus':batch['src_aus'], 'tar_img':batch['tar_img']} + + self.test_model.feed_batch(test_batch) + self.test_model.forward() + + cur_gen_faces = self.test_model.fake_img.cpu().float().numpy() + faces_list.append(cur_gen_faces) + faces_list.append(batch['tar_img'].float().numpy()) + self.test_save_imgs(faces_list, paths_list) + + def test_save_imgs(self, faces_list, paths_list): + for idx in range(len(paths_list[0])): + src_name = os.path.splitext(os.path.basename(paths_list[0][idx]))[0] + tar_name = os.path.splitext(os.path.basename(paths_list[1][idx]))[0] + + if self.opt.save_test_gif: + import imageio + imgs_numpy_list = [] + for face_idx in range(len(faces_list) - 1): # remove target image + cur_numpy = np.array(self.visual.numpy2im(faces_list[face_idx][idx])) + imgs_numpy_list.extend([cur_numpy for _ in range(3)]) + saved_path = os.path.join(self.opt.results, "%s_%s.gif" % (src_name, tar_name)) + imageio.mimsave(saved_path, imgs_numpy_list) + else: + # concate src, inters, tar faces + concate_img = np.array(self.visual.numpy2im(faces_list[0][idx])) + for face_idx in range(1, len(faces_list)): + concate_img = np.concatenate((concate_img, np.array(self.visual.numpy2im(faces_list[face_idx][idx]))), axis=1) + concate_img = Image.fromarray(concate_img) + # save image + saved_path = os.path.join(self.opt.results, "%s_%s.jpg" % (src_name, tar_name)) + concate_img.save(saved_path) + + print("[Success] Saved images to %s" % saved_path) + + + + + + diff --git a/third_part/ganimation_replicate/visualizer.py b/third_part/ganimation_replicate/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..e175d190be809b77fb8ae61d66d8a446cac120b1 --- /dev/null +++ b/third_part/ganimation_replicate/visualizer.py @@ -0,0 +1,104 @@ +import os +import numpy as np +import torch +import math +from PIL import Image +# import matplotlib.pyplot as plt + + + +class Visualizer(object): + """docstring for Visualizer""" + def __init__(self): + super(Visualizer, self).__init__() + + def initialize(self, opt): + self.opt = opt + # self.vis_saved_dir = os.path.join(self.opt.ckpt_dir, 'vis_pics') + # if not os.path.isdir(self.vis_saved_dir): + # os.makedirs(self.vis_saved_dir) + # plt.switch_backend('agg') + + self.display_id = self.opt.visdom_display_id + if self.display_id > 0: + import visdom + self.ncols = 8 + self.vis = visdom.Visdom(server="http://localhost", port=self.opt.visdom_port, env=self.opt.visdom_env) + + def throw_visdom_connection_error(self): + print('\n\nno visdom server.') + exit(1) + + def print_losses_info(self, info_dict): + msg = '[{}][Epoch: {:0>3}/{:0>3}; Images: {:0>4}/{:0>4}; Time: {:.3f}s/Batch({}); LR: {:.7f}] '.format( + self.opt.name, info_dict['epoch'], info_dict['epoch_len'], + info_dict['epoch_steps'], info_dict['epoch_steps_len'], + info_dict['step_time'], self.opt.batch_size, info_dict['cur_lr']) + for k, v in info_dict['losses'].items(): + msg += '| {}: {:.4f} '.format(k, v) + msg += '|' + print(msg) + with open(info_dict['log_path'], 'a+') as f: + f.write(msg + '\n') + + def display_current_losses(self, epoch, counter_ratio, losses_dict): + if not hasattr(self, 'plot_data'): + self.plot_data = {'X': [], 'Y': [], 'legend': list(losses_dict.keys())} + self.plot_data['X'].append(epoch + counter_ratio) + 
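+        # Each Y entry is one row of loss values ordered by the legend captured on the
+        # first call, so every visdom line stays aligned with its label; X is the
+        # fractional epoch position (epoch + progress within the epoch).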
self.plot_data['Y'].append([losses_dict[k] for k in self.plot_data['legend']]) + try: + self.vis.line( + X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1), + Y=np.array(self.plot_data['Y']), + opts={ + 'title': self.opt.name + ' loss over time', + 'legend':self.plot_data['legend'], + 'xlabel':'epoch', + 'ylabel':'loss'}, + win=self.display_id) + except ConnectionError: + self.throw_visdom_connection_error() + + def display_online_results(self, visuals, epoch): + win_id = self.display_id + 24 + images = [] + labels = [] + for label, image in visuals.items(): + if 'mask' in label: # or 'focus' in label: + image = (image - 0.5) / 0.5 # convert map from [0, 1] to [-1, 1] + image_numpy = self.tensor2im(image) + images.append(image_numpy.transpose([2, 0, 1])) + labels.append(label) + try: + title = ' || '.join(labels) + self.vis.images(images, nrow=self.ncols, win=win_id, + padding=5, opts=dict(title=title)) + except ConnectionError: + self.throw_visdom_connection_error() + + # utils + def tensor2im(self, input_image, imtype=np.uint8): + if isinstance(input_image, torch.Tensor): + image_tensor = input_image.data + else: + return input_image + image_numpy = image_tensor[0].cpu().float().numpy() + im = self.numpy2im(image_numpy, imtype).resize((80, 80), Image.ANTIALIAS) + return np.array(im) + + def numpy2im(self, image_numpy, imtype=np.uint8): + if image_numpy.shape[0] == 1: + image_numpy = np.tile(image_numpy, (3, 1, 1)) + # input should be [0, 1] + #image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 + image_numpy = (np.transpose(image_numpy, (1, 2, 0)) / 2. + 0.5) * 255.0 + # print(image_numpy.shape) + image_numpy = image_numpy.astype(imtype) + im = Image.fromarray(image_numpy) + # im = Image.fromarray(image_numpy).resize((64, 64), Image.ANTIALIAS) + return im # np.array(im) + + + + + diff --git a/utils/alignment_stit.py b/utils/alignment_stit.py new file mode 100644 index 0000000000000000000000000000000000000000..649544a835b24fcdb2fb81afdd36f5d81aa07c39 --- /dev/null +++ b/utils/alignment_stit.py @@ -0,0 +1,209 @@ +import PIL +import PIL.Image +import dlib +import face_alignment +import numpy as np +import scipy +import scipy.ndimage +import skimage.io as io +from PIL import Image +from scipy.ndimage import gaussian_filter1d +from tqdm import tqdm + +# from configs import paths_config +def paste_image(inverse_transform, img, orig_image): + pasted_image = orig_image.copy().convert('RGBA') + projected = img.convert('RGBA').transform(orig_image.size, Image.PERSPECTIVE, inverse_transform, Image.BILINEAR) + pasted_image.paste(projected, (0, 0), mask=projected) + return pasted_image + +def get_landmark(filepath, predictor, detector=None, fa=None): + """get landmark with dlib + :return: np.array shape=(68, 2) + """ + if fa is not None: + image = io.imread(filepath) + lms, _, bboxes = fa.get_landmarks(image, return_bboxes=True) + if len(lms) == 0: + return None + return lms[0] + + if detector is None: + detector = dlib.get_frontal_face_detector() + if isinstance(filepath, PIL.Image.Image): + img = np.array(filepath) + else: + img = dlib.load_rgb_image(filepath) + dets = detector(img) + + for k, d in enumerate(dets): + shape = predictor(img, d) + break + else: + return None + t = list(shape.parts()) + a = [] + for tt in t: + a.append([tt.x, tt.y]) + lm = np.array(a) + return lm + + +def align_face(filepath_or_image, predictor, output_size, detector=None, + enable_padding=False, scale=1.0): + """ + :param filepath: str + :return: PIL Image + """ + + c, x, y = 
compute_transform(filepath_or_image, predictor, detector=detector, + scale=scale) + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + img = crop_image(filepath_or_image, output_size, quad, enable_padding=enable_padding) + + # Return aligned image. + return img + + +def crop_image(filepath, output_size, quad, enable_padding=False): + x = (quad[3] - quad[1]) / 2 + qsize = np.hypot(*x) * 2 + # read image + if isinstance(filepath, PIL.Image.Image): + img = filepath + else: + img = PIL.Image.open(filepath) + transform_size = output_size + # Shrink. + shrink = int(np.floor(qsize / output_size * 0.5)) + if shrink > 1: + rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) + img = img.resize(rsize, PIL.Image.ANTIALIAS) + quad /= shrink + qsize /= shrink + # Crop. + border = max(int(np.rint(qsize * 0.1)), 3) + crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), + min(crop[3] + border, img.size[1])) + if (crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]): + img = img.crop(crop) + quad -= crop[0:2] + # Pad. + pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), + max(pad[3] - img.size[1] + border, 0)) + if enable_padding and max(pad) > border - 4: + pad = np.maximum(pad, int(np.rint(qsize * 0.3))) + img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') + h, w, _ = img.shape + y, x, _ = np.ogrid[:h, :w, :1] + mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]), + 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3])) + blur = qsize * 0.02 + img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) + img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0) + img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB') + quad += pad[:2] + # Transform. + img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) + if output_size < transform_size: + img = img.resize((output_size, output_size), PIL.Image.ANTIALIAS) + return img + +def compute_transform(lm, predictor, detector=None, scale=1.0, fa=None): + # lm = get_landmark(filepath, predictor, detector, fa) + # if lm is None: + # raise Exception(f'Did not detect any faces in image: {filepath}') + lm_chin = lm[0: 17] # left-right + lm_eyebrow_left = lm[17: 22] # left-right + lm_eyebrow_right = lm[22: 27] # left-right + lm_nose = lm[27: 31] # top-down + lm_nostrils = lm[31: 36] # top-down + lm_eye_left = lm[36: 42] # left-clockwise + lm_eye_right = lm[42: 48] # left-clockwise + lm_mouth_outer = lm[48: 60] # left-clockwise + lm_mouth_inner = lm[60: 68] # left-clockwise + # Calculate auxiliary vectors. + eye_left = np.mean(lm_eye_left, axis=0) + eye_right = np.mean(lm_eye_right, axis=0) + eye_avg = (eye_left + eye_right) * 0.5 + eye_to_eye = eye_right - eye_left + mouth_left = lm_mouth_outer[0] + mouth_right = lm_mouth_outer[6] + mouth_avg = (mouth_left + mouth_right) * 0.5 + eye_to_mouth = mouth_avg - eye_avg + # Choose oriented crop rectangle. 
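+    # x is the half-width axis of the crop: it follows the eye line (stabilised by the
+    # 90-degree-rotated eye-to-mouth vector) and is scaled to max(2.0 * eye-to-eye,
+    # 1.8 * eye-to-mouth); y is the perpendicular half-height axis, and c centres the
+    # quad slightly below the eye midpoint, matching the FFHQ-style alignment used
+    # elsewhere in this repo.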
+ x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] + x /= np.hypot(*x) + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + + x *= scale + y = np.flipud(x) * [-1, 1] + c = eye_avg + eye_to_mouth * 0.1 + return c, x, y + + +def crop_faces(IMAGE_SIZE, files, scale, center_sigma=0.0, xy_sigma=0.0, use_fa=False, fa=None): + if use_fa: + if fa == None: + fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=True) + predictor = None + detector = None + else: + fa = None + predictor = None + detector = None + # predictor = dlib.shape_predictor(paths_config.shape_predictor_path) + # detector = dlib.get_frontal_face_detector() + + cs, xs, ys = [], [], [] + for lm, pil in tqdm(files): + c, x, y = compute_transform(lm, predictor, detector=detector, + scale=scale, fa=fa) + cs.append(c) + xs.append(x) + ys.append(y) + + cs = np.stack(cs) + xs = np.stack(xs) + ys = np.stack(ys) + if center_sigma != 0: + cs = gaussian_filter1d(cs, sigma=center_sigma, axis=0) + + if xy_sigma != 0: + xs = gaussian_filter1d(xs, sigma=xy_sigma, axis=0) + ys = gaussian_filter1d(ys, sigma=xy_sigma, axis=0) + + quads = np.stack([cs - xs - ys, cs - xs + ys, cs + xs + ys, cs + xs - ys], axis=1) + quads = list(quads) + + crops, orig_images = crop_faces_by_quads(IMAGE_SIZE, files, quads) + + return crops, orig_images, quads + + +def crop_faces_by_quads(IMAGE_SIZE, files, quads): + orig_images = [] + crops = [] + for quad, (_, path) in tqdm(zip(quads, files), total=len(quads)): + crop = crop_image(path, IMAGE_SIZE, quad.copy()) + orig_image = path # Image.open(path) + orig_images.append(orig_image) + crops.append(crop) + return crops, orig_images + + +def calc_alignment_coefficients(pa, pb): + matrix = [] + for p1, p2 in zip(pa, pb): + matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) + matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) + + a = np.matrix(matrix, dtype=float) + b = np.array(pb).reshape(8) + + res = np.dot(np.linalg.inv(a.T * a) * a.T, b) + return np.array(res).reshape(8) \ No newline at end of file diff --git a/utils/audio.py b/utils/audio.py new file mode 100644 index 0000000000000000000000000000000000000000..2ef4ee00789c6923532b726efd4935c4d3df7e59 --- /dev/null +++ b/utils/audio.py @@ -0,0 +1,136 @@ +import librosa +import librosa.filters +import numpy as np +# import tensorflow as tf +from scipy import signal +from scipy.io import wavfile +from .hparams import hparams as hp + +def load_wav(path, sr): + return librosa.core.load(path, sr=sr)[0] + +def save_wav(wav, path, sr): + wav *= 32767 / max(0.01, np.max(np.abs(wav))) + #proposed by @dsmiller + wavfile.write(path, sr, wav.astype(np.int16)) + +def save_wavenet_wav(wav, path, sr): + librosa.output.write_wav(path, wav, sr=sr) + +def preemphasis(wav, k, preemphasize=True): + if preemphasize: + return signal.lfilter([1, -k], [1], wav) + return wav + +def inv_preemphasis(wav, k, inv_preemphasize=True): + if inv_preemphasize: + return signal.lfilter([1], [1, -k], wav) + return wav + +def get_hop_size(): + hop_size = hp.hop_size + if hop_size is None: + assert hp.frame_shift_ms is not None + hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate) + return hop_size + +def linearspectrogram(wav): + D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) + S = _amp_to_db(np.abs(D)) - hp.ref_level_db + + if hp.signal_normalization: + return _normalize(S) + return S + +def melspectrogram(wav): + D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) + S = 
_amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db + + if hp.signal_normalization: + return _normalize(S) + return S + +def _lws_processor(): + import lws + return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech") + +def _stft(y): + if hp.use_lws: + return _lws_processor(hp).stft(y).T + else: + return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size) + +########################################################## +#Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!) +def num_frames(length, fsize, fshift): + """Compute number of time frames of spectrogram + """ + pad = (fsize - fshift) + if length % fshift == 0: + M = (length + pad * 2 - fsize) // fshift + 1 + else: + M = (length + pad * 2 - fsize) // fshift + 2 + return M + + +def pad_lr(x, fsize, fshift): + """Compute left and right padding + """ + M = num_frames(len(x), fsize, fshift) + pad = (fsize - fshift) + T = len(x) + 2 * pad + r = (M - 1) * fshift + fsize - T + return pad, pad + r +########################################################## +#Librosa correct padding +def librosa_pad_lr(x, fsize, fshift): + return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0] + +# Conversions +_mel_basis = None + +def _linear_to_mel(spectogram): + global _mel_basis + if _mel_basis is None: + _mel_basis = _build_mel_basis() + return np.dot(_mel_basis, spectogram) + +def _build_mel_basis(): + assert hp.fmax <= hp.sample_rate // 2 + return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, + fmin=hp.fmin, fmax=hp.fmax) + +def _amp_to_db(x): + min_level = np.exp(hp.min_level_db / 20 * np.log(10)) + return 20 * np.log10(np.maximum(min_level, x)) + +def _db_to_amp(x): + return np.power(10.0, (x) * 0.05) + +def _normalize(S): + if hp.allow_clipping_in_normalization: + if hp.symmetric_mels: + return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value, + -hp.max_abs_value, hp.max_abs_value) + else: + return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value) + + assert S.max() <= 0 and S.min() - hp.min_level_db >= 0 + if hp.symmetric_mels: + return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value + else: + return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) + +def _denormalize(D): + if hp.allow_clipping_in_normalization: + if hp.symmetric_mels: + return (((np.clip(D, -hp.max_abs_value, + hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + + hp.min_level_db) + else: + return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) + + if hp.symmetric_mels: + return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) + else: + return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) diff --git a/utils/ffhq_preprocess.py b/utils/ffhq_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..151f51dfc059be03a4b0fae65cd1fec074a3be02 --- /dev/null +++ b/utils/ffhq_preprocess.py @@ -0,0 +1,140 @@ +import os +import cv2 +import time +import glob +import argparse +import scipy +import numpy as np +from PIL import Image +from tqdm import tqdm +from itertools import cycle +from torch.multiprocessing import Pool, Process, set_start_method + + +""" +brief: face alignment with FFHQ method (https://github.com/NVlabs/ffhq-dataset) +author: lzhbrian 
(https://lzhbrian.me) +date: 2020.1.5 +note: code is heavily borrowed from + https://github.com/NVlabs/ffhq-dataset + http://dlib.net/face_landmark_detection.py.html +requirements: + apt install cmake + conda install Pillow numpy scipy + pip install dlib + # download face landmark model from: + # http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 +""" + +import numpy as np +from PIL import Image +import dlib + + +class Croper: + def __init__(self, path_of_lm): + # download model from: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 + self.predictor = dlib.shape_predictor(path_of_lm) + + def get_landmark(self, img_np): + """get landmark with dlib + :return: np.array shape=(68, 2) + """ + detector = dlib.get_frontal_face_detector() + dets = detector(img_np, 1) + if len(dets) == 0: + return None + d = dets[0] + # Get the landmarks/parts for the face in box d. + shape = self.predictor(img_np, d) + t = list(shape.parts()) + a = [] + for tt in t: + a.append([tt.x, tt.y]) + lm = np.array(a) + return lm + + def align_face(self, img, lm, output_size=1024): + """ + :param filepath: str + :return: PIL Image + """ + lm_chin = lm[0: 17] # left-right + lm_eyebrow_left = lm[17: 22] # left-right + lm_eyebrow_right = lm[22: 27] # left-right + lm_nose = lm[27: 31] # top-down + lm_nostrils = lm[31: 36] # top-down + lm_eye_left = lm[36: 42] # left-clockwise + lm_eye_right = lm[42: 48] # left-clockwise + lm_mouth_outer = lm[48: 60] # left-clockwise + lm_mouth_inner = lm[60: 68] # left-clockwise + + # Calculate auxiliary vectors. + eye_left = np.mean(lm_eye_left, axis=0) + eye_right = np.mean(lm_eye_right, axis=0) + eye_avg = (eye_left + eye_right) * 0.5 + eye_to_eye = eye_right - eye_left + mouth_left = lm_mouth_outer[0] + mouth_right = lm_mouth_outer[6] + mouth_avg = (mouth_left + mouth_right) * 0.5 + eye_to_mouth = mouth_avg - eye_avg + + # Choose oriented crop rectangle. + x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] + x /= np.hypot(*x) + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + y = np.flipud(x) * [-1, 1] + c = eye_avg + eye_to_mouth * 0.1 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + qsize = np.hypot(*x) * 2 + + # Shrink. + shrink = int(np.floor(qsize / output_size * 0.5)) + if shrink > 1: + rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) + img = img.resize(rsize, Image.ANTIALIAS) + quad /= shrink + qsize /= shrink + + # Crop. + border = max(int(np.rint(qsize * 0.1)), 3) + crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), + min(crop[3] + border, img.size[1])) + if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: + quad -= crop[0:2] + + # Transform. + quad = (quad + 0.5).flatten() + lx = max(min(quad[0], quad[2]), 0) + ly = max(min(quad[1], quad[7]), 0) + rx = min(max(quad[4], quad[6]), img.size[0]) + ry = min(max(quad[3], quad[5]), img.size[0]) + + # Save aligned image. 
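+        # Unlike the reference FFHQ code, nothing is warped or saved here: the method
+        # returns the border-expanded crop box plus the axis-aligned bounds of the quad,
+        # and crop() below applies both rectangles directly to the raw frames.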
+ return crop, [lx, ly, rx, ry] + + def crop(self, img_np_list, xsize=512): # first frame for all video + idx = 0 + while idx < len(img_np_list)//2 : # TODO + img_np = img_np_list[idx] + lm = self.get_landmark(img_np) + if lm is not None: + break # can detect face + idx += 1 + if lm is None: + return None + + crop, quad = self.align_face(img=Image.fromarray(img_np), lm=lm, output_size=xsize) + clx, cly, crx, cry = crop + lx, ly, rx, ry = quad + lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry) + for _i in range(len(img_np_list)): + _inp = img_np_list[_i] + _inp = _inp[cly:cry, clx:crx] + _inp = _inp[ly:ry, lx:rx] + img_np_list[_i] = _inp + return img_np_list, crop, quad + + diff --git a/utils/flow_util.py b/utils/flow_util.py new file mode 100644 index 0000000000000000000000000000000000000000..376a6cbe222bfe3e1833b954e764e4e6c086c766 --- /dev/null +++ b/utils/flow_util.py @@ -0,0 +1,56 @@ +import torch + +def convert_flow_to_deformation(flow): + r"""convert flow fields to deformations. + + Args: + flow (tensor): Flow field obtained by the model + Returns: + deformation (tensor): The deformation used for warpping + """ + b,c,h,w = flow.shape + flow_norm = 2 * torch.cat([flow[:,:1,...]/(w-1),flow[:,1:,...]/(h-1)], 1) + grid = make_coordinate_grid(flow) + deformation = grid + flow_norm.permute(0,2,3,1) + return deformation + +def make_coordinate_grid(flow): + r"""obtain coordinate grid with the same size as the flow filed. + + Args: + flow (tensor): Flow field obtained by the model + Returns: + grid (tensor): The grid with the same size as the input flow + """ + b,c,h,w = flow.shape + + x = torch.arange(w).to(flow) + y = torch.arange(h).to(flow) + + x = (2 * (x / (w - 1)) - 1) + y = (2 * (y / (h - 1)) - 1) + + yy = y.view(-1, 1).repeat(1, w) + xx = x.view(1, -1).repeat(h, 1) + + meshed = torch.cat([xx.unsqueeze_(2), yy.unsqueeze_(2)], 2) + meshed = meshed.expand(b, -1, -1, -1) + return meshed + + +def warp_image(source_image, deformation): + r"""warp the input image according to the deformation + + Args: + source_image (tensor): source images to be warpped + deformation (tensor): deformations used to warp the images; value in range (-1, 1) + Returns: + output (tensor): the warpped images + """ + _, h_old, w_old, _ = deformation.shape + _, _, h, w = source_image.shape + if h_old != h or w_old != w: + deformation = deformation.permute(0, 3, 1, 2) + deformation = torch.nn.functional.interpolate(deformation, size=(h, w), mode='bilinear') + deformation = deformation.permute(0, 2, 3, 1) + return torch.nn.functional.grid_sample(source_image, deformation) \ No newline at end of file diff --git a/utils/hparams.py b/utils/hparams.py new file mode 100644 index 0000000000000000000000000000000000000000..3a0abb318afbb873a35943f65f7c5aa2bf2ceb85 --- /dev/null +++ b/utils/hparams.py @@ -0,0 +1,137 @@ +import os + +class HParams: + def __init__(self, **kwargs): + self.data = {} + + for key, value in kwargs.items(): + self.data[key] = value + + def __getattr__(self, key): + if key not in self.data: + raise AttributeError("'HParams' object has no attribute %s" % key) + return self.data[key] + + def set_hparam(self, key, value): + self.data[key] = value + + +# Default hyperparameters +hparams = HParams( + num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality + # network + rescale=True, # Whether to rescale audio prior to preprocessing + rescaling_max=0.9, # Rescaling value + + # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction + # It"s 
preferred to set True to use with https://github.com/r9y9/wavenet_vocoder + # Does not work if n_ffit is not multiple of hop_size!! + use_lws=False, + + n_fft=800, # Extra window size is filled with 0 paddings to match this parameter + hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) + win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) + sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) + + frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) + + # Mel and Linear spectrograms normalization/scaling and clipping + signal_normalization=True, + # Whether to normalize mel spectrograms to some predefined range (following below parameters) + allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True + symmetric_mels=True, + # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, + # faster and cleaner convergence) + max_abs_value=4., + # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not + # be too big to avoid gradient explosion, + # not too small for fast convergence) + # Contribution by @begeekmyfriend + # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude + # levels. Also allows for better G&L phase reconstruction) + preemphasize=True, # whether to apply filter + preemphasis=0.97, # filter coefficient. + + # Limits + min_level_db=-100, + ref_level_db=20, + fmin=55, + # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To + # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) + fmax=7600, # To be increased/reduced depending on data. + + ###################### Our training parameters ################################# + img_size=96, + fps=25, + + batch_size=8, + initial_learning_rate=1e-4, + nepochs=300000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs + num_workers=20, + checkpoint_interval=3000, + eval_interval=3000, + writer_interval=300, + save_optimizer_state=True, + + syncnet_wt=0.0, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. + syncnet_batch_size=64, + syncnet_lr=1e-4, + syncnet_eval_interval=10000, + syncnet_checkpoint_interval=10000, + + disc_wt=0.07, + disc_initial_learning_rate=1e-4, +) + + + +# Default hyperparameters +hparamsdebug = HParams( + num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality + # network + rescale=True, # Whether to rescale audio prior to preprocessing + rescaling_max=0.9, # Rescaling value + + # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction + # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder + # Does not work if n_ffit is not multiple of hop_size!! + use_lws=False, + + n_fft=800, # Extra window size is filled with 0 paddings to match this parameter + hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) + win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) + sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) + + frame_shift_ms=None, # Can replace hop_size parameter. 
(Recommended: 12.5)
+
+    # Mel and Linear spectrograms normalization/scaling and clipping
+    signal_normalization=True,
+    # Whether to normalize mel spectrograms to some predefined range (following below parameters)
+    allow_clipping_in_normalization=True,  # Only relevant if mel_normalization = True
+    symmetric_mels=True,
+    # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2,
+    # faster and cleaner convergence)
+    max_abs_value=4.,
+    # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not
+    # be too big to avoid gradient explosion,
+    # not too small for fast convergence)
+    # Contribution by @begeekmyfriend
+    # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude
+    # levels. Also allows for better G&L phase reconstruction)
+    preemphasize=True,  # whether to apply filter
+    preemphasis=0.97,  # filter coefficient.
+
+    # Limits
+    min_level_db=-100,
+    ref_level_db=20,
+    fmin=55,
+    # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To
+    # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525])
+    fmax=7600,  # To be increased/reduced depending on data.
+)
+
+
+def hparams_debug_string():
+    values = hparams.data  # HParams keeps all settings in the plain dict `data`
+    hp = ["  %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"]
+    return "Hyperparameters:\n" + "\n".join(hp)
diff --git a/utils/inference_utils.py b/utils/inference_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..84025f17ead7945cde620eb3803b1b3823e183ff
--- /dev/null
+++ b/utils/inference_utils.py
@@ -0,0 +1,252 @@
+import numpy as np
+import cv2, argparse, torch
+import torchvision.transforms.functional as TF
+
+from models import load_network, load_DNet
+from tqdm import tqdm
+from PIL import Image
+from scipy.spatial import ConvexHull
+from third_part import face_detection
+from third_part.face3d.models import networks
+
+import warnings
+warnings.filterwarnings("ignore")
+
+def options():
+    parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
+
+    parser.add_argument('--DNet_path', type=str, default='checkpoints/DNet.pt')
+    parser.add_argument('--LNet_path', type=str, default='checkpoints/LNet.pth')
+    parser.add_argument('--ENet_path', type=str, default='checkpoints/ENet.pth')
+    parser.add_argument('--face3d_net_path', type=str, default='checkpoints/face3d_pretrain_epoch_20.pth')
+    parser.add_argument('--face', type=str, help='Filepath of video/image that contains faces to use', required=True)
+    parser.add_argument('--audio', type=str, help='Filepath of video/audio file to use as raw audio source', required=True)
+    parser.add_argument('--exp_img', type=str, help='Expression template. neutral, smile or image path', default='neutral')
+    parser.add_argument('--outfile', type=str, help='Video path to save result')
+
+    parser.add_argument('--fps', type=float, help='Can be specified only if input is a static image (default: 25)', default=25., required=False)
+    parser.add_argument('--pads', nargs='+', type=int, default=[0, 20, 0, 0], help='Padding (top, bottom, left, right).
Please adjust to include chin at least') + parser.add_argument('--face_det_batch_size', type=int, help='Batch size for face detection', default=4) + parser.add_argument('--LNet_batch_size', type=int, help='Batch size for LNet', default=16) + parser.add_argument('--img_size', type=int, default=384) + parser.add_argument('--crop', nargs='+', type=int, default=[0, -1, 0, -1], + help='Crop video to a smaller region (top, bottom, left, right). Applied after resize_factor and rotate arg. ' + 'Useful if multiple face present. -1 implies the value will be auto-inferred based on height, width') + parser.add_argument('--box', nargs='+', type=int, default=[-1, -1, -1, -1], + help='Specify a constant bounding box for the face. Use only as a last resort if the face is not detected.' + 'Also, might work only if the face is not moving around much. Syntax: (top, bottom, left, right).') + parser.add_argument('--nosmooth', default=False, action='store_true', help='Prevent smoothing face detections over a short temporal window') + parser.add_argument('--static', default=False, action='store_true') + + + parser.add_argument('--up_face', default='original') + parser.add_argument('--one_shot', action='store_true') + parser.add_argument('--without_rl1', default=False, action='store_true', help='Do not use the relative l1') + parser.add_argument('--tmp_dir', type=str, default='temp', help='Folder to save tmp results') + parser.add_argument('--re_preprocess', action='store_true') + + args = parser.parse_args() + return args + +exp_aus_dict = { # AU01_r, AU02_r, AU04_r, AU05_r, AU06_r, AU07_r, AU09_r, AU10_r, AU12_r, AU14_r, AU15_r, AU17_r, AU20_r, AU23_r, AU25_r, AU26_r, AU45_r. + 'sad': torch.Tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), + 'angry':torch.Tensor([[0, 0, 0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), + 'surprise': torch.Tensor([[0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) +} + +def mask_postprocess(mask, thres=20): + mask[:thres, :] = 0; mask[-thres:, :] = 0 + mask[:, :thres] = 0; mask[:, -thres:] = 0 + mask = cv2.GaussianBlur(mask, (101, 101), 11) + mask = cv2.GaussianBlur(mask, (101, 101), 11) + return mask.astype(np.float32) + +def trans_image(image): + image = TF.resize( + image, size=256, interpolation=Image.BICUBIC) + image = TF.to_tensor(image) + image = TF.normalize(image, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) + return image + +def obtain_seq_index(index, num_frames): + seq = list(range(index-13, index+13)) + seq = [ min(max(item, 0), num_frames-1) for item in seq ] + return seq + +def transform_semantic(semantic, frame_index, crop_norm_ratio=None): + index = obtain_seq_index(frame_index, semantic.shape[0]) + + coeff_3dmm = semantic[index,...] 
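+    # Layout of the 3DMM coefficient vector (cf. split_coeff below): identity [0:80],
+    # expression [80:144], texture [144:224], euler angles [224:227], gamma [227:254],
+    # translation [254:257]; the crop parameters used here sit at [259:262].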
+    ex_coeff = coeff_3dmm[:,80:144]      # expression (64 dims)
+    angles = coeff_3dmm[:,224:227]       # euler angles for pose
+    translation = coeff_3dmm[:,254:257]  # translation
+    crop = coeff_3dmm[:,259:262]         # crop parameters
+
+    if crop_norm_ratio:
+        crop[:, -3] = crop[:, -3] * crop_norm_ratio
+
+    coeff_3dmm = np.concatenate([ex_coeff, angles, translation, crop], 1)
+    return torch.Tensor(coeff_3dmm).permute(1,0)
+
+def find_crop_norm_ratio(source_coeff, target_coeffs):
+    alpha = 0.3
+    exp_diff = np.mean(np.abs(target_coeffs[:,80:144] - source_coeff[:,80:144]), 1)      # mean expression difference
+    angle_diff = np.mean(np.abs(target_coeffs[:,224:227] - source_coeff[:,224:227]), 1)  # mean pose-angle difference
+    index = np.argmin(alpha*exp_diff + (1-alpha)*angle_diff)  # frame with the smallest combined difference
+    crop_norm_ratio = source_coeff[:,-3] / target_coeffs[index:index+1, -3]
+    return crop_norm_ratio
+
+def get_smoothened_boxes(boxes, T):
+    for i in range(len(boxes)):
+        if i + T > len(boxes):
+            window = boxes[len(boxes) - T:]
+        else:
+            window = boxes[i : i + T]
+        boxes[i] = np.mean(window, axis=0)
+    return boxes
+
+def face_detect(images, args, jaw_correction=False, detector=None):
+    if detector is None:
+        detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
+                                                flip_input=False, device='cuda:0')
+
+    batch_size = args.face_det_batch_size
+    while True:
+        predictions = []
+        try:
+            for i in tqdm(range(0, len(images), batch_size), desc='FaceDet:'):
+                predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
+        except RuntimeError:
+            if batch_size == 1:
+                raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument')
+            batch_size //= 2
+            print('Recovering from OOM error; New batch size: {}'.format(batch_size))
+            continue
+        break
+
+    results = []
+    pady1, pady2, padx1, padx2 = args.pads if jaw_correction else (0,20,0,0)
+    for rect, image in zip(predictions, images):
+        if rect is None:
+            cv2.imwrite('temp/faulty_frame.jpg', image)  # dump the frame where no face was detected, for debugging
+            raise ValueError('Face not detected!
Ensure the video contains a face in all the frames.') + + y1 = max(0, rect[1] - pady1) + y2 = min(image.shape[0], rect[3] + pady2) + x1 = max(0, rect[0] - padx1) + x2 = min(image.shape[1], rect[2] + padx2) + results.append([x1, y1, x2, y2]) + + boxes = np.array(results) + if not args.nosmooth: boxes = get_smoothened_boxes(boxes, T=5) + results = [[image[y1: y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)] + + del detector + torch.cuda.empty_cache() + return results + +def _load(checkpoint_path, device): + if device == 'cuda': + checkpoint = torch.load(checkpoint_path) + else: + checkpoint = torch.load(checkpoint_path, + map_location=lambda storage, loc: storage) + return checkpoint + +def split_coeff(coeffs): + """ + Return: + coeffs_dict -- a dict of torch.tensors + + Parameters: + coeffs -- torch.tensor, size (B, 256) + """ + id_coeffs = coeffs[:, :80] + exp_coeffs = coeffs[:, 80: 144] + tex_coeffs = coeffs[:, 144: 224] + angles = coeffs[:, 224: 227] + gammas = coeffs[:, 227: 254] + translations = coeffs[:, 254:] + return { + 'id': id_coeffs, + 'exp': exp_coeffs, + 'tex': tex_coeffs, + 'angle': angles, + 'gamma': gammas, + 'trans': translations + } + +def Laplacian_Pyramid_Blending_with_mask(A, B, m, num_levels = 6): + # generate Gaussian pyramid for A,B and mask + GA = A.copy() + GB = B.copy() + GM = m.copy() + gpA = [GA] + gpB = [GB] + gpM = [GM] + for i in range(num_levels): + GA = cv2.pyrDown(GA) + GB = cv2.pyrDown(GB) + GM = cv2.pyrDown(GM) + gpA.append(np.float32(GA)) + gpB.append(np.float32(GB)) + gpM.append(np.float32(GM)) + + # generate Laplacian Pyramids for A,B and masks + lpA = [gpA[num_levels-1]] # the bottom of the Lap-pyr holds the last (smallest) Gauss level + lpB = [gpB[num_levels-1]] + gpMr = [gpM[num_levels-1]] + for i in range(num_levels-1,0,-1): + # Laplacian: subtarct upscaled version of lower level from current level + # to get the high frequencies + LA = np.subtract(gpA[i-1], cv2.pyrUp(gpA[i])) + LB = np.subtract(gpB[i-1], cv2.pyrUp(gpB[i])) + lpA.append(LA) + lpB.append(LB) + gpMr.append(gpM[i-1]) # also reverse the masks + + # Now blend images according to mask in each level + LS = [] + for la,lb,gm in zip(lpA,lpB,gpMr): + gm = gm[:,:,np.newaxis] + ls = la * gm + lb * (1.0 - gm) + LS.append(ls) + + # now reconstruct + ls_ = LS[0] + for i in range(1,num_levels): + ls_ = cv2.pyrUp(ls_) + ls_ = cv2.add(ls_, LS[i]) + return ls_ + +def load_model(args, device): + D_Net = load_DNet(args).to(device) + model = load_network(args).to(device) + return D_Net, model + +def normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False, + use_relative_movement=False, use_relative_jacobian=False): + if adapt_movement_scale: + source_area = ConvexHull(kp_source['value'][0].data.cpu().numpy()).volume + driving_area = ConvexHull(kp_driving_initial['value'][0].data.cpu().numpy()).volume + adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area) + else: + adapt_movement_scale = 1 + + kp_new = {k: v for k, v in kp_driving.items()} + if use_relative_movement: + kp_value_diff = (kp_driving['value'] - kp_driving_initial['value']) + kp_value_diff *= adapt_movement_scale + kp_new['value'] = kp_value_diff + kp_source['value'] + + if use_relative_jacobian: + jacobian_diff = torch.matmul(kp_driving['jacobian'], torch.inverse(kp_driving_initial['jacobian'])) + kp_new['jacobian'] = torch.matmul(jacobian_diff, kp_source['jacobian']) + return kp_new + +def load_face3d_net(ckpt_path, device): + net_recon = 
networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='').to(device) + checkpoint = torch.load(ckpt_path, map_location=device) + net_recon.load_state_dict(checkpoint['net_recon']) + net_recon.eval() + return net_recon \ No newline at end of file
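
A minimal usage sketch for utils/flow_util.py (the tensor shapes and the zero flow below are illustrative assumptions, not something this diff ships): convert_flow_to_deformation turns a dense (B, 2, H, W) flow into a normalized (B, H, W, 2) sampling grid, and warp_image resizes that grid to the source resolution and resamples the image with grid_sample.

import torch
from utils.flow_util import convert_flow_to_deformation, warp_image

source = torch.randn(1, 3, 256, 256)             # (B, C, H, W) source frame, assumed resolution
flow = torch.zeros(1, 2, 64, 64)                 # (B, 2, h, w) dense flow from the model; zero here for illustration
deformation = convert_flow_to_deformation(flow)  # (1, 64, 64, 2) sampling grid with values in [-1, 1]
warped = warp_image(source, deformation)         # grid is upsampled to 256x256 before grid_sample
print(warped.shape)                              # torch.Size([1, 3, 256, 256])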