import os
import sys
import time
import argparse
from argparse import Namespace

import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms

sys.path.append(".")
sys.path.append("..")

from utils.common import tensor2im
from models.psp import pSp  # we use the pSp framework to load the e4e encoder.
experiment_type = 'ffhq_encode'

parser = argparse.ArgumentParser()
parser.add_argument('--input_image', type=str, default="", help='input image path')
args = parser.parse_args()
print(vars(args))
image_path = args.input_image
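# Fail fast on a missing input file. This guard is an addition, not part of
# the original script, which assumed a valid path was always supplied.
if not os.path.isfile(image_path):
    sys.exit("Input image not found: {}".format(image_path))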
def get_download_model_command(file_id, file_name):
    """Build a wget command that downloads the requested model from Google Drive into encoder4editing/saves."""
    save_path = "encoder4editing/saves"
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    url = r"""wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={FILE_ID}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={FILE_ID}" -O {SAVE_PATH}/{FILE_NAME} && rm -rf /tmp/cookies.txt""".format(FILE_ID=file_id, FILE_NAME=file_name, SAVE_PATH=save_path)
    return url
MODEL_PATHS = {
    "ffhq_encode": {"id": "1cUv_reLE6k3604or78EranS7XzuVMWeO", "name": "e4e_ffhq_encode.pt"},
    "cars_encode": {"id": "17faPqBce2m1AQeLCLHUVXaDfxMRU2QcV", "name": "e4e_cars_encode.pt"},
    "horse_encode": {"id": "1TkLLnuX86B_BMo2ocYD0kX9kWh53rUVX", "name": "e4e_horse_encode.pt"},
    "church_encode": {"id": "1-L0ZdnQLwtdy6-A_Ccgq5uNJGTqE7qBa", "name": "e4e_church_encode.pt"}
}
path = MODEL_PATHS[experiment_type]
download_command = get_download_model_command(file_id=path["id"], file_name=path["name"])
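# Note: the script only constructs `download_command`; it never executes it.
# A minimal sketch of running it when the checkpoint is absent (the use of
# os.system and the target path, which mirrors save_path above, are assumptions):
checkpoint_file = os.path.join("encoder4editing/saves", path["name"])
if not os.path.exists(checkpoint_file):
    os.system(download_command)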
EXPERIMENT_DATA_ARGS = {
    "ffhq_encode": {
        "model_path": "encoder4editing/e4e_ffhq_encode.pt",
        "image_path": "notebooks/images/input_img.jpg"
    },
    "cars_encode": {
        "model_path": "pretrained_models/e4e_cars_encode.pt",
        "image_path": "notebooks/images/car_img.jpg"
    },
    "horse_encode": {
        "model_path": "pretrained_models/e4e_horse_encode.pt",
        "image_path": "notebooks/images/horse_img.jpg"
    },
    "church_encode": {
        "model_path": "pretrained_models/e4e_church_encode.pt",
        "image_path": "notebooks/images/church_img.jpg"
    }
}
# Set up the required image transformations (inputs are normalized to [-1, 1]).
EXPERIMENT_ARGS = EXPERIMENT_DATA_ARGS[experiment_type]
if experiment_type == 'cars_encode':
    EXPERIMENT_ARGS['transform'] = transforms.Compose([
        transforms.Resize((192, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
    resize_dims = (256, 192)
else:
    EXPERIMENT_ARGS['transform'] = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
    resize_dims = (256, 256)
model_path = EXPERIMENT_ARGS['model_path']
ckpt = torch.load(model_path, map_location='cpu')

# Update the training options stored in the checkpoint before rebuilding the model.
opts = ckpt['opts']
opts['checkpoint_path'] = model_path
opts = Namespace(**opts)

net = pSp(opts)
net.eval()
net.cuda()  # the script assumes a CUDA-capable GPU; inputs are sent to "cuda" below
print('Model successfully loaded!')
original_image = Image.open(image_path)
original_image = original_image.convert("RGB")
def run_alignment(image_path):
    """Align and crop the face with dlib's 68-point landmark predictor, as the FFHQ encoder expects."""
    import dlib
    from utils.alignment import align_face
    predictor = dlib.shape_predictor("encoder4editing/shape_predictor_68_face_landmarks.dat")
    aligned_image = align_face(filepath=image_path, predictor=predictor)
    print("Aligned image has shape: {}".format(aligned_image.size))
    return aligned_image
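# run_alignment assumes the dlib landmark model is already on disk. A hedged
# sketch for fetching it when missing -- dlib.net is the file's usual home,
# but the shell pipeline and target path here are assumptions:
predictor_path = "encoder4editing/shape_predictor_68_face_landmarks.dat"
if not os.path.exists(predictor_path):
    os.system(
        "wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"
        " && bzip2 -d shape_predictor_68_face_landmarks.dat.bz2"
        " && mv shape_predictor_68_face_landmarks.dat " + predictor_path
    )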
if experiment_type == "ffhq_encode":
    input_image = run_alignment(image_path)
else:
    input_image = original_image

# PIL's resize() returns a new image rather than resizing in place, so the
# result must be reassigned (the original call discarded it).
input_image = input_image.resize(resize_dims)

img_transforms = EXPERIMENT_ARGS['transform']
transformed_image = img_transforms(input_image)
def display_alongside_source_image(result_image, source_image):
    res = np.concatenate([np.array(source_image.resize(resize_dims)),
                          np.array(result_image.resize(resize_dims))], axis=1)
    return Image.fromarray(res)
def run_on_batch(inputs, net):
    images, latents = net(inputs.to("cuda").float(), randomize_noise=False, return_latents=True)
    if experiment_type == 'cars_encode':
        images = images[:, :, 32:224, :]  # crop the vertical padding added by the cars generator
    return images, latents
with torch.no_grad():
    tic = time.time()
    images, latents = run_on_batch(transformed_image.unsqueeze(0), net)
    result_image, latent = images[0], latents[0]
    toc = time.time()
    print('Inference took {:.4f} seconds.'.format(toc - tic))

# Save the inversion next to its source. The original notebook displayed this
# image inline and discarded the return value; writing it to a file (the name
# is chosen here) makes the script usable outside a notebook.
comparison = display_alongside_source_image(tensor2im(result_image), input_image)
comparison.save('encoder4editing/inversion_comparison.jpg')

np.savez('encoder4editing/projected_w.npz', w=latents.cpu().numpy())
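# A minimal sketch of reloading the saved latents for downstream editing (the
# 'w' key matches the np.savez call above; the exact latent shape depends on
# the encoder, so it is printed rather than assumed):
w = np.load('encoder4editing/projected_w.npz')['w']
w = torch.from_numpy(w)
print('Reloaded latents with shape:', tuple(w.shape))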