import sys
import argparse
import torch
import numpy as np
from torch.utils.data import DataLoader
sys.path.append(".")
sys.path.append("..")
from configs import data_configs
from datasets.images_dataset import ImagesDataset
from utils.model_utils import setup_model


class LEC:

    def __init__(self, net, is_cars=False):
        """
        Latent Editing Consistency metric as proposed in the main paper.
        :param net: e4e model loaded over the pSp framework.
        :param is_cars: Whether to crop the middle of the StyleGAN's output images (used for the cars domain).
        """
        self.net = net
        self.is_cars = is_cars

    def _encode(self, images):
        """
        Encodes the given images into StyleGAN's latent space.
        :param images: Tensor of shape NxCxHxW representing the images to be encoded.
        :return: Tensor of shape NxKx512 representing the latent space embeddings of the given images (in W(K, *) space).
        """
        codes = self.net.encoder(images)
        assert codes.ndim == 3, f"Invalid latent codes shape, should be NxKx512 but is {codes.shape}"
        # normalize with respect to the center of an average face
        if self.net.opts.start_from_latent_avg:
            codes = codes + self.net.latent_avg.repeat(codes.shape[0], 1, 1)
        return codes

    def _generate(self, codes):
        """
        Generates StyleGAN2 images from the given latent codes.
        :param codes: Tensor of shape NxKx512 representing the StyleGAN's latent codes (in W(K, *) space).
        :return: Tensor of shape NxCxHxW representing the generated images.
        """
        images, _ = self.net.decoder([codes], input_is_latent=True, randomize_noise=False, return_latents=True)
        images = self.net.face_pool(images)
        if self.is_cars:
            # crop the vertical center of the output for the cars domain
            images = images[:, :, 32:224, :]
        return images

    @staticmethod
    def _filter_outliers(arr):
        # Keep only values between the 1st and 99th percentiles to reduce the effect of outliers.
        # NOTE: newer NumPy versions rename the "interpolation" argument to "method".
        arr = np.array(arr)
        lo = np.percentile(arr, 1, interpolation="lower")
        hi = np.percentile(arr, 99, interpolation="higher")
        return np.extract(
            np.logical_and(lo <= arr, arr <= hi), arr
        )

    def calculate_metric(self, data_loader, edit_function, inverse_edit_function):
        """
        Calculates the LEC metric score.
        :param data_loader: An iterable that returns a tuple of (images, _), similar to the training data loader.
        :param edit_function: A function that receives latent codes and performs a semantically meaningful edit in the
                              latent space.
        :param inverse_edit_function: A function that receives latent codes and performs the inverse edit of the
                                      `edit_function` parameter.
        :return: The LEC metric score.
        """
        distances = []
        with torch.no_grad():
            for batch in data_loader:
                x, _ = batch
                # `device` is the module-level global set in the __main__ block below
                inputs = x.to(device).float()
                # Encode the inputs, edit them in latent space, and generate the edited images
                codes = self._encode(inputs)
                edited_codes = edit_function(codes)
                edited_image = self._generate(edited_codes)
                # Re-encode the edited images and apply the inverse edit
                edited_image_inversion_codes = self._encode(edited_image)
                inverse_edit_codes = inverse_edit_function(edited_image_inversion_codes)
                # LEC is the mean L2 distance between the original codes and the codes after the edit-invert cycle
                dist = (codes - inverse_edit_codes).norm(2, dim=(1, 2)).mean()
                distances.append(dist.to("cpu").numpy())
        distances = self._filter_outliers(distances)
        return distances.mean()


if __name__ == "__main__":
    device = "cuda"
    parser = argparse.ArgumentParser(description="LEC metric calculator")
    parser.add_argument("--batch", type=int, default=8, help="batch size for the models")
    parser.add_argument("--images_dir", type=str, default=None,
                        help="Path to the images directory on which we calculate the LEC score")
    parser.add_argument("ckpt", metavar="CHECKPOINT", help="path to the model checkpoints")
    args = parser.parse_args()
    print(args)

    net, opts = setup_model(args.ckpt, device)
    dataset_args = data_configs.DATASETS[opts.dataset_type]
    transforms_dict = dataset_args['transforms'](opts).get_transforms()
    images_directory = dataset_args['test_source_root'] if args.images_dir is None else args.images_dir
    test_dataset = ImagesDataset(source_root=images_directory,
                                 target_root=images_directory,
                                 source_transform=transforms_dict['transform_source'],
                                 target_transform=transforms_dict['transform_test'],
                                 opts=opts)
    data_loader = DataLoader(test_dataset,
                             batch_size=args.batch,
                             shuffle=False,
                             num_workers=2,
                             drop_last=True)
    print(f'dataset length: {len(test_dataset)}')

    # In the following example, we are using an InterfaceGAN-based editing to calculate the LEC metric.
    # Change the provided example according to your domain and needs.
    direction = torch.load('../editings/interfacegan_directions/age.pt').to(device)

    def edit_func_example(codes):
        return codes + 3 * direction

    def inverse_edit_func_example(codes):
        return codes - 3 * direction

    lec = LEC(net, is_cars='car' in opts.dataset_type)
    result = lec.calculate_metric(data_loader, edit_func_example, inverse_edit_func_example)
    print(f"LEC: {result}")