ethanNeuralImage committed on
Commit 5238ef9
1 Parent(s): 47689a5

trying to get RIS working

app.py CHANGED
@@ -22,6 +22,10 @@ from argparse import Namespace
 
from mapper.styleclip_mapper import StyleCLIPMapper
 
+ import ris.spherical_kmeans as spherical_kmeans
+ from ris.blend import blend_latents
+ from ris.model import Generator as RIS_Generator
+
from PIL import Image
 
opts_args = ['--no_fine_mapper']
@@ -62,6 +66,10 @@ resize_amount = (256, 256) if hyperstyle_args.resize_outputs else (hyperstyle_ar
im2tensor_transforms = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
direction_calculator = load_direction_calculator(opts)
 
+ ris_gen = RIS_Generator(1024, 512, 8, channel_multiplier=2).to(device).eval()
+ ris_ckpt = torch.load('./pretrained_models/ris/stylegan2-ffhq-config-f.pt', map_location=lambda storage, loc: storage)
+ ris_gen.load_state_dict(ris_ckpt['g_ema'], strict=False)
+
 
with gr.Blocks() as demo:
with gr.Row() as row:
@@ -70,6 +78,8 @@ with gr.Blocks() as demo:
align = gr.Checkbox(True, label='Align Image')
inverter_bools = gr.CheckboxGroup(["Hyperstyle", "E4E"], value=['Hyperstyle'], label='Inverter Choices')
n_hyperstyle_iterations = gr.Number(5, label='Number of Iterations For Hyperstyle', precision=0)
+ with gr.Box():
+ invert_bool = gr.Checkbox(False, label='Output Inverter Result')
with gr.Box():
mapper_bool = gr.Checkbox(True, label='Output Mapper Result')
with gr.Box() as mapper_opts:
@@ -82,14 +92,26 @@ with gr.Blocks() as demo:
target_text = gr.Text(value=mapper_descs['afro'], label='Target Text')
alpha = gr.Slider(minimum=-10.0, maximum=10.0, value=4.1, step=0.1, label="Alpha for Global Direction")
beta = gr.Slider(minimum=0.0, maximum=0.30, value=0.15, step=0.01, label="Beta for Global Direction")
+ with gr.Box():
+ ris_bool = gr.Checkbox(False, label='Output RIS Result')
+ with gr.Box(visible=False) as ris_opts:
+ ref_img = gr.Image(label='Refrence Image for Hair', type='filepath')
submit_button = gr.Button("Edit Image")
with gr.Column() as outputs:
with gr.Row() as hyperstyle_images:
+ output_hyperstyle_invert = gr.Image(type='pil', label="Hyperstyle Inverted", visible=False)
output_hyperstyle_mapper = gr.Image(type='pil', label="Hyperstyle Mapper")
output_hyperstyle_gd = gr.Image(type='pil', label="Hyperstyle Global Directions", visible=False)
+ output_hyperstyle_ris = gr.Image(type='pil', label='Hyperstyle RIS', visible=False)
+ with gr.Row() as hyperstyle_metrics:
+ output_hypersyle_metrics = gr.Text()
with gr.Row(visible=False) as e4e_images:
+ output_e4e_invert = gr.Image(type='pil', label="E4E Inverted", visible=False)
output_e4e_mapper = gr.Image(type='pil', label="E4E Mapper")
output_e4e_gd = gr.Image(type='pil', label="E4E Global Directions", visible=False)
+ output_e4e_ris = gr.Image(type='pil', label='E4E RIS', visible=False)
+ with gr.Row() as e4e_metrics:
+ output_e4e_metrics = gr.Text()
def n_iter_change(number):
if number < 0:
return 0
@@ -105,7 +127,11 @@ with gr.Blocks() as demo:
e4e_images: gr.update(visible=e4e_bool),
n_hyperstyle_iterations: gr.update(visible=hyperstyle_bool)
}
-
+ def outp_toggles(bool):
+ return {
+ output_hyperstyle_invert: gr.update(visible=bool),
+ output_e4e_invert: gr.update(visible=bool)
+ }
def mapper_toggles(bool):
return {
mapper_opts: gr.update(visible=bool),
@@ -118,12 +144,20 @@ with gr.Blocks() as demo:
output_hyperstyle_gd: gr.update(visible=bool),
output_e4e_gd: gr.update(visible=bool)
}
+ def ris_toggles(bool):
+ return {
+ ris_opts: gr.update(visible=bool),
+ output_hyperstyle_ris: gr.update(visible=bool),
+ output_e4e_ris: gr.update(visible=bool)
+ }
 
n_hyperstyle_iterations.change(n_iter_change, n_hyperstyle_iterations, n_hyperstyle_iterations)
mapper_choice.change(mapper_change, mapper_choice, [target_text])
inverter_bools.change(inverter_toggles, inverter_bools, [hyperstyle_images, e4e_images, n_hyperstyle_iterations])
+ invert_bool.change(outp_toggles, invert_bool, [output_hyperstyle_invert, output_e4e_invert])
mapper_bool.change(mapper_toggles, mapper_bool, [mapper_opts, output_hyperstyle_mapper, output_e4e_mapper])
gd_bool.change(gd_toggles, gd_bool, [gd_opts, output_hyperstyle_gd, output_e4e_gd])
+ ris_bool.change(ris_toggles, ris_bool, [ris_opts, output_hyperstyle_ris, output_e4e_ris])
def map_latent(mapper, inputs, stylespace=False, weight_deltas=None, strength=0.1):
w = inputs.to(device)
with torch.no_grad():
@@ -140,9 +174,10 @@ with gr.Blocks() as demo:
result_batch = (x_hat, w_hat)
return result_batch
def submit(
- src, align_img, inverter_bools, n_iterations,
+ src, align_img, inverter_bools, n_iterations, invert_bool,
mapper_bool, mapper_choice, mapper_alpha,
gd_bool, neutral_text, target_text, alpha, beta,
+ ris_bool, ref_img,
):
if device == 'cuda': torch.cuda.empty_cache()
opts.checkpoint_path = mapper_dict[mapper_choice]
@@ -166,9 +201,20 @@ with gr.Blocks() as demo:
opts.target_text = target_text
opts.alpha = alpha
opts.beta = beta
+
+ if ris_bool:
+ if align_img:
+ ref_input = align_face(ref_img, predictor)
+ else:
+ ref_input = Image.open(src).convert('RGB')
+ ref_input = im2tensor_transforms(ref_input).to(device)
 
if 'Hyperstyle' in inverter_bools:
hyperstyle_batch, hyperstyle_latents, hyperstyle_deltas, _ = run_inversion(input_img.unsqueeze(0), hyperstyle, hyperstyle_args, return_intermediate_results=False)
+ if invert_bool:
+ invert_hyperstyle = tensor2im(hyperstyle_batch[0])
+ else:
+ invert_hyperstyle = None
if mapper_bool:
mapped_hyperstyle, _ = map_latent(mapper, hyperstyle_latents, stylespace=False, weight_deltas=hyperstyle_deltas, strength=mapper_alpha)
mapped_hyperstyle = tensor2im(mapped_hyperstyle[0])
@@ -181,13 +227,27 @@ with gr.Blocks() as demo:
else:
gd_hyperstyle = None
 
- hyperstyle_output = [mapped_hyperstyle,gd_hyperstyle]
+ if ris_bool:
+
+ ref_hyperstyle_batch, ref_hyperstyle_latents, ref_hyperstyle_deltas, _ = run_inversion(ref_input.unsqueeze(0), hyperstyle, hyperstyle_args, return_intermediate_results=False)
+ blend_hyperstyle, blend_hyperstyle_latents = blend_latents(hyperstyle_latents, ref_hyperstyle_batch,
+ src_deltas=hyperstyle_deltas, ref_deltas=ref_hyperstyle_deltas,
+ generator=ris_gen, device=device)
+ ris_hyperstyle = tensor2im(blend_hyperstyle)
+ else:
+ ris_hyperstyle=None
+
+ hyperstyle_output = [invert_hyperstyle, mapped_hyperstyle,gd_hyperstyle, ris_hyperstyle]
else:
- hyperstyle_output = [None, None]
+ hyperstyle_output = [None, None, None, None]
output_imgs.extend(hyperstyle_output)
if 'E4E' in inverter_bools:
e4e_batch, e4e_latents = hyperstyle.w_invert(input_img.unsqueeze(0))
e4e_deltas = None
+ if invert_bool:
+ invert_e4e = tensor2im(e4e_batch[0])
+ else:
+ invert_e4e = None
if mapper_bool:
mapped_e4e, _ = map_latent(mapper, e4e_latents, stylespace=False, weight_deltas=e4e_deltas, strength=mapper_alpha)
mapped_e4e = tensor2im(mapped_e4e[0])
@@ -200,19 +260,31 @@ with gr.Blocks() as demo:
else:
gd_e4e = None
 
- e4e_output = [mapped_e4e, gd_e4e]
+ if ris_bool:
+ ref_e4e_batch, ref_e4e_latents, = hyperstyle.w_invert(ref_input.unsqueeze(0))
+ ref_e4e_deltas= None
+ blend_e4e, blend_e4e_latents = blend_latents(e4e_latents, ref_e4e_batch,
+ src_deltas=None, ref_deltas=None,
+ generator=ris_gen, device=device)
+ ris_e4e = tensor2im(blend_e4e)
+ else:
+ ris_e4e=None
+
+ e4e_output = [invert_e4e, mapped_e4e, gd_e4e, ris_e4e]
else:
- e4e_output = [None, None]
+ e4e_output = [None, None, None, None]
output_imgs.extend(e4e_output)
return output_imgs
submit_button.click(
submit,
[
- source, align, inverter_bools, n_hyperstyle_iterations,
+ source, align, inverter_bools, n_hyperstyle_iterations, invert_bool,
mapper_bool, mapper_choice, mapper_alpha,
gd_bool, neutral_text, target_text, alpha, beta,
+ ris_bool, ref_img
],
- [output_hyperstyle_mapper, output_hyperstyle_gd, output_e4e_mapper, output_e4e_gd]
+ [output_hyperstyle_invert, output_hyperstyle_mapper, output_hyperstyle_gd, output_hyperstyle_ris,
+ output_e4e_invert, output_e4e_mapper, output_e4e_gd, output_e4e_ris]
)
 
demo.launch()
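
Note: the RIS path added to submit() above reduces to inverting both the source and the hair-reference image, then handing the source latents and the reference inversion to blend_latents. Below is a minimal, hedged sketch of that flow as a standalone function; im2tensor_transforms, run_inversion, hyperstyle, hyperstyle_args, tensor2im and device are the names app.py already defines, and the file paths are assumptions taken from this commit.

# Hedged sketch of the RIS hair-transfer path wired into submit() above.
# im2tensor_transforms, run_inversion, hyperstyle, hyperstyle_args, tensor2im
# and device are assumed to be the helpers already defined in app.py.
import torch
from PIL import Image
from ris.blend import blend_latents
from ris.model import Generator as RIS_Generator

ris_gen = RIS_Generator(1024, 512, 8, channel_multiplier=2).to(device).eval()
ris_ckpt = torch.load('./pretrained_models/ris/stylegan2-ffhq-config-f.pt', map_location='cpu')
ris_gen.load_state_dict(ris_ckpt['g_ema'], strict=False)

def ris_edit(src_path, ref_path):
    src = im2tensor_transforms(Image.open(src_path).convert('RGB')).to(device)
    ref = im2tensor_transforms(Image.open(ref_path).convert('RGB')).to(device)
    # Hyperstyle inversion of both images: latents plus per-layer weight deltas.
    _, src_latents, src_deltas, _ = run_inversion(src.unsqueeze(0), hyperstyle, hyperstyle_args, return_intermediate_results=False)
    ref_batch, ref_latents, ref_deltas, _ = run_inversion(ref.unsqueeze(0), hyperstyle, hyperstyle_args, return_intermediate_results=False)
    # Blend the reference hair into the source, mirroring the call in submit().
    blended, _ = blend_latents(src_latents, ref_batch, src_deltas=src_deltas, ref_deltas=ref_deltas, generator=ris_gen, device=device)
    return tensor2im(blended)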
pretrained_models/ris/catalog.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1835e4a20709c43ec1cfd47d17f55473a8dc14fa1b5418880bab04cb6b9f9b26
+ size 857089
pretrained_models/ris/stylegan2-ffhq-config-f.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bae494ef77e32a9cd1792a81a3c167692a0e64f6bcd8b06592ff42917e2ed46e
+ size 381462551
requirements.txt CHANGED
@@ -6,4 +6,6 @@ numpy
matplotlib
opencv-python
scipy
+ scikit-learn==0.22
+
git+https://github.com/openai/CLIP.git
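
Note: the exact scikit-learn==0.22 pin is presumably tied to pretrained_models/ris/catalog.pkl, since ris/blend.py unpickles a spherical k-means clusterer built on scikit-learn, and such pickles generally only load cleanly under the version they were created with. A small, hedged sanity check (path as added in this commit):

# Hedged sanity check: the pinned scikit-learn version and the pickled
# clusterer that ris/blend.py loads at import time. Unpickling requires the
# ris package (MiniBatchSphericalKMeans) to be importable.
import pickle
import sklearn

assert sklearn.__version__.startswith('0.22'), sklearn.__version__
with open('./pretrained_models/ris/catalog.pkl', 'rb') as f:
    clusterer = pickle.load(f)
print(type(clusterer))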
ris/__init__.py ADDED
File without changes
ris/blend.py ADDED
@@ -0,0 +1,131 @@
+ import imp
+ import torch
+ import pickle
+
+ from .util import *
+ from .spherical_kmeans import MiniBatchSphericalKMeans as sKmeans
+
+
+
+
+ truncation = 0.5
+ stop_idx = 11
+ n_clusters = 18
+
+ clusterer = pickle.load(open('./pretrained_models/ris/catalog.pkl', 'rb'))
+
+ labels2idx = {
+ 'nose': 0,
+ 'eyes': 1,
+ 'mouth': 2,
+ 'hair': 3,
+ 'background': 4,
+ 'cheek': 5,
+ 'neck': 6,
+ 'clothes': 7,
+ }
+
+ labels_map = {
+ 0: torch.tensor([7]),
+ 1: torch.tensor([1,6]),
+ 2: torch.tensor([4]),
+ 3: torch.tensor([0,3,5,8,10,15,16]),
+ 4: torch.tensor([11,13,14]),
+ 5: torch.tensor([9]),
+ 6: torch.tensor([17]),
+ 7: torch.tensor([2,12]),
+ }
+
+ lables2idx = dict((v,k) for k,v in labels2idx.items())
+ n_class = len(lables2idx)
+
+ segid_map = dict.fromkeys(labels_map[0].tolist(), 0)
+ segid_map.update(dict.fromkeys(labels_map[1].tolist(), 1))
+ segid_map.update(dict.fromkeys(labels_map[2].tolist(), 2))
+ segid_map.update(dict.fromkeys(labels_map[3].tolist(), 3))
+ segid_map.update(dict.fromkeys(labels_map[4].tolist(), 4))
+ segid_map.update(dict.fromkeys(labels_map[5].tolist(), 5))
+ segid_map.update(dict.fromkeys(labels_map[6].tolist(), 6))
+ segid_map.update(dict.fromkeys(labels_map[7].tolist(), 7))
+
+ torch.manual_seed(0)
+
+
+ # compute M given a style code.
+ @torch.no_grad()
+ def compute_M(w, generator, weights_deltas=None, device='cuda'):
+ M = []
+
+ # get segmentation
+ # _, outputs = generator(w, is_cluster=1)
+ _, outputs = generator(w, weights_deltas=weights_deltas)
+ cluster_layer = outputs[stop_idx][0]
+ activation = flatten_act(cluster_layer)
+ seg_mask = clusterer.predict(activation)
+ b,c,h,w = cluster_layer.size()
+
+ # create masks for each feature
+ all_seg_mask = []
+ seg_mask = torch.from_numpy(seg_mask).view(b,1,h,w,1).to(device)
+
+ for key in range(n_class):
+ # combine masks for all indices for a particular segmentation class
+ indices = labels_map[key].view(1,1,1,1,-1)
+ key_mask = (seg_mask == indices.to(device)).any(-1) #[b,1,h,w]
+ all_seg_mask.append(key_mask)
+
+ all_seg_mask = torch.stack(all_seg_mask, 1)
+
+ # go through each activation layer and compute M
+ for layer_idx in range(len(outputs)):
+ layer = outputs[layer_idx][1].to(device)
+ b,c,h,w = layer.size()
+ layer = F.instance_norm(layer)
+ layer = layer.pow(2)
+
+ # resize the segmentation masks to current activations' resolution
+ layer_seg_mask = F.interpolate(all_seg_mask.flatten(0,1).float(), align_corners=False,
+ size=(h,w), mode='bilinear').view(b,-1,1,h,w)
+
+ masked_layer = layer.unsqueeze(1) * layer_seg_mask # [b,k,c,h,w]
+ masked_layer = (masked_layer.sum([3,4])/ (h*w))#[b,k,c]
+
+ M.append(masked_layer.to(device))
+
+ M = torch.cat(M, -1) #[b, k, c]
+
+ # softmax to assign each channel to a particular segmentation class
+ M = F.softmax(M/.1, 1)
+ # simple thresholding
+ M = (M>.8).float()
+
+ # zero out torgb transfers, from https://arxiv.org/abs/2011.12799
+ for i in range(n_class):
+ part_M = style2list(M[:, i])
+ for j in range(len(part_M)):
+ if j in rgb_layer_idx:
+ part_M[j].zero_()
+ part_M = list2style(part_M)
+ M[:, i] = part_M
+
+ return M
+
+ def blend_latents (source_latent, ref_latent, generator, src_deltas=None, ref_deltas=None, device='cuda'):
+ source = generator.get_latent(source_latent[0].unsqueeze(0), truncation=1, is_latent=True)
+ ref = generator.get_latent(ref_latent[0].unsqueeze(0), truncation=1, is_latent=True)
+ source_M = compute_M(source, generator, weights_deltas=src_deltas, device='cpu')
+ ref_M = compute_M(ref, generator, weights_deltas=ref_deltas, device='cpu')
+
+ blend_deltas = src_deltas
+
+ max_M = torch.max(source_M.expand_as(ref_M), ref_M)
+ max_M = add_pose(max_M, labels2idx)
+ idx = labels2idx['hair']
+
+ part_M = max_M[:, idx].to(device)
+ part_M_mask = style2list(part_M)
+
+ blend = style2list((add_direction(source, ref, part_M, 1.3)))
+ blend_out, _ = generator(blend, weights_deltas=blend_deltas)
+
+ return blend_out, blend
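
Note: compute_M above segments the generator's stop_idx feature maps with the pre-fit clusterer and returns a hard 0/1 tensor M of shape [batch, n_class, total_style_channels], one row per region in labels2idx; blend_latents then takes the element-wise maximum of the source and reference assignments and pushes the source style toward the reference only on the channels assigned to 'hair'. A hedged sketch of inspecting M directly; ris_gen is the generator loaded in app.py, and source_latent is assumed to be any inverted latent batch.

# Hedged sketch: the intermediate M that blend_latents builds internally.
# ris_gen is the RIS generator loaded in app.py; source_latent is assumed to
# be an inverted latent batch (e.g. from Hyperstyle or e4e).
from ris.blend import compute_M, labels2idx

source = ris_gen.get_latent(source_latent[0].unsqueeze(0), truncation=1, is_latent=True)
M = compute_M(source, ris_gen, weights_deltas=None, device='cpu')

hair_mask = M[:, labels2idx['hair']]  # 0/1 over all style channels
print(M.shape, int(hair_mask.sum()), 'style channels assigned to hair')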
ris/e4e_projection.py ADDED
@@ -0,0 +1,37 @@
+ import os
+ import sys
+ import numpy as np
+ from PIL import Image
+ import torch
+ import torchvision.transforms as transforms
+ from argparse import Namespace
+ from e4e.models.psp import pSp
+ from util import *
+
+
+ @ torch.no_grad()
+ def projection(img, name, generator, device='cuda'):
+ model_path = 'e4e_ffhq_encode.pt'
+ ensure_checkpoint_exists(model_path)
+ ckpt = torch.load(model_path, map_location='cpu')
+ opts = ckpt['opts']
+ opts['checkpoint_path'] = model_path
+ opts= Namespace(**opts)
+ net = pSp(opts, device).eval().to(device)
+
+ transform = transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(256),
+ transforms.ToTensor(),
+ transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+ ]
+ )
+
+ img = transform(img).unsqueeze(0).to(device)
+ images, w_plus = net(img, randomize_noise=False, return_latents=True)
+ result_file = {}
+ filename = './inversion_codes/' + name + '.pt'
+ result_file['latent'] = w_plus[0]
+ torch.save(result_file, filename)
+
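
Note: projection() encodes one PIL image with a pretrained e4e encoder and saves the W+ code to ./inversion_codes/<name>.pt; the generator argument is accepted but not used. A hedged usage sketch, assuming the image path, that e4e_ffhq_encode.pt is available where ensure_checkpoint_exists() looks for it, and that ./inversion_codes/ already exists:

# Hedged usage sketch for ris/e4e_projection.projection.
# The source image path, checkpoint location and output directory are assumptions.
import torch
from PIL import Image
from ris.e4e_projection import projection

img = Image.open('source.jpg').convert('RGB')
projection(img, 'source', generator=None, device='cuda')
w_plus = torch.load('./inversion_codes/source.pt')['latent']  # saved W+ code for one face
print(w_plus.shape)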
ris/legacy.py ADDED
@@ -0,0 +1,320 @@
1
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import click
10
+ import pickle
11
+ import re
12
+ import copy
13
+ import numpy as np
14
+ import torch
15
+ import dnnlib
16
+ from torch_utils import misc
17
+
18
+ #----------------------------------------------------------------------------
19
+
20
+ def load_network_pkl(f, force_fp16=False):
21
+ data = _LegacyUnpickler(f).load()
22
+
23
+ # Legacy TensorFlow pickle => convert.
24
+ if isinstance(data, tuple) and len(data) == 3 and all(isinstance(net, _TFNetworkStub) for net in data):
25
+ tf_G, tf_D, tf_Gs = data
26
+ G = convert_tf_generator(tf_G)
27
+ D = convert_tf_discriminator(tf_D)
28
+ G_ema = convert_tf_generator(tf_Gs)
29
+ data = dict(G=G, D=D, G_ema=G_ema)
30
+
31
+ # Add missing fields.
32
+ if 'training_set_kwargs' not in data:
33
+ data['training_set_kwargs'] = None
34
+ if 'augment_pipe' not in data:
35
+ data['augment_pipe'] = None
36
+
37
+ # Validate contents.
38
+ assert isinstance(data['G'], torch.nn.Module)
39
+ assert isinstance(data['D'], torch.nn.Module)
40
+ assert isinstance(data['G_ema'], torch.nn.Module)
41
+ assert isinstance(data['training_set_kwargs'], (dict, type(None)))
42
+ assert isinstance(data['augment_pipe'], (torch.nn.Module, type(None)))
43
+
44
+ # Force FP16.
45
+ if force_fp16:
46
+ for key in ['G', 'D', 'G_ema']:
47
+ old = data[key]
48
+ kwargs = copy.deepcopy(old.init_kwargs)
49
+ if key.startswith('G'):
50
+ kwargs.synthesis_kwargs = dnnlib.EasyDict(kwargs.get('synthesis_kwargs', {}))
51
+ kwargs.synthesis_kwargs.num_fp16_res = 4
52
+ kwargs.synthesis_kwargs.conv_clamp = 256
53
+ if key.startswith('D'):
54
+ kwargs.num_fp16_res = 4
55
+ kwargs.conv_clamp = 256
56
+ if kwargs != old.init_kwargs:
57
+ new = type(old)(**kwargs).eval().requires_grad_(False)
58
+ misc.copy_params_and_buffers(old, new, require_all=True)
59
+ data[key] = new
60
+ return data
61
+
62
+ #----------------------------------------------------------------------------
63
+
64
+ class _TFNetworkStub(dnnlib.EasyDict):
65
+ pass
66
+
67
+ class _LegacyUnpickler(pickle.Unpickler):
68
+ def find_class(self, module, name):
69
+ if module == 'dnnlib.tflib.network' and name == 'Network':
70
+ return _TFNetworkStub
71
+ return super().find_class(module, name)
72
+
73
+ #----------------------------------------------------------------------------
74
+
75
+ def _collect_tf_params(tf_net):
76
+ # pylint: disable=protected-access
77
+ tf_params = dict()
78
+ def recurse(prefix, tf_net):
79
+ for name, value in tf_net.variables:
80
+ tf_params[prefix + name] = value
81
+ for name, comp in tf_net.components.items():
82
+ recurse(prefix + name + '/', comp)
83
+ recurse('', tf_net)
84
+ return tf_params
85
+
86
+ #----------------------------------------------------------------------------
87
+
88
+ def _populate_module_params(module, *patterns):
89
+ for name, tensor in misc.named_params_and_buffers(module):
90
+ found = False
91
+ value = None
92
+ for pattern, value_fn in zip(patterns[0::2], patterns[1::2]):
93
+ match = re.fullmatch(pattern, name)
94
+ if match:
95
+ found = True
96
+ if value_fn is not None:
97
+ value = value_fn(*match.groups())
98
+ break
99
+ try:
100
+ assert found
101
+ if value is not None:
102
+ tensor.copy_(torch.from_numpy(np.array(value)))
103
+ except:
104
+ print(name, list(tensor.shape))
105
+ raise
106
+
107
+ #----------------------------------------------------------------------------
108
+
109
+ def convert_tf_generator(tf_G):
110
+ if tf_G.version < 4:
111
+ raise ValueError('TensorFlow pickle version too low')
112
+
113
+ # Collect kwargs.
114
+ tf_kwargs = tf_G.static_kwargs
115
+ known_kwargs = set()
116
+ def kwarg(tf_name, default=None, none=None):
117
+ known_kwargs.add(tf_name)
118
+ val = tf_kwargs.get(tf_name, default)
119
+ return val if val is not None else none
120
+
121
+ # Convert kwargs.
122
+ kwargs = dnnlib.EasyDict(
123
+ z_dim = kwarg('latent_size', 512),
124
+ c_dim = kwarg('label_size', 0),
125
+ w_dim = kwarg('dlatent_size', 512),
126
+ img_resolution = kwarg('resolution', 1024),
127
+ img_channels = kwarg('num_channels', 3),
128
+ mapping_kwargs = dnnlib.EasyDict(
129
+ num_layers = kwarg('mapping_layers', 8),
130
+ embed_features = kwarg('label_fmaps', None),
131
+ layer_features = kwarg('mapping_fmaps', None),
132
+ activation = kwarg('mapping_nonlinearity', 'lrelu'),
133
+ lr_multiplier = kwarg('mapping_lrmul', 0.01),
134
+ w_avg_beta = kwarg('w_avg_beta', 0.995, none=1),
135
+ ),
136
+ synthesis_kwargs = dnnlib.EasyDict(
137
+ channel_base = kwarg('fmap_base', 16384) * 2,
138
+ channel_max = kwarg('fmap_max', 512),
139
+ num_fp16_res = kwarg('num_fp16_res', 0),
140
+ conv_clamp = kwarg('conv_clamp', None),
141
+ architecture = kwarg('architecture', 'skip'),
142
+ resample_filter = kwarg('resample_kernel', [1,3,3,1]),
143
+ use_noise = kwarg('use_noise', True),
144
+ activation = kwarg('nonlinearity', 'lrelu'),
145
+ ),
146
+ )
147
+
148
+ # Check for unknown kwargs.
149
+ kwarg('truncation_psi')
150
+ kwarg('truncation_cutoff')
151
+ kwarg('style_mixing_prob')
152
+ kwarg('structure')
153
+ unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs)
154
+ if len(unknown_kwargs) > 0:
155
+ raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0])
156
+
157
+ # Collect params.
158
+ tf_params = _collect_tf_params(tf_G)
159
+ for name, value in list(tf_params.items()):
160
+ match = re.fullmatch(r'ToRGB_lod(\d+)/(.*)', name)
161
+ if match:
162
+ r = kwargs.img_resolution // (2 ** int(match.group(1)))
163
+ tf_params[f'{r}x{r}/ToRGB/{match.group(2)}'] = value
164
+ kwargs.synthesis.kwargs.architecture = 'orig'
165
+ #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}')
166
+
167
+ # Convert params.
168
+ from training import networks
169
+ G = networks.Generator(**kwargs).eval().requires_grad_(False)
170
+ # pylint: disable=unnecessary-lambda
171
+ _populate_module_params(G,
172
+ r'mapping\.w_avg', lambda: tf_params[f'dlatent_avg'],
173
+ r'mapping\.embed\.weight', lambda: tf_params[f'mapping/LabelEmbed/weight'].transpose(),
174
+ r'mapping\.embed\.bias', lambda: tf_params[f'mapping/LabelEmbed/bias'],
175
+ r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'mapping/Dense{i}/weight'].transpose(),
176
+ r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'mapping/Dense{i}/bias'],
177
+ r'synthesis\.b4\.const', lambda: tf_params[f'synthesis/4x4/Const/const'][0],
178
+ r'synthesis\.b4\.conv1\.weight', lambda: tf_params[f'synthesis/4x4/Conv/weight'].transpose(3, 2, 0, 1),
179
+ r'synthesis\.b4\.conv1\.bias', lambda: tf_params[f'synthesis/4x4/Conv/bias'],
180
+ r'synthesis\.b4\.conv1\.noise_const', lambda: tf_params[f'synthesis/noise0'][0, 0],
181
+ r'synthesis\.b4\.conv1\.noise_strength', lambda: tf_params[f'synthesis/4x4/Conv/noise_strength'],
182
+ r'synthesis\.b4\.conv1\.affine\.weight', lambda: tf_params[f'synthesis/4x4/Conv/mod_weight'].transpose(),
183
+ r'synthesis\.b4\.conv1\.affine\.bias', lambda: tf_params[f'synthesis/4x4/Conv/mod_bias'] + 1,
184
+ r'synthesis\.b(\d+)\.conv0\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/weight'][::-1, ::-1].transpose(3, 2, 0, 1),
185
+ r'synthesis\.b(\d+)\.conv0\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/bias'],
186
+ r'synthesis\.b(\d+)\.conv0\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-5}'][0, 0],
187
+ r'synthesis\.b(\d+)\.conv0\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/noise_strength'],
188
+ r'synthesis\.b(\d+)\.conv0\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_weight'].transpose(),
189
+ r'synthesis\.b(\d+)\.conv0\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_bias'] + 1,
190
+ r'synthesis\.b(\d+)\.conv1\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/weight'].transpose(3, 2, 0, 1),
191
+ r'synthesis\.b(\d+)\.conv1\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/bias'],
192
+ r'synthesis\.b(\d+)\.conv1\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-4}'][0, 0],
193
+ r'synthesis\.b(\d+)\.conv1\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/noise_strength'],
194
+ r'synthesis\.b(\d+)\.conv1\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_weight'].transpose(),
195
+ r'synthesis\.b(\d+)\.conv1\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_bias'] + 1,
196
+ r'synthesis\.b(\d+)\.torgb\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/weight'].transpose(3, 2, 0, 1),
197
+ r'synthesis\.b(\d+)\.torgb\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/bias'],
198
+ r'synthesis\.b(\d+)\.torgb\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_weight'].transpose(),
199
+ r'synthesis\.b(\d+)\.torgb\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_bias'] + 1,
200
+ r'synthesis\.b(\d+)\.skip\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Skip/weight'][::-1, ::-1].transpose(3, 2, 0, 1),
201
+ r'.*\.resample_filter', None,
202
+ )
203
+ return G
204
+
205
+ #----------------------------------------------------------------------------
206
+
207
+ def convert_tf_discriminator(tf_D):
208
+ if tf_D.version < 4:
209
+ raise ValueError('TensorFlow pickle version too low')
210
+
211
+ # Collect kwargs.
212
+ tf_kwargs = tf_D.static_kwargs
213
+ known_kwargs = set()
214
+ def kwarg(tf_name, default=None):
215
+ known_kwargs.add(tf_name)
216
+ return tf_kwargs.get(tf_name, default)
217
+
218
+ # Convert kwargs.
219
+ kwargs = dnnlib.EasyDict(
220
+ c_dim = kwarg('label_size', 0),
221
+ img_resolution = kwarg('resolution', 1024),
222
+ img_channels = kwarg('num_channels', 3),
223
+ architecture = kwarg('architecture', 'resnet'),
224
+ channel_base = kwarg('fmap_base', 16384) * 2,
225
+ channel_max = kwarg('fmap_max', 512),
226
+ num_fp16_res = kwarg('num_fp16_res', 0),
227
+ conv_clamp = kwarg('conv_clamp', None),
228
+ cmap_dim = kwarg('mapping_fmaps', None),
229
+ block_kwargs = dnnlib.EasyDict(
230
+ activation = kwarg('nonlinearity', 'lrelu'),
231
+ resample_filter = kwarg('resample_kernel', [1,3,3,1]),
232
+ freeze_layers = kwarg('freeze_layers', 0),
233
+ ),
234
+ mapping_kwargs = dnnlib.EasyDict(
235
+ num_layers = kwarg('mapping_layers', 0),
236
+ embed_features = kwarg('mapping_fmaps', None),
237
+ layer_features = kwarg('mapping_fmaps', None),
238
+ activation = kwarg('nonlinearity', 'lrelu'),
239
+ lr_multiplier = kwarg('mapping_lrmul', 0.1),
240
+ ),
241
+ epilogue_kwargs = dnnlib.EasyDict(
242
+ mbstd_group_size = kwarg('mbstd_group_size', None),
243
+ mbstd_num_channels = kwarg('mbstd_num_features', 1),
244
+ activation = kwarg('nonlinearity', 'lrelu'),
245
+ ),
246
+ )
247
+
248
+ # Check for unknown kwargs.
249
+ kwarg('structure')
250
+ unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs)
251
+ if len(unknown_kwargs) > 0:
252
+ raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0])
253
+
254
+ # Collect params.
255
+ tf_params = _collect_tf_params(tf_D)
256
+ for name, value in list(tf_params.items()):
257
+ match = re.fullmatch(r'FromRGB_lod(\d+)/(.*)', name)
258
+ if match:
259
+ r = kwargs.img_resolution // (2 ** int(match.group(1)))
260
+ tf_params[f'{r}x{r}/FromRGB/{match.group(2)}'] = value
261
+ kwargs.architecture = 'orig'
262
+ #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}')
263
+
264
+ # Convert params.
265
+ from training import networks
266
+ D = networks.Discriminator(**kwargs).eval().requires_grad_(False)
267
+ # pylint: disable=unnecessary-lambda
268
+ _populate_module_params(D,
269
+ r'b(\d+)\.fromrgb\.weight', lambda r: tf_params[f'{r}x{r}/FromRGB/weight'].transpose(3, 2, 0, 1),
270
+ r'b(\d+)\.fromrgb\.bias', lambda r: tf_params[f'{r}x{r}/FromRGB/bias'],
271
+ r'b(\d+)\.conv(\d+)\.weight', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/weight'].transpose(3, 2, 0, 1),
272
+ r'b(\d+)\.conv(\d+)\.bias', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/bias'],
273
+ r'b(\d+)\.skip\.weight', lambda r: tf_params[f'{r}x{r}/Skip/weight'].transpose(3, 2, 0, 1),
274
+ r'mapping\.embed\.weight', lambda: tf_params[f'LabelEmbed/weight'].transpose(),
275
+ r'mapping\.embed\.bias', lambda: tf_params[f'LabelEmbed/bias'],
276
+ r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'Mapping{i}/weight'].transpose(),
277
+ r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'Mapping{i}/bias'],
278
+ r'b4\.conv\.weight', lambda: tf_params[f'4x4/Conv/weight'].transpose(3, 2, 0, 1),
279
+ r'b4\.conv\.bias', lambda: tf_params[f'4x4/Conv/bias'],
280
+ r'b4\.fc\.weight', lambda: tf_params[f'4x4/Dense0/weight'].transpose(),
281
+ r'b4\.fc\.bias', lambda: tf_params[f'4x4/Dense0/bias'],
282
+ r'b4\.out\.weight', lambda: tf_params[f'Output/weight'].transpose(),
283
+ r'b4\.out\.bias', lambda: tf_params[f'Output/bias'],
284
+ r'.*\.resample_filter', None,
285
+ )
286
+ return D
287
+
288
+ #----------------------------------------------------------------------------
289
+
290
+ @click.command()
291
+ @click.option('--source', help='Input pickle', required=True, metavar='PATH')
292
+ @click.option('--dest', help='Output pickle', required=True, metavar='PATH')
293
+ @click.option('--force-fp16', help='Force the networks to use FP16', type=bool, default=False, metavar='BOOL', show_default=True)
294
+ def convert_network_pickle(source, dest, force_fp16):
295
+ """Convert legacy network pickle into the native PyTorch format.
296
+
297
+ The tool is able to load the main network configurations exported using the TensorFlow version of StyleGAN2 or StyleGAN2-ADA.
298
+ It does not support e.g. StyleGAN2-ADA comparison methods, StyleGAN2 configs A-D, or StyleGAN1 networks.
299
+
300
+ Example:
301
+
302
+ \b
303
+ python legacy.py \\
304
+ --source=https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-cat-config-f.pkl \\
305
+ --dest=stylegan2-cat-config-f.pkl
306
+ """
307
+ print(f'Loading "{source}"...')
308
+ with dnnlib.util.open_url(source) as f:
309
+ data = load_network_pkl(f, force_fp16=force_fp16)
310
+ print(f'Saving "{dest}"...')
311
+ with open(dest, 'wb') as f:
312
+ pickle.dump(data, f)
313
+ print('Done.')
314
+
315
+ #----------------------------------------------------------------------------
316
+
317
+ if __name__ == "__main__":
318
+ convert_network_pickle() # pylint: disable=no-value-for-parameter
319
+
320
+ #----------------------------------------------------------------------------
ris/manipulator.py ADDED
@@ -0,0 +1,311 @@
1
+ import argparse
2
+ import copy
3
+ import os
4
+ import time
5
+ from tqdm import tqdm
6
+
7
+ import numpy as np
8
+ import PIL.Image
9
+ import torch
10
+
11
+ import clip
12
+ from wrapper import (FaceLandmarksDetector, Generator_wrapper,
13
+ VGGFeatExtractor, e4eEncoder, PivotTuning)
14
+ from projector import project
15
+
16
+ class Manipulator():
17
+ """Manipulator for style editing
18
+
19
+ in paper, use 100 image pairs to estimate the mean for alpha(magnitude of the perturbation) [-5, 5]
20
+
21
+ *** Args ***
22
+ G : Genertor wrapper for synthesis styles
23
+ device : torch.device
24
+ lst_alpha : magnitude of the perturbation
25
+ num_images : num images to process
26
+
27
+ *** Attributes ***
28
+ S : List[dict(str, torch.Tensor)] # length 2,000
29
+ styles : List[dict(str, torch.Tensor)] # length of num_images
30
+ (num_images, style)
31
+ lst_alpha : List[int]
32
+ boundary : (num_images, len_alpha)
33
+ edited_styles : List[styles]
34
+ edited_images : List[(num_images, 3, 1024, 1024)]
35
+ """
36
+ def __init__(
37
+ self,
38
+ G,
39
+ device,
40
+ lst_alpha=[0],
41
+ num_images=1,
42
+ start_ind=0,
43
+ face_preprocess=True,
44
+ dataset_name=''
45
+ ):
46
+ """Initialize
47
+ - use pre-saved generated latent/style from random Z
48
+ - to use projection, used method "set_real_img_projection"
49
+ """
50
+ assert start_ind + num_images < 2000
51
+ self.W = torch.load(f'tensor/W{dataset_name}.pt')
52
+ self.S = torch.load(f'tensor/S{dataset_name}.pt')
53
+ self.S_mean = torch.load(f'tensor/S_mean{dataset_name}.pt')
54
+ self.S_std = torch.load(f'tensor/S_std{dataset_name}.pt')
55
+
56
+ self.S = {layer: self.S[layer].to(device) for layer in G.style_layers}
57
+ self.styles = {layer: self.S[layer][start_ind:start_ind+num_images] for layer in G.style_layers}
58
+ self.latent = self.W[start_ind:start_ind+num_images]
59
+ self.latent = self.latent.to(device)
60
+ del self.W
61
+ del self.S
62
+
63
+ # S_mean, S_std for extracting global style direction
64
+ self.S_mean = {layer: self.S_mean[layer].to(device) for layer in G.style_layers}
65
+ self.S_std = {layer: self.S_std[layer].to(device) for layer in G.style_layers}
66
+
67
+ # setting
68
+ self.face_preprocess = face_preprocess
69
+ if face_preprocess:
70
+ self.landmarks_detector = FaceLandmarksDetector()
71
+ self.vgg16 = VGGFeatExtractor(device).module
72
+ self.W_projector_steps = 200
73
+ self.G = G
74
+ self.device = device
75
+ self.num_images = num_images
76
+ self.lst_alpha = lst_alpha
77
+ self.manipulate_layers = [layer for layer in G.style_layers if 'torgb' not in layer]
78
+
79
+ def set_alpha(self, lst_alpha):
80
+ """Setter for alpha
81
+ """
82
+ self.lst_alpha = lst_alpha
83
+
84
+ def set_real_img_projection(self, img, inv_mode='w', pti_mode=None):
85
+ """Set real img instead of pre-saved styles
86
+ Args :
87
+ - img : img directory or img file path to manipulate
88
+ - face aligned if self.face_preprocess == True
89
+ - set self.num_images
90
+ - inv_mode : inversion mode, setting self.latent, self.styles
91
+ - w : use W projector (projector.project)
92
+ - w+ : use e4e encoder (wrapper.e4eEncoder)
93
+ - pti_mode : pivot tuning inversion mode (wrapper.PivotTuning)
94
+ - None
95
+ - w : W latent pivot tuning
96
+ - s : S style pivot tuning
97
+ """
98
+ assert inv_mode in ['w', 'w+']
99
+ assert pti_mode in [None, 'w', 's']
100
+ allowed_extensions = ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']
101
+
102
+ # img directory input
103
+ if os.path.isdir(img):
104
+ imgpaths = sorted(os.listdir(img))
105
+ imgpaths = [os.path.join(img, imgpath)
106
+ for imgpath in imgpaths
107
+ if imgpath.split('.')[-1] in allowed_extensions]
108
+ # img file path input
109
+ else:
110
+ imgpaths = [img]
111
+
112
+ self.num_images = len(imgpaths)
113
+ if inv_mode == 'w':
114
+ targets = list()
115
+ target_pils = list()
116
+ for imgpath in imgpaths:
117
+ if self.face_preprocess:
118
+ target_pil = self.landmarks_detector(imgpath)
119
+ else:
120
+ target_pil = PIL.Image.open(imgpath).convert('RGB')
121
+ target_pils.append(target_pil)
122
+ w, h = target_pil.size
123
+ s = min(w, h)
124
+ target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2))
125
+ target_pil = target_pil.resize((self.G.G.img_resolution, self.G.G.img_resolution),
126
+ PIL.Image.LANCZOS)
127
+ target_uint8 = np.array(target_pil, dtype=np.uint8)
128
+ targets.append(torch.Tensor(target_uint8.transpose([2,0,1])).to(self.device))
129
+
130
+ self.latent = list()
131
+ for target in tqdm(targets, total=len(targets)):
132
+ projected_w_steps = project(
133
+ self.G.G,
134
+ self.vgg16,
135
+ target=target,
136
+ num_steps=self.W_projector_steps, # TODO get projector steps from configs
137
+ device=self.device,
138
+ verbose=False,
139
+ )
140
+ self.latent.append(projected_w_steps[-1])
141
+ self.latent = torch.stack(self.latent)
142
+ self.styles = self.G.mapping_stylespace(self.latent)
143
+
144
+ else: # inv_mode == 'w+'
145
+ # use e4e encoder
146
+ target_pils = list()
147
+ for imgpath in imgpaths:
148
+ if self.face_preprocess:
149
+ target_pil = self.landmarks_detector(imgpath)
150
+ else:
151
+ target_pil = PIL.Image.open(imgpath).convert('RGB')
152
+ target_pils.append(target_pil)
153
+
154
+ self.encoder = e4eEncoder(self.device)
155
+ self.latent = self.encoder(target_pils)
156
+ self.styles = self.G.mapping_stylespace(self.latent)
157
+
158
+ if pti_mode is not None: # w or s
159
+ # pivot tuning inversion
160
+ pti = PivotTuning(self.device, self.G.G, mode=pti_mode)
161
+ new_G = pti(self.latent, target_pils)
162
+ self.G.G = new_G
163
+
164
+ def manipulate(self, delta_s):
165
+ """Edit style by given delta_style
166
+ - use perturbation (delta s) * (alpha) as a boundary
167
+ """
168
+ styles = [copy.deepcopy(self.styles) for _ in range(len(self.lst_alpha))]
169
+
170
+ for (alpha, style) in zip(self.lst_alpha, styles):
171
+ for layer in self.G.style_layers:
172
+ perturbation = delta_s[layer] * alpha
173
+ style[layer] += perturbation
174
+ return styles
175
+
176
+ def manipulate_one_channel(self, layer, channel_ind:int):
177
+ """Edit style from given layer, channel index
178
+ - use mean value of pre-saved style
179
+ - use perturbation (pre-saved style std) * (alpha) as a boundary
180
+ """
181
+ assert layer in self.G.style_layers
182
+ assert 0 <= channel_ind < self.styles[layer].shape[1]
183
+ boundary = self.S_std[layer][channel_ind].item()
184
+ # apply self.S_mean value for given layer, channel_ind
185
+ for ind in range(self.num_images):
186
+ self.styles[layer][ind][channel_ind] = self.S_mean[layer][channel_ind]
187
+ styles = [copy.deepcopy(self.styles) for _ in range(len(self.lst_alpha))]
188
+
189
+ perturbation = (torch.Tensor(self.lst_alpha) * boundary).numpy().tolist()
190
+
191
+ # apply one channel manipulation
192
+ for img_ind in range(self.num_images):
193
+ for edit_ind, delta in enumerate(perturbation):
194
+ styles[edit_ind][layer][img_ind][channel_ind] += delta
195
+
196
+ return styles
197
+
198
+ def synthesis_from_styles(self, styles, start_ind, end_ind):
199
+ """Synthesis edited styles from styles, lst_alpha
200
+ """
201
+ styles_ = list()
202
+ for style in styles:
203
+ style_ = dict()
204
+ for layer in self.G.style_layers:
205
+ style_[layer] = style[layer][start_ind:end_ind].to(self.device)
206
+ styles_.append(style_)
207
+ print("synthesis_from_styles", type(style_))
208
+ imgs = [self.G.synthesis_from_stylespace(self.latent[start_ind:end_ind], style_).cpu()
209
+ for style_ in styles_]
210
+ return imgs
211
+
212
+
213
+ def extract_global_direction(G, device, lst_alpha, num_images, dataset_name=''):
214
+ """Extract global style direction in 100 images
215
+ """
216
+ assert len(lst_alpha) == 2
217
+ model, preprocess = clip.load("ViT-B/32", device=device)
218
+
219
+ # lindex in original tf version
220
+ manipulate_layers = [layer for layer in G.style_layers if 'torgb' not in layer]
221
+
222
+ # total channel: 6048 (1024 resolution)
223
+ resolution = G.G.img_resolution
224
+ latent = torch.randn([1,G.to_w_idx[f'G.synthesis.b{resolution}.torgb.affine']+1,512]).to(device) # 1024 -> 18, 512 -> 16, 256 -> 14
225
+ style = G.mapping_stylespace(latent)
226
+ cnt = 0
227
+ for layer in manipulate_layers:
228
+ cnt += style[layer].shape[1]
229
+ del latent
230
+ del style
231
+
232
+ # 1024 -> 6048 channels, 256 -> 4928 channels
233
+ print(f"total channels to manipulate: {cnt}")
234
+
235
+ manipulator = Manipulator(G, device, lst_alpha, num_images, face_preprocess=False, dataset_name=dataset_name)
236
+
237
+ all_feats = list()
238
+
239
+ for layer in manipulate_layers:
240
+ print(f'\nStyle manipulation in layer "{layer}"')
241
+ channel_num = manipulator.styles[layer].shape[1]
242
+
243
+ for channel_ind in tqdm(range(channel_num), total=channel_num):
244
+ styles = manipulator.manipulate_one_channel(layer, channel_ind)
245
+ # 2 * 100 images
246
+ batchsize = 10
247
+ nbatch = int(100 / batchsize)
248
+ feats = list()
249
+ for img_ind in range(0, nbatch): # batch size 10 * 2
250
+ start = img_ind*nbatch
251
+ end = img_ind*nbatch + batchsize
252
+ synth_imgs = manipulator.synthesis_from_styles(styles, start, end)
253
+ synth_imgs = [(synth_img.permute(0,2,3,1)*127.5+128).clamp(0,255).to(torch.uint8).numpy()
254
+ for synth_img in synth_imgs]
255
+ imgs = list()
256
+ for i in range(batchsize):
257
+ img0 = PIL.Image.fromarray(synth_imgs[0][i])
258
+ img1 = PIL.Image.fromarray(synth_imgs[1][i])
259
+ imgs.append(preprocess(img0).unsqueeze(0).to(device))
260
+ imgs.append(preprocess(img1).unsqueeze(0).to(device))
261
+ with torch.no_grad():
262
+ feat = model.encode_image(torch.cat(imgs))
263
+ feats.append(feat)
264
+ all_feats.append(torch.cat(feats).view([-1, 2, 512]).cpu())
265
+
266
+ all_feats = torch.stack(all_feats).numpy()
267
+
268
+ fs = all_feats
269
+ fs1=fs/np.linalg.norm(fs,axis=-1)[:,:,:,None]
270
+ fs2=fs1[:,:,1,:]-fs1[:,:,0,:] # 5*sigma - (-5)*sigma
271
+ fs3=fs2/np.linalg.norm(fs2,axis=-1)[:,:,None]
272
+ fs3=fs3.mean(axis=1)
273
+ fs3=fs3/np.linalg.norm(fs3,axis=-1)[:,None]
274
+
275
+ np.save(f'tensor/fs3{dataset_name}.npy', fs3) # global style direction
276
+
277
+
278
+ if __name__ == '__main__':
279
+ parser = argparse.ArgumentParser()
280
+
281
+ parser.add_argument('runtype', type=str, default='test')
282
+ parser.add_argument('--ckpt', type=str, default='pretrained/ffhq.pkl')
283
+ parser.add_argument('--face_preprocess', type=bool, default=True)
284
+ parser.add_argument('--dataset_name', type=str, default='')
285
+ args = parser.parse_args()
286
+
287
+ runtype = args.runtype
288
+ assert runtype in ['test', 'extract']
289
+
290
+ device = torch.device('cuda:0')
291
+ ckpt = args.ckpt
292
+ G = Generator(ckpt, device)
293
+
294
+ face_preprocess = args.face_preprocess
295
+ dataset_name = args.dataset_name
296
+
297
+ if runtype == 'test': # test manipulator
298
+ num_images = 100
299
+ lst_alpha = [-5, 0, 5]
300
+ layer = G.style_layers[6]
301
+ channel_ind = 501
302
+ manipulator = Manipulator(G, device, lst_alpha, num_images, face_preprocess=face_preprocess, dataset_name=dataset_name)
303
+ styles = manipulator.manipulate_one_channel(layer, channel_ind)
304
+ start_ind, end_ind= 0, 10
305
+ imgs = manipulator.synthesis_from_styles(styles, start_ind, end_ind)
306
+ print(len(imgs), imgs[0].shape)
307
+
308
+ elif runtype == 'extract': # extract global style direction from "tensor/S.pt"
309
+ num_images = 100
310
+ lst_alpha = [-5, 5]
311
+ extract_global_direction(G, device, lst_alpha, num_images, dataset_name=dataset_name)
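
Note: manipulator.py is a StyleCLIP-style global-direction toolkit: Manipulator.manipulate adds delta_s[layer] * alpha to every style layer for each alpha in lst_alpha, manipulate_one_channel perturbs a single style channel by its pre-saved standard deviation, and extract_global_direction builds the per-channel CLIP direction file fs3 from 100 image pairs. A hedged sketch of the single-channel edit loop, mirroring the 'test' branch above; G, the tensor/*.pt dumps and the wrapper/projector modules this file imports are assumptions.

# Hedged sketch of a single-channel style edit with Manipulator.
# G is the generator wrapper this module expects; the pre-saved tensor/*.pt
# files must exist. See the __main__ block above for the original usage.
import torch
from ris.manipulator import Manipulator

device = torch.device('cuda:0')
manip = Manipulator(G, device, lst_alpha=[-5, 0, 5], num_images=4, face_preprocess=False)
styles = manip.manipulate_one_channel(G.style_layers[6], channel_ind=501)
imgs = manip.synthesis_from_styles(styles, 0, 4)  # one image batch per alpha
print(len(imgs), imgs[0].shape)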
ris/model.py ADDED
@@ -0,0 +1,786 @@
1
+ import math
2
+ import random
3
+ import functools
4
+ import operator
5
+
6
+ import torch
7
+ import torchvision
8
+ from torch import nn
9
+ from torch.nn import functional as F
10
+ from torch.autograd import Function
11
+
12
+ from .op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d
13
+
14
+
15
+ class PixelNorm(nn.Module):
16
+ def __init__(self):
17
+ super().__init__()
18
+
19
+ def forward(self, input):
20
+ return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
21
+
22
+ class To4d(nn.Module):
23
+ def __init__(self):
24
+ super().__init__()
25
+
26
+ def forward(self, input):
27
+ return input.view(*input.size(),1,1)
28
+
29
+ def make_kernel(k):
30
+ k = torch.tensor(k, dtype=torch.float32)
31
+
32
+ if k.ndim == 1:
33
+ k = k[None, :] * k[:, None]
34
+
35
+ k /= k.sum()
36
+
37
+ return k
38
+
39
+
40
+ class Upsample(nn.Module):
41
+ def __init__(self, kernel, factor=2):
42
+ super().__init__()
43
+
44
+ self.factor = factor
45
+ kernel = make_kernel(kernel) * (factor ** 2)
46
+ self.register_buffer('kernel', kernel)
47
+
48
+ p = kernel.shape[0] - factor
49
+
50
+ pad0 = (p + 1) // 2 + factor - 1
51
+ pad1 = p // 2
52
+
53
+ self.pad = (pad0, pad1)
54
+
55
+ def forward(self, input):
56
+ out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)
57
+
58
+ return out
59
+
60
+
61
+ class Downsample(nn.Module):
62
+ def __init__(self, kernel, factor=2):
63
+ super().__init__()
64
+
65
+ self.factor = factor
66
+ kernel = make_kernel(kernel)
67
+ self.register_buffer('kernel', kernel)
68
+
69
+ p = kernel.shape[0] - factor
70
+
71
+ pad0 = (p + 1) // 2
72
+ pad1 = p // 2
73
+
74
+ self.pad = (pad0, pad1)
75
+
76
+ def forward(self, input):
77
+ out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad)
78
+
79
+ return out
80
+
81
+
82
+ class Blur(nn.Module):
83
+ def __init__(self, kernel, pad, upsample_factor=1):
84
+ super().__init__()
85
+
86
+ kernel = make_kernel(kernel)
87
+
88
+ if upsample_factor > 1:
89
+ kernel = kernel * (upsample_factor ** 2)
90
+
91
+ self.register_buffer('kernel', kernel)
92
+
93
+ self.pad = pad
94
+
95
+ def forward(self, input):
96
+ out = upfirdn2d(input, self.kernel, pad=self.pad)
97
+
98
+ return out
99
+
100
+
101
+ class EqualConv2d(nn.Module):
102
+ def __init__(
103
+ self, in_channel, out_channel, kernel_size, groups=1, stride=1, padding=0, bias=True, lr_mul=1
104
+ ):
105
+ super().__init__()
106
+
107
+ self.weight = nn.Parameter(
108
+ torch.randn(out_channel, in_channel//groups, kernel_size, kernel_size).div_(lr_mul)
109
+ )
110
+ self.scale = lr_mul / math.sqrt((in_channel//groups) * kernel_size ** 2)
111
+
112
+ self.stride = stride
113
+ self.padding = padding
114
+ self.groups = groups
115
+ self.lr_mul =lr_mul
116
+
117
+ if bias:
118
+ self.bias = nn.Parameter(torch.zeros(out_channel))
119
+
120
+ else:
121
+ self.bias = None
122
+
123
+ def forward(self, input):
124
+ bias = self.bias * self.lr_mul if self.bias is not None else None
125
+ out = F.conv2d(
126
+ input,
127
+ self.weight * self.scale,
128
+ bias=self.bias,
129
+ stride=self.stride,
130
+ padding=self.padding,
131
+ groups=self.groups
132
+ )
133
+
134
+ return out
135
+
136
+ def __repr__(self):
137
+ return (
138
+ f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},'
139
+ f' {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})'
140
+ )
141
+
142
+
143
+ class EqualLinear(nn.Module):
144
+ def __init__(
145
+ self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
146
+ ):
147
+ super().__init__()
148
+
149
+ self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))
150
+
151
+ if bias:
152
+ self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))
153
+
154
+ else:
155
+ self.bias = None
156
+
157
+ self.activation = activation
158
+
159
+ self.scale = (1 / math.sqrt(in_dim)) * lr_mul
160
+ self.lr_mul = lr_mul
161
+
162
+ def forward(self, input):
163
+ if self.activation:
164
+ out = F.linear(input, self.weight * self.scale)
165
+ out = fused_leaky_relu(out, self.bias * self.lr_mul)
166
+
167
+ else:
168
+ out = F.linear(
169
+ input, self.weight * self.scale, bias=self.bias * self.lr_mul
170
+ )
171
+
172
+ return out
173
+
174
+ def __repr__(self):
175
+ return (
176
+ f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})'
177
+ )
178
+
179
+
180
+ class ScaledLeakyReLU(nn.Module):
181
+ def __init__(self, negative_slope=0.2):
182
+ super().__init__()
183
+
184
+ self.negative_slope = negative_slope
185
+
186
+ def forward(self, input):
187
+ out = F.leaky_relu(input, negative_slope=self.negative_slope)
188
+
189
+ return out * math.sqrt(2)
190
+
191
+
192
+ class ModulatedConv2d(nn.Module):
193
+ def __init__(
194
+ self,
195
+ in_channel,
196
+ out_channel,
197
+ kernel_size,
198
+ style_dim,
199
+ demodulate=True,
200
+ upsample=False,
201
+ downsample=False,
202
+ blur_kernel=[1, 3, 3, 1],
203
+ ):
204
+ super().__init__()
205
+
206
+ self.eps = 1e-8
207
+ self.kernel_size = kernel_size
208
+ self.in_channel = in_channel
209
+ self.out_channel = out_channel
210
+ self.upsample = upsample
211
+ self.downsample = downsample
212
+
213
+ if upsample:
214
+ factor = 2
215
+ p = (len(blur_kernel) - factor) - (kernel_size - 1)
216
+ pad0 = (p + 1) // 2 + factor - 1
217
+ pad1 = p // 2 + 1
218
+
219
+ self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)
220
+
221
+ if downsample:
222
+ factor = 2
223
+ p = (len(blur_kernel) - factor) + (kernel_size - 1)
224
+ pad0 = (p + 1) // 2
225
+ pad1 = p // 2
226
+
227
+ self.blur = Blur(blur_kernel, pad=(pad0, pad1))
228
+
229
+ fan_in = in_channel * kernel_size ** 2
230
+ self.scale = 1 / math.sqrt(fan_in)
231
+ self.padding = kernel_size // 2
232
+
233
+ self.weight = nn.Parameter(
234
+ torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
235
+ )
236
+
237
+ self.modulation = EqualLinear(style_dim, in_channel, bias_init=1)
238
+
239
+ self.demodulate = demodulate
240
+
241
+ def __repr__(self):
242
+ return (
243
+ f'{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, '
244
+ f'upsample={self.upsample}, downsample={self.downsample})'
245
+ )
246
+ def get_latent(self, style):
247
+ style = self.modulation(style)
248
+ return style
249
+
250
+ def forward(self, input, style, weights_delta=None):
251
+ batch, in_channel, height, width = input.shape
252
+
253
+ # style = self.modulation(style).view(batch, 1, in_channel, 1, 1)
254
+ style = style.view(batch, 1, in_channel, 1, 1)
255
+
256
+
257
+ if weights_delta is None:
258
+ weight = self.scale * self.weight * style
259
+ else:
260
+ weight = self.scale * (self.weight * (1 + weights_delta) * style)
261
+
262
+
263
+ if self.demodulate:
264
+ demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8)
265
+ weight = weight * demod.view(batch, self.out_channel, 1, 1, 1)
266
+
267
+ weight = weight.view(
268
+ batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size
269
+ )
270
+
271
+ if self.upsample:
272
+ input = input.view(1, batch * in_channel, height, width)
273
+ weight = weight.view(
274
+ batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size
275
+ )
276
+ weight = weight.transpose(1, 2).reshape(
277
+ batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size
278
+ )
279
+ out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch)
280
+ _, _, height, width = out.shape
281
+ out = out.view(batch, self.out_channel, height, width)
282
+ out = self.blur(out)
283
+
284
+ elif self.downsample:
285
+ input = self.blur(input)
286
+ _, _, height, width = input.shape
287
+ input = input.view(1, batch * in_channel, height, width)
288
+ out = F.conv2d(input, weight, padding=0, stride=2, groups=batch)
289
+ _, _, height, width = out.shape
290
+ out = out.view(batch, self.out_channel, height, width)
291
+
292
+ else:
293
+ input = input.view(1, batch * in_channel, height, width)
294
+ out = F.conv2d(input, weight, padding=self.padding, groups=batch)
295
+ _, _, height, width = out.shape
296
+ out = out.view(batch, self.out_channel, height, width)
297
+
298
+ return out
299
+
300
+
301
+ class NoiseInjection(nn.Module):
302
+ def __init__(self):
303
+ super().__init__()
304
+
305
+ self.weight = nn.Parameter(torch.zeros(1))
306
+
307
+ def forward(self, image, noise=None):
308
+ if noise is None:
309
+ batch, _, height, width = image.shape
310
+ noise = image.new_empty(batch, 1, height, width).normal_()
311
+
312
+ return image + self.weight * noise
313
+
314
+
315
+ class ConstantInput(nn.Module):
316
+ def __init__(self, channel, size=4):
317
+ super().__init__()
318
+
319
+ self.input = nn.Parameter(torch.randn(1, channel, size, size))
320
+
321
+ def forward(self, input):
322
+ batch = input.shape[0]
323
+ out = self.input.repeat(batch, 1, 1, 1)
324
+
325
+ return out
326
+
327
+
328
+ class StyledConv(nn.Module):
329
+ def __init__(
330
+ self,
331
+ in_channel,
332
+ out_channel,
333
+ kernel_size,
334
+ style_dim,
335
+ upsample=False,
336
+ blur_kernel=[1, 3, 3, 1],
337
+ demodulate=True,
338
+ ):
339
+ super().__init__()
340
+
341
+ self.conv = ModulatedConv2d(
342
+ in_channel,
343
+ out_channel,
344
+ kernel_size,
345
+ style_dim,
346
+ upsample=upsample,
347
+ blur_kernel=blur_kernel,
348
+ demodulate=demodulate,
349
+ )
350
+
351
+ self.noise = NoiseInjection()
352
+ # self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
353
+ # self.activate = ScaledLeakyReLU(0.2)
354
+ self.activate = FusedLeakyReLU(out_channel)
355
+
356
+ def get_latent(self, style):
357
+ return self.conv.get_latent(style)
358
+ def forward(self, input, style, noise=None, weights_delta=None):
359
+ out_t = self.conv(input, style, weights_delta=weights_delta)
360
+ out = self.noise(out_t, noise=noise)
361
+ # out = out + self.bias
362
+ out = self.activate(out)
363
+
364
+ return out, out_t
365
+
366
+
367
+ class ToRGB(nn.Module):
368
+ def __init__(self, in_channel, style_dim, upsample=True, blur_kernel=[1, 3, 3, 1]):
369
+ super().__init__()
370
+
371
+ if upsample:
372
+ self.upsample = Upsample(blur_kernel)
373
+
374
+ self.conv = ModulatedConv2d(in_channel, 3, 1, style_dim, demodulate=False)
375
+ self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
376
+
377
+ def get_latent(self, style):
378
+ return self.conv.get_latent(style)
379
+ def forward(self, input, style, skip=None, weights_delta=None):
380
+ out = self.conv(input, style, weights_delta)
381
+ out = out + self.bias
382
+
383
+ if skip is not None:
384
+ skip = self.upsample(skip)
385
+
386
+ out = out + skip
387
+
388
+ return out
389
+
390
+
391
+ class Generator(nn.Module):
392
+ def __init__(
393
+ self,
394
+ size,
395
+ style_dim,
396
+ n_mlp,
397
+ channel_multiplier=2,
398
+ blur_kernel=[1, 3, 3, 1],
399
+ lr_mlp=0.01,
400
+ ):
401
+ super().__init__()
402
+
403
+ self.size = size
404
+
405
+ self.style_dim = style_dim
406
+
407
+ layers = [PixelNorm()]
408
+
409
+ for i in range(n_mlp):
410
+ layers.append(
411
+ EqualLinear(
412
+ style_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu'
413
+ )
414
+ )
415
+
416
+ self.style = nn.Sequential(*layers)
417
+
418
+ self.channels = {
419
+ 4: 512,
420
+ 8: 512,
421
+ 16: 512,
422
+ 32: 512,
423
+ 64: 256 * channel_multiplier,
424
+ 128: 128 * channel_multiplier,
425
+ 256: 64 * channel_multiplier,
426
+ 512: 32 * channel_multiplier,
427
+ 1024: 16 * channel_multiplier,
428
+ }
429
+
430
+ self.input = ConstantInput(self.channels[4])
431
+ self.conv1 = StyledConv(
432
+ self.channels[4], self.channels[4], 3, style_dim, blur_kernel=blur_kernel
433
+ )
434
+ self.to_rgb1 = ToRGB(self.channels[4], style_dim, upsample=False)
435
+
436
+ self.log_size = int(math.log(size, 2))
437
+ self.num_layers = (self.log_size - 2) * 2 + 1
438
+
439
+ self.convs = nn.ModuleList()
440
+ self.upsamples = nn.ModuleList()
441
+ self.to_rgbs = nn.ModuleList()
442
+ self.noises = nn.Module()
443
+
444
+ in_channel = self.channels[4]
445
+
446
+ for layer_idx in range(self.num_layers):
447
+ res = (layer_idx + 5) // 2
448
+ shape = [1, 1, 2 ** res, 2 ** res]
449
+ self.noises.register_buffer(f'noise_{layer_idx}', torch.randn(*shape))
450
+
451
+ for i in range(3, self.log_size + 1):
452
+ out_channel = self.channels[2 ** i]
453
+
454
+ self.convs.append(
455
+ StyledConv(
456
+ in_channel,
457
+ out_channel,
458
+ 3,
459
+ style_dim,
460
+ upsample=True,
461
+ blur_kernel=blur_kernel,
462
+ )
463
+ )
464
+
465
+ self.convs.append(
466
+ StyledConv(
467
+ out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel
468
+ )
469
+ )
470
+
471
+ self.to_rgbs.append(ToRGB(out_channel, style_dim))
472
+
473
+ in_channel = out_channel
474
+
475
+ self.n_latent = self.log_size * 2 - 2
476
+
477
+ def make_noise(self):
478
+ device = self.input.input.device
479
+
480
+ noises = [torch.randn(1, 1, 2 ** 2, 2 ** 2, device=device)]
481
+
482
+ for i in range(3, self.log_size + 1):
483
+ for _ in range(2):
484
+ noises.append(torch.randn(1, 1, 2 ** i, 2 ** i, device=device))
485
+
486
+ return noises
487
+
488
+ def mean_latent(self, n_latent):
489
+ latent_in = torch.randn( n_latent, self.style_dim, device=self.input.input.device)
490
+ latent = self.get_latent(latent_in)#.mean(0, keepdim=True)
491
+ latent = [latent[i].mean(0, keepdim=True) for i in range(len(latent))]
492
+
493
+ return latent
494
+
495
+ def get_w(self, input):
496
+ latent = self.style(input)
497
+ latent = fused_leaky_relu(latent, torch.zeros_like(latent).cuda(), 5.)
498
+ return latent
499
+
500
+ def get_latent(self, input, is_latent=False, truncation=1, mean_latent=None):
501
+ output = []
502
+ if not is_latent:
503
+ latent = self.style(input)
504
+ latent = latent.unsqueeze(1).repeat(1, self.n_latent, 1) #[B, 14, 512]
505
+ else:
506
+ latent = input
507
+ output.append(self.conv1.get_latent(latent[:, 0]))
508
+ output.append(self.to_rgb1.get_latent(latent[:, 1]))
509
+
510
+ i = 1
511
+ # print("Get latent dimensions:")
512
+ for conv1, conv2, to_rgb in zip(self.convs[::2], self.convs[1::2], self.to_rgbs):
513
+ # print(f'{i}: {conv1.get_latent(latent[:, i]).shape}')
514
+ # print(f'{i+1}: {conv2.get_latent(latent[:, i+1]).shape}')
515
+ # print(f'{i+2}: {to_rgb.get_latent(latent[:, i+2]).shape}')
516
+ # print("")
517
+ output.append(conv1.get_latent(latent[:, i]))
518
+ output.append(conv2.get_latent(latent[:, i+1]))
519
+ output.append(to_rgb.get_latent(latent[:, i+2]))
520
+ i += 2
521
+
522
+ # output = torch.cat(output, 1)
523
+
524
+ if truncation < 1 and mean_latent is not None:
525
+ output = [mean_latent[i] + truncation * (output[i] - mean_latent[i]) for i in range(len(output))]
526
+
527
+ return output
528
+
529
+ def forward(
530
+ self,
531
+ styles,
532
+ stop_idx=99,
533
+ is_cluster=False,
534
+ noise=None,
535
+ randomize_noise=False,
536
+ weights_deltas=None,
537
+ ):
538
+ total_convs = len(self.convs) + len(self.to_rgbs) +2
539
+ if weights_deltas is None:
540
+ weights_deltas = [None]* total_convs
541
+ if noise is None:
542
+ if randomize_noise:
543
+ noise = [None] * self.num_layers
544
+ else:
545
+ noise = [
546
+ getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)
547
+ ]
548
+
549
+ outputs = []
550
+ idx_count = 0
551
+
552
+ latent = styles
553
+ out = self.input(latent[0])
554
+ outputs.append([out, out])
555
+ if idx_count == stop_idx:
556
+ return outputs
557
+
558
+ out, out_t = self.conv1(out, latent[idx_count], noise=noise[0],weights_delta=weights_deltas[0])
559
+ outputs.append([out_t, out])
560
+ idx_count += 1
561
+ if idx_count == stop_idx:
562
+ return outputs
563
+
564
+ skip = self.to_rgb1(out, latent[idx_count], weights_delta=weights_deltas[1])
565
+
566
+ i = 1
567
+ weight_idx = 2
568
+ for conv1, conv2, noise1, noise2, to_rgb in zip(
569
+ self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
570
+ ):
571
+ outputs.append([out_t, out])
572
+ idx_count += 1
573
+ if idx_count == stop_idx:
574
+ return outputs
575
+
576
+ out, out_t = conv1(out, latent[idx_count], noise=noise1, weights_delta=weights_deltas[weight_idx])
577
+ outputs.append([out_t, out])
578
+ idx_count += 1
579
+ if idx_count == stop_idx:
580
+ return outputs
581
+
582
+ out, out_t = conv2(out, latent[idx_count], noise=noise2, weights_delta=weights_deltas[weight_idx+1])
583
+ outputs.append([out_t, out])
584
+ idx_count += 1
585
+ if idx_count == stop_idx:
586
+ return outputs
587
+
588
+ skip = to_rgb(out, latent[idx_count], skip, weights_delta=weights_deltas[weight_idx+2])
589
+
590
+ i += 2
591
+ weight_idx += 3
592
+ image = skip.clamp(-1,1)
593
+ return image, outputs
594
+
595
+
596
+ class ConvLayer(nn.Sequential):
597
+ def __init__(
598
+ self,
599
+ in_channel,
600
+ out_channel,
601
+ kernel_size,
602
+ groups=1,
603
+ downsample=False,
604
+ blur_kernel=[1, 3, 3, 1],
605
+ bias=True,
606
+ activate=True,
607
+ lr_mul=1,
608
+ ):
609
+ layers = []
610
+
611
+ if downsample:
612
+ factor = 2
613
+ p = (len(blur_kernel) - factor) + (kernel_size - 1)
614
+ pad0 = (p + 1) // 2
615
+ pad1 = p // 2
616
+
617
+ layers.append(Blur(blur_kernel, pad=(pad0, pad1)))
618
+
619
+ stride = 2
620
+ self.padding = 0
621
+
622
+ else:
623
+ stride = 1
624
+ self.padding = kernel_size // 2
625
+
626
+ layers.append(
627
+ EqualConv2d(
628
+ in_channel,
629
+ out_channel,
630
+ kernel_size,
631
+ groups=groups,
632
+ padding=self.padding,
633
+ stride=stride,
634
+ bias=bias and not activate,
635
+ lr_mul=lr_mul,
636
+ )
637
+ )
638
+
639
+ if activate:
640
+ if bias:
641
+ layers.append(FusedLeakyReLU(out_channel, lr_mul=lr_mul))
642
+
643
+ else:
644
+ layers.append(ScaledLeakyReLU(0.2))
645
+
646
+ super().__init__(*layers)
647
+
648
+
649
+
650
+ class ResBlock(nn.Module):
651
+ def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1]):
652
+ super().__init__()
653
+
654
+ self.conv1 = ConvLayer(in_channel, in_channel, 3)
655
+ self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True)
656
+
657
+ self.skip = ConvLayer(
658
+ in_channel, out_channel, 1, downsample=True, activate=False, bias=False
659
+ )
660
+
661
+ def forward(self, input):
662
+ out = self.conv1(input)
663
+ out = self.conv2(out)
664
+
665
+ skip = self.skip(input)
666
+ out = (out + skip) / math.sqrt(2)
667
+
668
+ return out
669
+
670
+
671
+ class Discriminator(nn.Module):
672
+ def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
673
+ super().__init__()
674
+
675
+ channels = {
676
+ 4: 512,
677
+ 8: 512,
678
+ 16: 512,
679
+ 32: 512,
680
+ 64: 256 * channel_multiplier,
681
+ 128: 128 * channel_multiplier,
682
+ 256: 64 * channel_multiplier,
683
+ 512: 32 * channel_multiplier,
684
+ 1024: 16 * channel_multiplier,
685
+ }
686
+
687
+ convs = [ConvLayer(3, channels[size], 1)]
688
+
689
+ log_size = int(math.log(size, 2))
690
+
691
+ in_channel = channels[size]
692
+
693
+ for i in range(log_size, 2, -1):
694
+ out_channel = channels[2 ** (i - 1)]
695
+
696
+ convs.append(ResBlock(in_channel, out_channel, blur_kernel))
697
+
698
+ in_channel = out_channel
699
+
700
+ self.convs = nn.Sequential(*convs)
701
+
702
+ self.stddev_group = 4
703
+ self.stddev_feat = 1
704
+
705
+ self.final_conv = ConvLayer(in_channel + 1, channels[4], 3)
706
+ self.final_linear = nn.Sequential(
707
+ EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu'),
708
+ EqualLinear(channels[4], 1),
709
+ )
710
+
711
+ def forward(self, input):
712
+ out = self.convs(input)
713
+
714
+ batch, channel, height, width = out.shape
715
+ group = min(batch, self.stddev_group)
716
+ #group = batch
717
+ stddev = out.view(
718
+ group, -1, self.stddev_feat, channel // self.stddev_feat, height, width
719
+ )
720
+ stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8)
721
+ stddev = stddev.mean([2, 3, 4], keepdims=True).squeeze(2)
722
+ stddev = stddev.repeat(group, 1, height, width)
723
+ out = torch.cat([out, stddev], 1)
724
+
725
+ out = self.final_conv(out)
726
+
727
+ out = out.view(batch, -1)
728
+ out = self.final_linear(out)
729
+
730
+ return out
731
+
732
+ class VGGExtractor(torch.nn.Module):
733
+ def __init__(self, resize=False):
734
+ super(VGGExtractor, self).__init__()
735
+ vgg16 = torchvision.models.vgg16(pretrained=True).eval()
736
+ blocks = vgg16.features[:23]
737
+ for p in blocks.parameters():
738
+ p.requires_grad = False
739
+ self.blocks = blocks
740
+ self.transform = torch.nn.functional.interpolate
741
+ self.mean = torch.nn.Parameter(torch.tensor([0.485, 0.456, 0.406]).view(1,3,1,1))
742
+ self.std = torch.nn.Parameter(torch.tensor([0.229, 0.224, 0.225]).view(1,3,1,1))
743
+ self.resize = resize
744
+
745
+ def forward(self, input):
746
+ if input.shape[1] != 3:
747
+ input = input.repeat(1, 3, 1, 1)
748
+ input = (input + 1) / 2
749
+ input = (input-self.mean) / self.std
750
+ if self.resize:
751
+ input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
752
+ return self.blocks(input)
753
+
754
+ class Encoder(nn.Module):
755
+ def __init__(self, size, groups, channel_multiplier=1, blur_kernel=[1, 3, 3, 1]):
756
+ '''
757
+ [16]: [14,15,16,17,18,19]
758
+ [8]: [8,9,10,11,12,13]
759
+ [4]: [0,1,2,3,4,5,6,7]
760
+ '''
761
+ super().__init__()
762
+ in_channel = 3
763
+ out_channel = 64
764
+
765
+ convs = nn.ModuleList()
766
+ for i in range(6):
767
+ convs.append(ResBlock(in_channel, out_channel, blur_kernel))
768
+ in_channel = out_channel
769
+ out_channel = min(1024, in_channel*2)
770
+
771
+ self.fc_high = nn.Sequential(nn.AdaptiveAvgPool2d(4),
772
+ nn.Flatten(),
773
+ EqualLinear(512*4*4, 4*512+3*256+2*128))
774
+ self.fc_mid = nn.Sequential(nn.AdaptiveAvgPool2d(4),
775
+ nn.Flatten(),
776
+ EqualLinear(1024*4*4, 512*6))
777
+ self.fc_low = nn.Sequential(nn.AdaptiveAvgPool2d(4),
778
+ nn.Flatten(),
779
+ EqualLinear(1024*4*4, 512*5))
780
+
781
+ def forward(self, input):
782
+ shared = self.convs(input)
783
+ local = self.local_fc(shared)
784
+ glob = self.global_fc(shared)
785
+ return local.view(local.size(0), -1), glob
786
+
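
The generator above is the RIS variant of StyleGAN2: it is driven by style-space codes rather than raw z/w vectors. get_latent pushes a latent through each layer's modulation to produce one code per conv/ToRGB layer, and forward consumes that list, optionally with per-layer weights_deltas (the HyperStyle offsets). A minimal smoke-test sketch, not part of the commit, assuming the full model file in this diff (PixelNorm, EqualLinear, Blur, etc. are defined in the portion not shown here):

    import torch

    g = Generator(size=256, style_dim=512, n_mlp=8).eval()   # small resolution keeps it light
    with torch.no_grad():
        z = torch.randn(2, 512)
        styles = g.get_latent(z)       # list of style-space codes, one per conv / ToRGB layer
        img, activations = g(styles)   # image clamped to [-1, 1], plus per-layer activations
    print(img.shape)                   # torch.Size([2, 3, 256, 256])
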
ris/op/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .fused_act import FusedLeakyReLU, fused_leaky_relu
2
+ from .upfirdn2d import upfirdn2d
ris/op/fused_act.py ADDED
@@ -0,0 +1,39 @@
1
+ import os
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+
7
+ module_path = os.path.dirname(__file__)
8
+
9
+
10
+
11
+ class FusedLeakyReLU(nn.Module):
12
+ def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
13
+ super().__init__()
14
+
15
+ self.bias = nn.Parameter(torch.zeros(channel))
16
+ self.negative_slope = negative_slope
17
+ self.scale = scale
18
+
19
+ def forward(self, input):
20
+ return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
21
+
22
+
23
+ def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
24
+ rest_dim = [1] * (input.ndim - bias.ndim - 1)
25
+ if input.ndim == 3:
26
+ return (
27
+ F.leaky_relu(
28
+ input + bias.view(1, *rest_dim, bias.shape[0]), negative_slope=negative_slope
29
+ )
30
+ * scale
31
+ )
32
+ else:
33
+ return (
34
+ F.leaky_relu(
35
+ input + bias.view(1, bias.shape[0], *rest_dim), negative_slope=negative_slope
36
+ )
37
+ * scale
38
+ )
39
+
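
This fused_act.py is a pure-PyTorch fallback with no compiled CUDA extension: FusedLeakyReLU simply adds a per-channel bias, applies a leaky ReLU, and rescales by sqrt(2) to preserve activation variance. A small equivalence check, written for illustration only (not part of the commit):

    import math
    import torch
    import torch.nn.functional as F

    x = torch.randn(4, 8, 16, 16)
    act = FusedLeakyReLU(channel=8)

    out = act(x)
    ref = F.leaky_relu(x + act.bias.view(1, -1, 1, 1), negative_slope=0.2) * math.sqrt(2)
    print(torch.allclose(out, ref))    # True
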
ris/op/upfirdn2d.py ADDED
@@ -0,0 +1,60 @@
1
+ import os
2
+
3
+ import torch
4
+ from torch.nn import functional as F
5
+
6
+
7
+ module_path = os.path.dirname(__file__)
8
+
9
+
10
+
11
+ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
12
+ out = upfirdn2d_native(
13
+ input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1]
14
+ )
15
+
16
+ return out
17
+
18
+
19
+ def upfirdn2d_native(
20
+ input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
21
+ ):
22
+ _, channel, in_h, in_w = input.shape
23
+ input = input.reshape(-1, in_h, in_w, 1)
24
+
25
+ _, in_h, in_w, minor = input.shape
26
+ kernel_h, kernel_w = kernel.shape
27
+
28
+ out = input.view(-1, in_h, 1, in_w, 1, minor)
29
+ out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
30
+ out = out.view(-1, in_h * up_y, in_w * up_x, minor)
31
+
32
+ out = F.pad(
33
+ out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
34
+ )
35
+ out = out[
36
+ :,
37
+ max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0),
38
+ max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0),
39
+ :,
40
+ ]
41
+
42
+ out = out.permute(0, 3, 1, 2)
43
+ out = out.reshape(
44
+ [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]
45
+ )
46
+ w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
47
+ out = F.conv2d(out, w)
48
+ out = out.reshape(
49
+ -1,
50
+ minor,
51
+ in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
52
+ in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
53
+ )
54
+ out = out.permute(0, 2, 3, 1)
55
+ out = out[:, ::down_y, ::down_x, :]
56
+
57
+ out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
58
+ out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
59
+
60
+ return out.view(-1, channel, out_h, out_w)
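
upfirdn2d_native pads, (optionally) upsamples, convolves with the flipped kernel, and (optionally) downsamples in one pass; per axis the output length is (in*up + pad0 + pad1 - k) // down + 1. A minimal sketch of a same-size blur with the [1, 3, 3, 1] kernel used throughout the generator; the pad values follow that formula, and the snippet is an illustration rather than code from the commit:

    import torch

    k = torch.tensor([1., 3., 3., 1.])
    kernel = k[None, :] * k[:, None]     # separable binomial kernel
    kernel = kernel / kernel.sum()       # normalized, as the Blur module does

    x = torch.randn(1, 3, 8, 8)
    out = upfirdn2d(x, kernel, up=1, down=1, pad=(2, 1))   # (8 + 2 + 1 - 4) // 1 + 1 = 8
    print(out.shape)                     # torch.Size([1, 3, 8, 8])
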
ris/projector.py ADDED
@@ -0,0 +1,213 @@
1
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ """Project given image to the latent space of pretrained network pickle."""
10
+
11
+ import copy
12
+ import os
13
+ from time import perf_counter
14
+
15
+ import click
16
+ import imageio
17
+ import numpy as np
18
+ import PIL.Image
19
+ import torch
20
+ import torch.nn.functional as F
21
+
22
+ import dnnlib
23
+ import legacy
24
+
25
+ def project(
26
+ G,
27
+ vgg16,
28
+ target: torch.Tensor, # [C,H,W] and dynamic range [0,255], W & H must match G output resolution
29
+ *,
30
+ num_steps = 1000,
31
+ w_avg_samples = 10000,
32
+ initial_learning_rate = 0.1,
33
+ initial_noise_factor = 0.05,
34
+ lr_rampdown_length = 0.25,
35
+ lr_rampup_length = 0.05,
36
+ noise_ramp_length = 0.75,
37
+ regularize_noise_weight = 1e5,
38
+ verbose = False,
39
+ device: torch.device
40
+ ):
41
+ assert target.shape == (G.img_channels, G.img_resolution, G.img_resolution)
42
+
43
+ def logprint(*args):
44
+ if verbose:
45
+ print(*args)
46
+
47
+ G = copy.deepcopy(G).eval().requires_grad_(False).to(device) # type: ignore
48
+
49
+ # Compute w stats.
50
+ logprint(f'Computing W midpoint and stddev using {w_avg_samples} samples...')
51
+ z_samples = np.random.RandomState(123).randn(w_avg_samples, G.z_dim)
52
+ w_samples = G.mapping(torch.from_numpy(z_samples).to(device), None) # [N, L, C]
53
+ w_samples = w_samples[:, :1, :].cpu().numpy().astype(np.float32) # [N, 1, C]
54
+ w_avg = np.mean(w_samples, axis=0, keepdims=True) # [1, 1, C]
55
+ w_std = (np.sum((w_samples - w_avg) ** 2) / w_avg_samples) ** 0.5
56
+
57
+ # Setup noise inputs.
58
+ noise_bufs = { name: buf for (name, buf) in G.synthesis.named_buffers() if 'noise_const' in name }
59
+
60
+ # Load VGG16 feature detector.
61
+ # url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
62
+ # with dnnlib.util.open_url(url) as f:
63
+ # vgg16 = torch.jit.load(f).eval().to(device)
64
+
65
+ # Features for target image.
66
+ target_images = target.unsqueeze(0).to(device).to(torch.float32)
67
+ if target_images.shape[2] > 256:
68
+ target_images = F.interpolate(target_images, size=(256, 256), mode='area')
69
+ target_features = vgg16(target_images, resize_images=False, return_lpips=True)
70
+
71
+ w_opt = torch.tensor(w_avg, dtype=torch.float32, device=device, requires_grad=True) # pylint: disable=not-callable
72
+ w_out = torch.zeros([num_steps] + list(w_opt.shape[1:]), dtype=torch.float32, device=device)
73
+ optimizer = torch.optim.Adam([w_opt] + list(noise_bufs.values()), betas=(0.9, 0.999), lr=initial_learning_rate)
74
+
75
+ # Init noise.
76
+ for buf in noise_bufs.values():
77
+ buf[:] = torch.randn_like(buf)
78
+ buf.requires_grad = True
79
+
80
+ for step in range(num_steps):
81
+ # Learning rate schedule.
82
+ t = step / num_steps
83
+ w_noise_scale = w_std * initial_noise_factor * max(0.0, 1.0 - t / noise_ramp_length) ** 2
84
+ lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length)
85
+ lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
86
+ lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length)
87
+ lr = initial_learning_rate * lr_ramp
88
+ for param_group in optimizer.param_groups:
89
+ param_group['lr'] = lr
90
+
91
+ # Synth images from opt_w.
92
+ w_noise = torch.randn_like(w_opt) * w_noise_scale
93
+ ws = (w_opt + w_noise).repeat([1, G.mapping.num_ws, 1])
94
+ synth_images = G.synthesis(ws, noise_mode='const')
95
+
96
+ # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
97
+ synth_images = (synth_images + 1) * (255/2)
98
+ if synth_images.shape[2] > 256:
99
+ synth_images = F.interpolate(synth_images, size=(256, 256), mode='area')
100
+
101
+ # Features for synth images.
102
+ synth_features = vgg16(synth_images, resize_images=False, return_lpips=True)
103
+ dist = (target_features - synth_features).square().sum()
104
+
105
+ # Noise regularization.
106
+ reg_loss = 0.0
107
+ for v in noise_bufs.values():
108
+ noise = v[None,None,:,:] # must be [1,1,H,W] for F.avg_pool2d()
109
+ while True:
110
+ reg_loss += (noise*torch.roll(noise, shifts=1, dims=3)).mean()**2
111
+ reg_loss += (noise*torch.roll(noise, shifts=1, dims=2)).mean()**2
112
+ if noise.shape[2] <= 8:
113
+ break
114
+ noise = F.avg_pool2d(noise, kernel_size=2)
115
+ loss = dist + reg_loss * regularize_noise_weight
116
+
117
+ # Step
118
+ optimizer.zero_grad(set_to_none=True)
119
+ loss.backward()
120
+ optimizer.step()
121
+ logprint(f'step {step+1:>4d}/{num_steps}: dist {dist:<4.2f} loss {float(loss):<5.2f}')
122
+
123
+ # Save projected W for each optimization step.
124
+ w_out[step] = w_opt.detach()[0]
125
+
126
+ # Normalize noise.
127
+ with torch.no_grad():
128
+ for buf in noise_bufs.values():
129
+ buf -= buf.mean()
130
+ buf *= buf.square().mean().rsqrt()
131
+
132
+ return w_out.repeat([1, G.mapping.num_ws, 1])
133
+
134
+ #----------------------------------------------------------------------------
135
+
136
+ @click.command()
137
+ @click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
138
+ @click.option('--target', 'target_fname', help='Target image file to project to', required=True, metavar='FILE')
139
+ @click.option('--num-steps', help='Number of optimization steps', type=int, default=1000, show_default=True)
140
+ @click.option('--seed', help='Random seed', type=int, default=303, show_default=True)
141
+ @click.option('--save-video', help='Save an mp4 video of optimization progress', type=bool, default=True, show_default=True)
142
+ @click.option('--outdir', help='Where to save the output images', required=True, metavar='DIR')
143
+ def run_projection(
144
+ network_pkl: str,
145
+ target_fname: str,
146
+ outdir: str,
147
+ save_video: bool,
148
+ seed: int,
149
+ num_steps: int
150
+ ):
151
+ """Project given image to the latent space of pretrained network pickle.
152
+
153
+ Examples:
154
+
155
+ \b
156
+ python projector.py --outdir=out --target=~/mytargetimg.png \\
157
+ --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl
158
+ """
159
+ np.random.seed(seed)
160
+ torch.manual_seed(seed)
161
+
162
+ # Load networks.
163
+ print('Loading networks from "%s"...' % network_pkl)
164
+ device = torch.device('cuda')
165
+ with dnnlib.util.open_url(network_pkl) as fp:
166
+ G = legacy.load_network_pkl(fp)['G_ema'].requires_grad_(False).to(device) # type: ignore
167
+
168
+ # Load target image.
169
+ target_pil = PIL.Image.open(target_fname).convert('RGB')
170
+ w, h = target_pil.size
171
+ s = min(w, h)
172
+ target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2))
173
+ target_pil = target_pil.resize((G.img_resolution, G.img_resolution), PIL.Image.LANCZOS)
174
+ target_uint8 = np.array(target_pil, dtype=np.uint8)
175
+
176
+ # Optimize projection.
177
+ start_time = perf_counter()
178
+ # project() takes the VGG16/LPIPS feature extractor as its second argument, so load
+ # the checkpoint that the commented-out block inside project() points to.
+ url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
+ with dnnlib.util.open_url(url) as f:
+ vgg16 = torch.jit.load(f).eval().to(device)
+ projected_w_steps = project(
179
+ G,
+ vgg16,
180
+ target=torch.tensor(target_uint8.transpose([2, 0, 1]), device=device), # pylint: disable=not-callable
181
+ num_steps=num_steps,
182
+ device=device,
183
+ verbose=True
184
+ )
185
+ print (f'Elapsed: {(perf_counter()-start_time):.1f} s')
186
+
187
+ # Render debug output: optional video and projected image and W vector.
188
+ os.makedirs(outdir, exist_ok=True)
189
+ if save_video:
190
+ video = imageio.get_writer(f'{outdir}/proj.mp4', mode='I', fps=10, codec='libx264', bitrate='16M')
191
+ print (f'Saving optimization progress video "{outdir}/proj.mp4"')
192
+ for projected_w in projected_w_steps:
193
+ synth_image = G.synthesis(projected_w.unsqueeze(0), noise_mode='const')
194
+ synth_image = (synth_image + 1) * (255/2)
195
+ synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
196
+ video.append_data(np.concatenate([target_uint8, synth_image], axis=1))
197
+ video.close()
198
+
199
+ # Save final projected frame and W vector.
200
+ target_pil.save(f'{outdir}/target.png')
201
+ projected_w = projected_w_steps[-1]
202
+ synth_image = G.synthesis(projected_w.unsqueeze(0), noise_mode='const')
203
+ synth_image = (synth_image + 1) * (255/2)
204
+ synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
205
+ PIL.Image.fromarray(synth_image, 'RGB').save(f'{outdir}/proj.png')
206
+ np.savez(f'{outdir}/projected_w.npz', w=projected_w.unsqueeze(0).cpu().numpy())
207
+
208
+ #----------------------------------------------------------------------------
209
+
210
+ if __name__ == "__main__":
211
+ run_projection() # pylint: disable=no-value-for-parameter
212
+
213
+ #----------------------------------------------------------------------------
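
The projection loop above anneals the Adam learning rate with a short linear warm-up over lr_rampup_length, a cosine ramp-down over the final lr_rampdown_length fraction of steps, and a similarly decaying noise scale on w. Pulled out on its own, the schedule is just the following (an illustration mirroring the arithmetic inside project(), not code from the commit):

    import numpy as np

    def projector_lr(step, num_steps, base_lr=0.1, rampdown=0.25, rampup=0.05):
        t = step / num_steps
        ramp = min(1.0, (1.0 - t) / rampdown)    # cosine ramp-down at the end
        ramp = 0.5 - 0.5 * np.cos(ramp * np.pi)
        ramp = ramp * min(1.0, t / rampup)       # linear warm-up at the start
        return base_lr * ramp

    for step in (0, 50, 500, 950, 999):
        print(step, round(projector_lr(step, 1000), 5))
    # 0 -> 0.0, 50 -> 0.1, 500 -> 0.1, 950 -> ~0.0095, 999 -> ~0.0
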
ris/spherical_kmeans.py ADDED
@@ -0,0 +1,390 @@
1
+ import warnings
2
+ import numpy as np
3
+ from sklearn.preprocessing import normalize
4
+ from sklearn.utils.sparsefuncs_fast import assign_rows_csr
5
+ from sklearn.utils.validation import _check_sample_weight
6
+ from sklearn.utils import check_array, check_random_state
7
+ from sklearn.utils.extmath import row_norms
8
+ import scipy.sparse as sp
9
+ import sklearn.cluster  # needed by the sparse-input branch of _mini_batch_spherical_step below
+ from sklearn.cluster import MiniBatchKMeans
10
+ from sklearn.cluster.k_means_ import (
11
+ _init_centroids,
12
+ _labels_inertia,
13
+ _tolerance,
14
+ _mini_batch_step,
15
+ _mini_batch_convergence
16
+ )
17
+
18
+
19
+ def _check_normalize_sample_weight(sample_weight, X):
20
+ """Set sample_weight if None, and check for correct dtype"""
21
+
22
+ sample_weight_was_none = sample_weight is None
23
+
24
+ sample_weight = _check_sample_weight(sample_weight, X)
25
+
26
+ if not sample_weight_was_none:
27
+ # normalize the weights to sum up to n_samples
28
+ # an array of 1 (i.e. samples_weight is None) is already normalized
29
+ n_samples = len(sample_weight)
30
+ scale = n_samples / sample_weight.sum()
31
+ sample_weight *= scale
32
+ return sample_weight
33
+
34
+
35
+
36
+
37
+ def _mini_batch_spherical_step(X, sample_weight, x_squared_norms, centers, weight_sums,
38
+ old_center_buffer, compute_squared_diff,
39
+ distances, random_reassign=False,
40
+ random_state=None, reassignment_ratio=.01,
41
+ verbose=False):
42
+ """Incremental update of the centers for the Minibatch K-Means algorithm.
43
+ Parameters
44
+ ----------
45
+ X : array, shape (n_samples, n_features)
46
+ The original data array.
47
+ sample_weight : array-like, shape (n_samples,)
48
+ The weights for each observation in X.
49
+ x_squared_norms : array, shape (n_samples,)
50
+ Squared euclidean norm of each data point.
51
+ centers : array, shape (k, n_features)
52
+ The cluster centers. This array is MODIFIED IN PLACE
53
+ counts : array, shape (k,)
54
+ The vector in which we keep track of the numbers of elements in a
55
+ cluster. This array is MODIFIED IN PLACE
56
+ distances : array, dtype float, shape (n_samples), optional
57
+ If not None, should be a pre-allocated array that will be used to store
58
+ the distances of each sample to its closest center.
59
+ May not be None when random_reassign is True.
60
+ random_state : int, RandomState instance or None (default)
61
+ Determines random number generation for centroid initialization and to
62
+ pick new clusters amongst observations with uniform probability. Use
63
+ an int to make the randomness deterministic.
64
+ See :term:`Glossary <random_state>`.
65
+ random_reassign : boolean, optional
66
+ If True, centers with very low counts are randomly reassigned
67
+ to observations.
68
+ reassignment_ratio : float, optional
69
+ Control the fraction of the maximum number of counts for a
70
+ center to be reassigned. A higher value means that low count
71
+ centers are more likely to be reassigned, which means that the
72
+ model will take longer to converge, but should converge in a
73
+ better clustering.
74
+ verbose : bool, optional, default False
75
+ Controls the verbosity.
76
+ compute_squared_diff : bool
77
+ If set to False, the squared diff computation is skipped.
78
+ old_center_buffer : int
79
+ Copy of old centers for monitoring convergence.
80
+ Returns
81
+ -------
82
+ inertia : float
83
+ Sum of squared distances of samples to their closest cluster center.
84
+ squared_diff : numpy array, shape (n_clusters,)
85
+ Squared distances between previous and updated cluster centers.
86
+ """
87
+ # Perform label assignment to nearest centers
88
+ nearest_center, inertia = _labels_inertia(X, sample_weight,
89
+ x_squared_norms, centers,
90
+ distances=distances)
91
+
92
+ if random_reassign and reassignment_ratio > 0:
93
+ random_state = check_random_state(random_state)
94
+ # Reassign clusters that have very low weight
95
+ to_reassign = weight_sums < reassignment_ratio * weight_sums.max()
96
+ # pick at most .5 * batch_size samples as new centers
97
+ if to_reassign.sum() > .5 * X.shape[0]:
98
+ indices_dont_reassign = \
99
+ np.argsort(weight_sums)[int(.5 * X.shape[0]):]
100
+ to_reassign[indices_dont_reassign] = False
101
+ n_reassigns = to_reassign.sum()
102
+ if n_reassigns:
103
+ # Pick new clusters amongst observations with uniform probability
104
+ new_centers = random_state.choice(X.shape[0], replace=False,
105
+ size=n_reassigns)
106
+ if verbose:
107
+ print("[MiniBatchKMeans] Reassigning %i cluster centers."
108
+ % n_reassigns)
109
+
110
+ if sp.issparse(X) and not sp.issparse(centers):
111
+ assign_rows_csr(
112
+ X, new_centers.astype(np.intp, copy=False),
113
+ np.where(to_reassign)[0].astype(np.intp, copy=False),
114
+ centers)
115
+ else:
116
+ centers[to_reassign] = X[new_centers]
117
+ # reset counts of reassigned centers, but don't reset them too small
118
+ # to avoid instant reassignment. This is a pretty dirty hack as it
119
+ # also modifies the learning rates.
120
+ weight_sums[to_reassign] = np.min(weight_sums[~to_reassign])
121
+
122
+ # implementation for the sparse CSR representation completely written in
123
+ # cython
124
+ if sp.issparse(X):
125
+ return inertia, sklearn.cluster.k_means_._k_means._mini_batch_update_csr(
126
+ X, sample_weight, x_squared_norms, centers, weight_sums,
127
+ nearest_center, old_center_buffer, compute_squared_diff)
128
+
129
+ # dense variant in mostly numpy (not as memory efficient though)
130
+ k = centers.shape[0]
131
+ squared_diff = 0.0
132
+ for center_idx in range(k):
133
+ # find points from minibatch that are assigned to this center
134
+ center_mask = nearest_center == center_idx
135
+ wsum = sample_weight[center_mask].sum()
136
+
137
+ if wsum > 0:
138
+ if compute_squared_diff:
139
+ old_center_buffer[:] = centers[center_idx]
140
+
141
+ # inplace remove previous count scaling
142
+ centers[center_idx] *= weight_sums[center_idx]
143
+
144
+ # inplace sum with new points members of this cluster
145
+ centers[center_idx] += \
146
+ np.sum(X[center_mask] *
147
+ sample_weight[center_mask, np.newaxis], axis=0)
148
+
149
+ # unit-normalize for spherical k-means
150
+ centers[center_idx] = normalize(centers[center_idx, None])[:, 0]
151
+
152
+ # update the squared diff if necessary
153
+ if compute_squared_diff:
154
+ diff = centers[center_idx].ravel() - old_center_buffer.ravel()
155
+ squared_diff += np.dot(diff, diff)
156
+
157
+ return inertia, squared_diff
158
+
159
+
160
+ class MiniBatchSphericalKMeans(MiniBatchKMeans):
161
+
162
+ def fit(self, X, y=None, sample_weight=None):
163
+ """Compute the centroids on X by chunking it into mini-batches.
164
+ Parameters
165
+ ----------
166
+ X : array-like or sparse matrix, shape=(n_samples, n_features)
167
+ Training instances to cluster. It must be noted that the data
168
+ will be converted to C ordering, which will cause a memory copy
169
+ if the given data is not C-contiguous.
170
+ y : Ignored
171
+ Not used, present here for API consistency by convention.
172
+ sample_weight : array-like, shape (n_samples,), optional
173
+ The weights for each observation in X. If None, all observations
174
+ are assigned equal weight (default: None).
175
+ Returns
176
+ -------
177
+ self
178
+ """
179
+ random_state = check_random_state(self.random_state)
180
+ # unit-normalize for spherical k-means
181
+ X = normalize(X)
182
+ X = check_array(X, accept_sparse="csr", order='C',
183
+ dtype=[np.float64, np.float32])
184
+ n_samples, n_features = X.shape
185
+ if n_samples < self.n_clusters:
186
+ raise ValueError("n_samples=%d should be >= n_clusters=%d"
187
+ % (n_samples, self.n_clusters))
188
+
189
+ sample_weight = _check_normalize_sample_weight(sample_weight, X)
190
+
191
+ n_init = self.n_init
192
+ if hasattr(self.init, '__array__'):
193
+ self.init = np.ascontiguousarray(self.init, dtype=X.dtype)
194
+ if n_init != 1:
195
+ warnings.warn(
196
+ 'Explicit initial center position passed: '
197
+ 'performing only one init in MiniBatchKMeans instead of '
198
+ 'n_init=%d'
199
+ % self.n_init, RuntimeWarning, stacklevel=2)
200
+ n_init = 1
201
+
202
+ x_squared_norms = row_norms(X, squared=True)
203
+
204
+ if self.tol > 0.0:
205
+ tol = _tolerance(X, self.tol)
206
+
207
+ # using tol-based early stopping needs the allocation of a
208
+ # dedicated buffer beforehand, which can be expensive for high dim data:
209
+ # hence we allocate it outside of the main loop
210
+ old_center_buffer = np.zeros(n_features, dtype=X.dtype)
211
+ else:
212
+ tol = 0.0
213
+ # no need for the center buffer if tol-based early stopping is
214
+ # disabled
215
+ old_center_buffer = np.zeros(0, dtype=X.dtype)
216
+
217
+ distances = np.zeros(self.batch_size, dtype=X.dtype)
218
+ n_batches = int(np.ceil(float(n_samples) / self.batch_size))
219
+ n_iter = int(self.max_iter * n_batches)
220
+
221
+ init_size = self.init_size
222
+ if init_size is None:
223
+ init_size = 3 * self.batch_size
224
+ if init_size > n_samples:
225
+ init_size = n_samples
226
+ self.init_size_ = init_size
227
+
228
+ validation_indices = random_state.randint(0, n_samples, init_size)
229
+ X_valid = X[validation_indices]
230
+ sample_weight_valid = sample_weight[validation_indices]
231
+ x_squared_norms_valid = x_squared_norms[validation_indices]
232
+
233
+ # perform several inits with random sub-sets
234
+ best_inertia = None
235
+ for init_idx in range(n_init):
236
+ if self.verbose:
237
+ print("Init %d/%d with method: %s"
238
+ % (init_idx + 1, n_init, self.init))
239
+ weight_sums = np.zeros(self.n_clusters, dtype=sample_weight.dtype)
240
+
241
+ # TODO: once the `k_means` function works with sparse input we
242
+ # should refactor the following init to use it instead.
243
+
244
+ # Initialize the centers using only a fraction of the data as we
245
+ # expect n_samples to be very large when using MiniBatchKMeans
246
+ cluster_centers = _init_centroids(
247
+ X, self.n_clusters, self.init,
248
+ random_state=random_state,
249
+ x_squared_norms=x_squared_norms,
250
+ init_size=init_size)
251
+
252
+ cluster_centers = normalize(cluster_centers)
253
+
254
+ # Compute the label assignment on the init dataset
255
+ _mini_batch_step(
256
+ X_valid, sample_weight_valid,
257
+ x_squared_norms[validation_indices], cluster_centers,
258
+ weight_sums, old_center_buffer, False, distances=None,
259
+ verbose=self.verbose)
260
+
261
+ cluster_centers = normalize(cluster_centers)
262
+
263
+ # Keep only the best cluster centers across independent inits on
264
+ # the common validation set
265
+ _, inertia = _labels_inertia(X_valid, sample_weight_valid,
266
+ x_squared_norms_valid,
267
+ cluster_centers)
268
+ if self.verbose:
269
+ print("Inertia for init %d/%d: %f"
270
+ % (init_idx + 1, n_init, inertia))
271
+ if best_inertia is None or inertia < best_inertia:
272
+ self.cluster_centers_ = cluster_centers
273
+ self.counts_ = weight_sums
274
+ best_inertia = inertia
275
+
276
+ # Empty context to be used inplace by the convergence check routine
277
+ convergence_context = {}
278
+
279
+ # Perform the iterative optimization until the final convergence
280
+ # criterion
281
+ for iteration_idx in range(n_iter):
282
+ # Sample a minibatch from the full dataset
283
+ minibatch_indices = random_state.randint(
284
+ 0, n_samples, self.batch_size)
285
+
286
+ # Perform the actual update step on the minibatch data
287
+ self.cluster_centers_ = normalize(self.cluster_centers_)
288
+ batch_inertia, centers_squared_diff = _mini_batch_step(
289
+ X[minibatch_indices], sample_weight[minibatch_indices],
290
+ x_squared_norms[minibatch_indices],
291
+ self.cluster_centers_, self.counts_,
292
+ old_center_buffer, tol > 0.0, distances=distances,
293
+ # Here we randomly choose whether to perform
294
+ # random reassignment: the choice is done as a function
295
+ # of the iteration index, and the minimum number of
296
+ # counts, in order to force this reassignment to happen
297
+ # every once in a while
298
+ random_reassign=((iteration_idx + 1)
299
+ % (10 + int(self.counts_.min())) == 0),
300
+ random_state=random_state,
301
+ reassignment_ratio=self.reassignment_ratio,
302
+ verbose=self.verbose)
303
+ self.cluster_centers_ = normalize(self.cluster_centers_)
304
+
305
+ # Monitor convergence and do early stopping if necessary
306
+ if _mini_batch_convergence(
307
+ self, iteration_idx, n_iter, tol, n_samples,
308
+ centers_squared_diff, batch_inertia, convergence_context,
309
+ verbose=self.verbose):
310
+ break
311
+
312
+ self.n_iter_ = iteration_idx + 1
313
+
314
+ if self.compute_labels:
315
+ self.labels_, self.inertia_ = \
316
+ self._labels_inertia_minibatch(X, sample_weight)
317
+
318
+ return self
319
+
320
+ def partial_fit(self, X, y=None, sample_weight=None):
321
+ """Update k means estimate on a single mini-batch X.
322
+ Parameters
323
+ ----------
324
+ X : array-like of shape (n_samples, n_features)
325
+ Coordinates of the data points to cluster. It must be noted that
326
+ X will be copied if it is not C-contiguous.
327
+ y : Ignored
328
+ Not used, present here for API consistency by convention.
329
+ sample_weight : array-like, shape (n_samples,), optional
330
+ The weights for each observation in X. If None, all observations
331
+ are assigned equal weight (default: None).
332
+ Returns
333
+ -------
334
+ self
335
+ """
336
+
337
+ X = check_array(X, accept_sparse="csr", order="C",
338
+ dtype=[np.float64, np.float32])
339
+ n_samples, n_features = X.shape
340
+ if hasattr(self.init, '__array__'):
341
+ self.init = np.ascontiguousarray(self.init, dtype=X.dtype)
342
+
343
+ if n_samples == 0:
344
+ return self
345
+
346
+ # unit-normalize for spherical k-means
347
+ X = normalize(X)
348
+
349
+ sample_weight = _check_normalize_sample_weight(sample_weight, X)
350
+
351
+ x_squared_norms = row_norms(X, squared=True)
352
+ self.random_state_ = getattr(self, "random_state_",
353
+ check_random_state(self.random_state))
354
+ if (not hasattr(self, 'counts_')
355
+ or not hasattr(self, 'cluster_centers_')):
356
+ # this is the first call partial_fit on this object:
357
+ # initialize the cluster centers
358
+ self.cluster_centers_ = _init_centroids(
359
+ X, self.n_clusters, self.init,
360
+ random_state=self.random_state_,
361
+ x_squared_norms=x_squared_norms, init_size=self.init_size)
362
+
363
+ self.counts_ = np.zeros(self.n_clusters,
364
+ dtype=sample_weight.dtype)
365
+ random_reassign = False
366
+ distances = None
367
+ else:
368
+ # The lower the minimum count is, the more we do random
369
+ # reassignment, however, we don't want to do random
370
+ # reassignment too often, to allow for building up counts
371
+ random_reassign = self.random_state_.randint(
372
+ 10 * (1 + self.counts_.min())) == 0
373
+ distances = np.zeros(X.shape[0], dtype=X.dtype)
374
+
375
+ self.cluster_centers_ = normalize(self.cluster_centers_)
376
+
377
+ _mini_batch_spherical_step(X, sample_weight, x_squared_norms,
378
+ self.cluster_centers_, self.counts_,
379
+ np.zeros(0, dtype=X.dtype), 0,
380
+ random_reassign=random_reassign, distances=distances,
381
+ random_state=self.random_state_,
382
+ reassignment_ratio=self.reassignment_ratio,
383
+ verbose=self.verbose)
384
+ self.cluster_centers_ = normalize(self.cluster_centers_)
385
+
386
+ if self.compute_labels:
387
+ self.labels_, self.inertia_ = _labels_inertia(
388
+ X, sample_weight, x_squared_norms, self.cluster_centers_)
389
+
390
+ return self
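
MiniBatchSphericalKMeans is a drop-in subclass of scikit-learn's MiniBatchKMeans that L2-normalizes the samples and re-normalizes the centers at every step, so the clustering effectively uses cosine similarity on the unit sphere. Note that it imports private helpers from sklearn.cluster.k_means_, so it only runs against an older scikit-learn release that still ships that module. A hedged usage sketch; the feature matrix and cluster count here are made up for illustration:

    import numpy as np

    feats = np.random.randn(10000, 128).astype(np.float32)   # e.g. flattened activations

    km = MiniBatchSphericalKMeans(n_clusters=18, batch_size=256, random_state=0)
    km.fit(feats)                     # rows are unit-normalized internally
    labels = km.labels_               # cluster index per sample
    centers = km.cluster_centers_     # unit-norm centroids, shape (18, 128)
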
ris/util.py ADDED
@@ -0,0 +1,403 @@
1
+ from matplotlib import pyplot as plt
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import os
5
+ import cv2
6
+ import dlib
7
+ from PIL import Image
8
+ import numpy as np
9
+ import pandas as pd
10
+ import math
11
+ import scipy
12
+ import scipy.ndimage
13
+ import gc
14
+
15
+ # Number of style channels per StyleGAN layer
16
+ style2list_len = [512, 512, 512, 512, 512, 512, 512, 512, 512, 512,
17
+ 512, 512, 512, 512, 512, 256, 256, 256, 128, 128,
18
+ 128, 64, 64, 64, 32, 32]
19
+
20
+ # Layer indices of ToRGB modules
21
+ rgb_layer_idx = [1,4,7,10,13,16,19,22,25]
22
+
23
+ google_drive_paths = {
24
+ "stylegan2-ffhq-config-f.pt": "https://drive.google.com/uc?id=1Yr7KuD959btpmcKGAUsbAk5rPjX2MytK",
25
+ "inversion_stats.npz": "https://drive.google.com/uc?id=1oE_mIKf-Vr7b3J04l2UjsSrxZiw-UuFg",
26
+ "model_ir_se50.pt": "https://drive.google.com/uc?id=1KW7bjndL3QG3sxBbZxreGHigcCCpsDgn",
27
+ "dlibshape_predictor_68_face_landmarks.dat": "https://drive.google.com/uc?id=11BDmNKS1zxSZxkgsEvQoKgFd8J264jKp",
28
+ "e4e_ffhq_encode.pt": "https://drive.google.com/uc?id=1o6ijA3PkcewZvwJJ73dJ0fxhndn0nnh7"
29
+ }
30
+
31
+
32
+ def ensure_checkpoint_exists(model_weights_filename):
33
+ if not os.path.isfile(model_weights_filename) and (
34
+ model_weights_filename in google_drive_paths
35
+ ):
36
+ gdrive_url = google_drive_paths[model_weights_filename]
37
+ try:
38
+ from gdown import download as drive_download
39
+
40
+ drive_download(gdrive_url, model_weights_filename, quiet=False)
41
+ except ModuleNotFoundError:
42
+ print(
43
+ "gdown module not found.",
44
+ "pip3 install gdown or, manually download the checkpoint file:",
45
+ gdrive_url
46
+ )
47
+
48
+ if not os.path.isfile(model_weights_filename) and (
49
+ model_weights_filename not in google_drive_paths
50
+ ):
51
+ print(
52
+ model_weights_filename,
53
+ " not found, you may need to manually download the model weights."
54
+ )
55
+
56
+ # given a list of filenames, load the inverted style code
57
+ @torch.no_grad()
58
+ def load_source(files, generator, device='cuda'):
59
+ sources = []
60
+
61
+ # for file in files:
62
+
63
+ source = torch.load(f'./inversion_codes/{files}.pt')['latent'].to(device)
64
+
65
+ if source.size(0) != 1:
66
+ source = source.unsqueeze(0)
67
+
68
+ if source.ndim == 3:
69
+ source = generator.get_latent(source, truncation=1, is_latent=True)
70
+ source = list2style(source)
71
+
72
+ sources.append(source)
73
+
74
+ sources = torch.cat(sources, 0)
75
+ if type(sources) is not list:
76
+ sources = style2list(sources)
77
+
78
+ return sources
79
+
80
+ '''
81
+ Given M, we zero out the first 2048 dimensions for every feature except pose and hair.
82
+ The reason is that the first 2048 channels mostly carry hair and pose information and rarely
83
+ anything related to the other classes.
84
+
85
+ '''
86
+ def remove_2048(M, labels2idx):
87
+ M_hair = M[:,labels2idx['hair']].clone()
88
+ # zero out first 2048 channels (4 style layers) for non hair and pose features
89
+ M[...,:2048] = 0
90
+ M[:,labels2idx['hair']] = M_hair
91
+ return M
92
+
93
+ # Compute pose M and append it as the last index of M
94
+ def add_pose(M, labels2idx):
95
+ M = remove_2048(M, labels2idx)
96
+ # Add pose to the very last index of M
97
+ pose = 1-M[:,labels2idx['hair']]
98
+ M = torch.cat([M, pose.view(-1,1,9088)], 1)
99
+ #zero out rest of the channels after 2048 as pose should not affect other features
100
+ M[:,-1, 2048:] = 0
101
+ return M
102
+
103
+
104
+ # add direction specified by q from source to reference, scaled by a
105
+ def add_direction(s, r, q, a):
106
+ if isinstance(s, list):
107
+ s = list2style(s)
108
+ if isinstance(r, list):
109
+ r = list2style(r)
110
+ if s.ndim == 1:
111
+ s = s.unsqueeze(0)
112
+ if r.ndim == 1:
113
+ r = r.unsqueeze(0)
114
+ if q.ndim == 1:
115
+ q = q.unsqueeze(0)
116
+ if len(s) != len(r):
117
+ if s.size(0)< r.size(0):
118
+ s = s.expand(r.size(0), -1)
119
+ else:
120
+ r = r.expand(s.size(0), -1)
121
+ q = q.float()
122
+
123
+ old_norm = (q*s).norm(2,dim=1, keepdim=True)+1e-8
124
+ new_dir = q*r
125
+ new_dir = new_dir/(new_dir.norm(2,dim=1, keepdim=True)+1e-8) * old_norm
126
+ return s -a*q*s + a*new_dir
127
+
128
+
129
+ # convert a style vector [B, 9088] into a suitable format (list) for our generator's input
130
+ def style2list(s):
131
+ output = []
132
+ count = 0
133
+ for size in style2list_len:
134
+ output.append(s[:, count:count+size])
135
+ count += size
136
+ return output
137
+
138
+ # convert the list back to a style vector
139
+ def list2style(s):
140
+ return torch.cat(s, 1)
141
+
142
+ # flatten spatial activations to vectors
143
+ def flatten_act(x):
144
+ b,c,h,w = x.size()
145
+ x = x.pow(2).permute(0,2,3,1).contiguous().view(-1, c) # [b*h*w, c]
146
+ return x.cpu().numpy()
147
+
148
+ def show(imgs, title=None):
149
+
150
+ plt.figure(figsize=(5 * len(imgs), 5))
151
+ if title is not None:
152
+ plt.suptitle(title + '\n', fontsize=24).set_y(1.05)
153
+
154
+ for i in range(len(imgs)):
155
+ plt.subplot(1, len(imgs), i + 1)
156
+ plt.imshow(imgs[i])
157
+ plt.axis('off')
158
+ plt.gca().set_axis_off()
159
+ plt.subplots_adjust(top=1, bottom=0, right=1, left=0,
160
+ hspace=0, wspace=0.02)
161
+ plt.savefig(title + '.png', bbox_inches='tight', pad_inches=0)
162
+ def part_grid(target_image, refernce_images, part_images, file_name, score=None):
163
+ def proc(img):
164
+ return (img * 255).permute(1, 2, 0).squeeze().cpu().numpy().astype('uint8')
165
+
166
+ rows, cols = len(part_images) + 1, len(refernce_images) + 1
167
+ fig = plt.figure(figsize=(cols*4, rows*4))
168
+ sz = target_image.shape[-1]
169
+
170
+ i = 1
171
+ plt.subplot(rows, cols, i)
172
+ plt.imshow(proc(target_image[0]))
173
+ plt.axis('off')
174
+ plt.gca().set_axis_off()
175
+ plt.title('Source', fontdict={'size': 26})
176
+
177
+ for img in refernce_images:
178
+ i += 1
179
+ plt.subplot(rows, cols, i)
180
+ plt.imshow(proc(img))
181
+ plt.axis('off')
182
+ plt.gca().set_axis_off()
183
+ plt.title('Reference', fontdict={'size': 26})
184
+
185
+ # plt.text(0, sz, 'Perceptual loss: {:.2f}'.format(score[i-2]), fontdict={'size': 25}, color='red')
186
+ for j, label in enumerate(part_images.keys()):
187
+ i += 1
188
+ plt.subplot(rows, cols, i)
189
+ plt.imshow(proc(target_image[0]) * 0 + 255)
190
+ # plt.text(sz // 2, sz // 2, label.capitalize(), fontdict={'size': 30})
191
+ if score is not None:
192
+ plt.text(0 , sz//6, f'ID: {score[0]:.2f}', fontdict={'size': 30})
193
+ plt.text(0 , sz//6*2, f'Face_LPIPS:{score[1]:.2f}', fontdict={'size': 30})
194
+ plt.text(0 , sz//6*3, f'Hair_LPIPS:{score[2]:.2f}', fontdict={'size': 30})
195
+ plt.text(0 , sz//6*4, f'Total_LPIPS:{score[3]:.2f}', fontdict={'size': 30})
196
+ plt.text(0 , sz//6*5, f'FACE_SSIM: {score[4]:.2f}', fontdict={'size': 30})
197
+ plt.text(0 , sz//6*6, f'Hair_SSIM: {score[5]:.2f}', fontdict={'size': 30})
198
+ plt.text(0 , sz//6*7, f'Total_SSIM: {score[6]:.2f}', fontdict={'size': 30})
199
+
200
+ plt.axis('off')
201
+ plt.gca().set_axis_off()
202
+
203
+ for img in part_images[label]:
204
+ i += 1
205
+ plt.subplot(rows, cols, i)
206
+ plt.imshow(proc(img))
207
+ plt.axis('off')
208
+ plt.gca().set_axis_off()
209
+
210
+ plt.tight_layout(pad=0, w_pad=0, h_pad=0)
211
+ plt.subplots_adjust(wspace=0, hspace=0)
212
+ ## Put 5 lines of text beside the image
213
+ # plt.text(0, sz+5, 'Perceptual loss: {:.2f}'.format(score[i-2]), fontdict={'size': 25}, color='red')
214
+
215
+ plt.savefig(file_name , bbox_inches='tight', pad_inches=0)
216
+ plt.close()
217
+ gc.collect()
218
+ return fig
219
+
220
+
221
+ def display_image(image, size=256, mode='nearest', unnorm=False, title=''):
222
+ # image is [3,h,w] or [1,3,h,w] tensor [0,1]
223
+ if image.is_cuda:
224
+ image = image.cpu()
225
+ if size is not None and image.size(-1) != size:
226
+ image = F.interpolate(image, size=(size,size), mode=mode)
227
+ if image.dim() == 4:
228
+ image = image[0]
229
+ image = ((image.clamp(-1,1)+1)/2).permute(1, 2, 0).detach().numpy()
230
+ plt.figure()
231
+ plt.title(title)
232
+ plt.axis('off')
233
+ plt.imshow(image)
234
+
235
+ def get_parsing_labels():
236
+ color = torch.FloatTensor([[0, 0, 0],
237
+ [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], [128, 0, 128],
238
+ [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0], [64, 128, 0],
239
+ [192, 128, 0], [64, 0, 128], [192, 0, 128], [64, 128, 128], [192,128,128],
240
+ [0, 64, 0], [0, 0, 64], [128, 0, 192], [0, 192, 128], [64,128,192], [64,64,64]])
241
+ return (color/255 * 2)-1
242
+
243
+ def decode_segmap(seg):
244
+ seg = seg.float()
245
+ label_colors = get_parsing_labels()
246
+ r = seg.clone()
247
+ g = seg.clone()
248
+ b = seg.clone()
249
+
250
+ for l in range(label_colors.size(0)):
251
+ r[seg == l] = label_colors[l, 0]
252
+ g[seg == l] = label_colors[l, 1]
253
+ b[seg == l] = label_colors[l, 2]
254
+
255
+ output = torch.stack([r,g,b], 1)
256
+ return output
257
+
258
+ def remove_idx(act, i):
259
+ # act [N, 128]
260
+ return torch.cat([act[:i], act[i+1:]], 0)
261
+
262
+ def interpolate_style(s, t, q):
+     if isinstance(s, list):
+         s = list2style(s)
+     if isinstance(t, list):
+         t = list2style(t)
+     if s.ndim == 1:
+         s = s.unsqueeze(0)
+     if t.ndim == 1:
+         t = t.unsqueeze(0)
+     if q.ndim == 1:
+         q = q.unsqueeze(0)
+     if len(s) != len(t):
+         s = s.expand(t.size(0), -1)
+     q = q.float()
+
+     return (1 - q) * s + q * t
+
+ def index_layers(w, i):
+     return [w[j][[i]] for j in range(len(w))]
+
+
+ def normalize_im(x):
+     return (x.clamp(-1, 1) + 1) / 2
+
+ def l2(a, b):
+     return (a - b).pow(2).sum(1)
+
+ def cos_dist(a, b):
+     return -F.cosine_similarity(a, b, 1)
+
+ def downsample(x):
+     return F.interpolate(x, size=(256, 256), mode='bilinear')
+
+ def get_landmark(filepath, predictor):
+     """get landmark with dlib
+     :return: np.array shape=(68, 2)
+     """
+     detector = dlib.get_frontal_face_detector()
+
+     img = dlib.load_rgb_image(filepath)
+     dets = detector(img, 1)
+
+     for k, d in enumerate(dets):
+         shape = predictor(img, d)
+
+     t = list(shape.parts())
+     a = []
+     for tt in t:
+         a.append([tt.x, tt.y])
+     lm = np.array(a)
+     return lm
+
+ def align_face(filepath, predictor, output_size=512):
+     # def align_face(filepath, output_size=512):
+
+     """
+     :param filepath: str
+     :return: PIL Image
+     """
+     ensure_checkpoint_exists("dlibshape_predictor_68_face_landmarks.dat")
+     predictor = dlib.shape_predictor("dlibshape_predictor_68_face_landmarks.dat")
+     lm = get_landmark(filepath, predictor)
+
+     lm_chin = lm[0: 17]  # left-right
+     lm_eyebrow_left = lm[17: 22]  # left-right
+     lm_eyebrow_right = lm[22: 27]  # left-right
+     lm_nose = lm[27: 31]  # top-down
+     lm_nostrils = lm[31: 36]  # top-down
+     lm_eye_left = lm[36: 42]  # left-clockwise
+     lm_eye_right = lm[42: 48]  # left-clockwise
+     lm_mouth_outer = lm[48: 60]  # left-clockwise
+     lm_mouth_inner = lm[60: 68]  # left-clockwise
+
+     # Calculate auxiliary vectors.
+     eye_left = np.mean(lm_eye_left, axis=0)
+     eye_right = np.mean(lm_eye_right, axis=0)
+     eye_avg = (eye_left + eye_right) * 0.5
+     eye_to_eye = eye_right - eye_left
+     mouth_left = lm_mouth_outer[0]
+     mouth_right = lm_mouth_outer[6]
+     mouth_avg = (mouth_left + mouth_right) * 0.5
+     eye_to_mouth = mouth_avg - eye_avg
+
+     # Choose oriented crop rectangle.
+     x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
+     x /= np.hypot(*x)
+     x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
+     y = np.flipud(x) * [-1, 1]
+     c = eye_avg + eye_to_mouth * 0.1
+     quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
+     qsize = np.hypot(*x) * 2
+
+     # read image
+     img = Image.open(filepath)
+
+     transform_size = output_size
+     enable_padding = True
+
+     # Shrink.
+     shrink = int(np.floor(qsize / output_size * 0.5))
+     if shrink > 1:
+         rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
+         img = img.resize(rsize, Image.ANTIALIAS)
+         quad /= shrink
+         qsize /= shrink
+
+     # Crop.
+     border = max(int(np.rint(qsize * 0.1)), 3)
+     crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
+             int(np.ceil(max(quad[:, 1]))))
+     crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
+             min(crop[3] + border, img.size[1]))
+     if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
+         img = img.crop(crop)
+         quad -= crop[0:2]
+
+     # Pad.
+     pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
+            int(np.ceil(max(quad[:, 1]))))
+     pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
+            max(pad[3] - img.size[1] + border, 0))
+     if enable_padding and max(pad) > border - 4:
+         pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
+         img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
+         h, w, _ = img.shape
+         y, x, _ = np.ogrid[:h, :w, :1]
+         mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
+                           1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
+         blur = qsize * 0.02
+         img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
+         img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
+         img = Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
+         quad += pad[:2]
+
+     # Transform.
+     img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
+     if output_size < transform_size:
+         img = img.resize((output_size, output_size), Image.ANTIALIAS)
+
+     # Return aligned image.
+     return img
+
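A quick illustration of what interpolate_style computes: a per-channel convex blend of a source style vector s and a target style vector t, where q can be a scalar weight or a per-channel mask. The tensors and values below are made up purely for illustration; only the blend formula comes from the code above.

    import torch

    s = torch.zeros(1, 6)   # hypothetical source style vector
    t = torch.ones(1, 6)    # hypothetical target style vector

    # q decides, per channel, how much of the target to take:
    # channels 0-2 keep the source, channels 3-5 take the target
    q = torch.tensor([0., 0., 0., 1., 1., 1.])

    blended = (1 - q) * s + q * t   # same formula as interpolate_style
    print(blended)                  # tensor([[0., 0., 0., 1., 1., 1.]])
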
ris/wrapper.py ADDED
@@ -0,0 +1,199 @@
+ import time
+ import shutil
+
+ import dlib
+ import numpy as np
+ import PIL.Image
+ import torch
+ from torchvision.transforms import transforms
+
+ import dnnlib
+ import legacy
+ from configs_gd import GENERATOR_CONFIGS
+ from dlib_utils.face_alignment import image_align
+ from dlib_utils.landmarks_detector import LandmarksDetector
+ from torch_utils.misc import copy_params_and_buffers
+
+ from pivot_tuning_inversion.utils.ImagesDataset import ImagesDataset, ImageLatentsDataset
+ from pivot_tuning_inversion.training.coaches.multi_id_coach import MultiIDCoach
+
+
+ class FaceLandmarksDetector:
+     """Dlib landmarks detector wrapper
+     """
+     def __init__(
+         self,
+         model_path='pretrained/shape_predictor_68_face_landmarks.dat',
+         tmp_dir='tmp'
+     ):
+
+         self.detector = LandmarksDetector(model_path)
+         self.timestamp = int(time.time())
+         self.tmp_src = f'{tmp_dir}/{self.timestamp}_src.png'
+         self.tmp_align = f'{tmp_dir}/{self.timestamp}_align.png'
+
+     def __call__(self, imgpath):
+         shutil.copy(imgpath, self.tmp_src)
+         try:
+             face_landmarks = list(self.detector.get_landmarks(self.tmp_src))[0]
+             assert isinstance(face_landmarks, list)
+             assert len(face_landmarks) == 68
+             image_align(self.tmp_src, self.tmp_align, face_landmarks)
+         except:
+             im = PIL.Image.open(self.tmp_src)
+             im.save(self.tmp_align)
+         return PIL.Image.open(self.tmp_align).convert('RGB')
+
+
+ class VGGFeatExtractor():
+     """VGG16 backbone wrapper
+     """
+     def __init__(self, device):
+         self.device = device
+         self.url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
+         with dnnlib.util.open_url(self.url) as f:
+             self.module = torch.jit.load(f).eval().to(device)
+
+     def __call__(self, img):  # PIL
+         img = self._preprocess(img, self.device)
+         feat = self.module(img)
+         return feat  # (1, 1000)
+
+     def _preprocess(self, img, device):
+         img = img.resize((256, 256), PIL.Image.LANCZOS)
+         img = np.array(img, dtype=np.uint8)
+         img = torch.tensor(img.transpose([2, 0, 1])).unsqueeze(dim=0)
+         return img.to(device)
+
+
+ class Generator_wrapper():
+     """StyleGAN2 generator wrapper
+     """
+     def __init__(self, ckpt, device):
+         with dnnlib.util.open_url(ckpt) as f:
+             old_G = legacy.load_network_pkl(f)['G_ema'].requires_grad_(False).to(device)
+         resolution = old_G.img_resolution
+         generator_config = GENERATOR_CONFIGS(resolution=resolution)
+         self.G_kwargs = generator_config.G_kwargs
+         self.common_kwargs = generator_config.common_kwargs
+
+         self.G = dnnlib.util.construct_class_by_name(**self.G_kwargs, **self.common_kwargs).eval().requires_grad_(False).to(device)
+         copy_params_and_buffers(old_G, self.G, require_all=False)
+         del old_G
+         G = self.G
+
+         self.style_layers = [
+             f'G.synthesis.b{feat_size}.{layer}.affine'
+             for feat_size in [pow(2, x) for x in range(2, int(np.log2(resolution)) + 1)]
+             for layer in ['conv0', 'conv1', 'torgb']]
+         del self.style_layers[0]
+         scope = locals()
+         self.to_stylespace = {layer: eval(layer, scope) for layer in self.style_layers}
+         w_idx_lst = generator_config.w_idx_lst
+         assert len(self.style_layers) == len(w_idx_lst)
+         self.to_w_idx = {self.style_layers[i]: w_idx_lst[i] for i in range(len(self.style_layers))}
+
+     def mapping(self, z, truncation_psi=0.7, truncation_cutoff=None, skip_w_avg_update=False):
+         '''random z -> latent w
+         '''
+         return self.G.mapping(
+             z,
+             None,
+             truncation_psi=truncation_psi,
+             truncation_cutoff=truncation_cutoff,
+             skip_w_avg_update=skip_w_avg_update
+         )
+
+     def mapping_stylespace(self, latent):
+         '''latent w -> style s
+         resolution | w_idx | # conv | # torgb | indices
+                  4 |     0 |      1 |       1 | 0-1
+                  8 |     1 |      2 |       1 | 1-3
+                 16 |     3 |      2 |       1 | 3-5
+                 32 |     5 |      2 |       1 | 5-7
+                 64 |     7 |      2 |       1 | 7-9
+                128 |     9 |      2 |       1 | 9-11
+                256 |    11 |      2 |       1 | 11-13  # for 256 resolution
+                512 |    13 |      2 |       1 | 13-15  # for 512 resolution
+               1024 |    15 |      2 |       1 | 15-17  # for 1024 resolution
+         '''
+         styles = dict()
+         for layer in self.style_layers:
+             module = self.to_stylespace.get(layer)
+             w_idx = self.to_w_idx.get(layer)
+             styles[layer] = module(latent.unbind(dim=1)[w_idx])
+         return styles
+
+     def synthesis_from_stylespace(self, latent, styles):
+         '''style s -> generated image
+         modulated conv2d, synthesis layer.weight, noise
+         forward after styles = affine(w)
+         '''
+         return self.G.synthesis(latent, styles=styles, noise_mode='const')
+
+     def synthesis(self, latent):
+         '''latent w -> generated image
+         '''
+         return self.G.synthesis(latent, noise_mode='const')
+
+
+ class e4eEncoder:
+     '''e4e Encoder
+     img paths -> latent w
+     '''
+     def __init__(self, device):
+         self.device = device
+
+     def __call__(self, target_pils):
+         dataset = ImagesDataset(
+             target_pils,
+             self.device,
+             transforms.Compose([
+                 transforms.ToTensor(),
+                 transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+         )
+         dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
+
+         coach = MultiIDCoach(dataloader, device=self.device)
+         latents = list()
+         for fname, image in dataloader:
+             latents.append(coach.get_e4e_inversion(image))
+         latents = torch.cat(latents)
+         return latents
+
+
+ class PivotTuning:
+     '''pivot tuning inversion
+     latent, style -> latent, style
+
+     mode
+     - 'w' : use the latent (w) pivot
+     - 's' : use the style pivot
+     '''
+     def __init__(self, device, G, mode='w'):
+         assert mode in ['w', 's']
+         self.device = device
+         self.G = G
+         self.mode = mode
+         self.resolution = G.img_resolution
+
+     def __call__(self, latent, target_pils):
+         dataset = ImageLatentsDataset(
+             target_pils,
+             latent,
+             self.device,
+             transforms.Compose([
+                 transforms.ToTensor(),
+                 transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+             self.resolution,
+         )
+         dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
+         coach = MultiIDCoach(
+             dataloader,
+             device=self.device,
+             generator=self.G,
+             mode=self.mode
+         )
+         # run coach by self.mode
+         new_G = coach.train_from_latent()
+         return new_G
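For orientation, a minimal sketch of how the generator wrapper above is meant to be driven: z is mapped to w, w is mapped to per-layer style vectors, and the styles are decoded back to an image. This assumes the RIS repository modules are importable and that a StyleGAN2-ADA pickle exists at the path shown; the checkpoint path and device string are placeholders, and z_dim comes from the underlying StyleGAN2-ADA generator.

    import torch
    from ris.wrapper import Generator_wrapper

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    generator = Generator_wrapper('pretrained/ffhq.pkl', device)   # hypothetical checkpoint path

    z = torch.randn([1, generator.G.z_dim], device=device)
    w = generator.mapping(z, truncation_psi=0.7)        # z -> w
    styles = generator.mapping_stylespace(w)            # w -> per-layer styles
    img = generator.synthesis_from_stylespace(w, styles)  # styles -> image (RIS-modified synthesis)
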
spherical_kmeans.py ADDED
@@ -0,0 +1,391 @@
+ import warnings
+ import numpy as np
+ from sklearn.preprocessing import normalize
+ from sklearn.utils.sparsefuncs_fast import assign_rows_csr
+ from sklearn.utils.validation import _check_sample_weight
+ from sklearn.utils import check_array, check_random_state
+ from sklearn.utils.extmath import row_norms
+ import scipy.sparse as sp
+ import sklearn.cluster  # makes the fully-qualified reference in the sparse branch below resolvable (needs an older scikit-learn where sklearn.cluster.k_means_ exists)
+ from sklearn.cluster import MiniBatchKMeans
+ from sklearn.cluster.k_means_ import (
+     _init_centroids,
+     _labels_inertia,
+     _tolerance,
+     _mini_batch_step,
+     _mini_batch_convergence
+ )
+
+
+ def _check_normalize_sample_weight(sample_weight, X):
+     """Set sample_weight if None, and check for correct dtype"""
+
+     sample_weight_was_none = sample_weight is None
+
+     sample_weight = _check_sample_weight(sample_weight, X)
+
+     if not sample_weight_was_none:
+         # normalize the weights to sum up to n_samples
+         # an array of 1 (i.e. samples_weight is None) is already normalized
+         n_samples = len(sample_weight)
+         scale = n_samples / sample_weight.sum()
+         sample_weight *= scale
+     return sample_weight
+
+
+ def _mini_batch_spherical_step(X, sample_weight, x_squared_norms, centers, weight_sums,
+                                old_center_buffer, compute_squared_diff,
+                                distances, random_reassign=False,
+                                random_state=None, reassignment_ratio=.01,
+                                verbose=False):
+     """Incremental update of the centers for the Minibatch K-Means algorithm.
+     Parameters
+     ----------
+     X : array, shape (n_samples, n_features)
+         The original data array.
+     sample_weight : array-like, shape (n_samples,)
+         The weights for each observation in X.
+     x_squared_norms : array, shape (n_samples,)
+         Squared euclidean norm of each data point.
+     centers : array, shape (k, n_features)
+         The cluster centers. This array is MODIFIED IN PLACE
+     counts : array, shape (k,)
+         The vector in which we keep track of the numbers of elements in a
+         cluster. This array is MODIFIED IN PLACE
+     distances : array, dtype float, shape (n_samples), optional
+         If not None, should be a pre-allocated array that will be used to store
+         the distances of each sample to its closest center.
+         May not be None when random_reassign is True.
+     random_state : int, RandomState instance or None (default)
+         Determines random number generation for centroid initialization and to
+         pick new clusters amongst observations with uniform probability. Use
+         an int to make the randomness deterministic.
+         See :term:`Glossary <random_state>`.
+     random_reassign : boolean, optional
+         If True, centers with very low counts are randomly reassigned
+         to observations.
+     reassignment_ratio : float, optional
+         Control the fraction of the maximum number of counts for a
+         center to be reassigned. A higher value means that low count
+         centers are more likely to be reassigned, which means that the
+         model will take longer to converge, but should converge in a
+         better clustering.
+     verbose : bool, optional, default False
+         Controls the verbosity.
+     compute_squared_diff : bool
+         If set to False, the squared diff computation is skipped.
+     old_center_buffer : int
+         Copy of old centers for monitoring convergence.
+     Returns
+     -------
+     inertia : float
+         Sum of squared distances of samples to their closest cluster center.
+     squared_diff : numpy array, shape (n_clusters,)
+         Squared distances between previous and updated cluster centers.
+     """
+     # Perform label assignment to nearest centers
+     nearest_center, inertia = _labels_inertia(X, sample_weight,
+                                               x_squared_norms, centers,
+                                               distances=distances)
+
+     if random_reassign and reassignment_ratio > 0:
+         random_state = check_random_state(random_state)
+         # Reassign clusters that have very low weight
+         to_reassign = weight_sums < reassignment_ratio * weight_sums.max()
+         # pick at most .5 * batch_size samples as new centers
+         if to_reassign.sum() > .5 * X.shape[0]:
+             indices_dont_reassign = \
+                 np.argsort(weight_sums)[int(.5 * X.shape[0]):]
+             to_reassign[indices_dont_reassign] = False
+         n_reassigns = to_reassign.sum()
+         if n_reassigns:
+             # Pick new clusters amongst observations with uniform probability
+             new_centers = random_state.choice(X.shape[0], replace=False,
+                                               size=n_reassigns)
+             if verbose:
+                 print("[MiniBatchKMeans] Reassigning %i cluster centers."
+                       % n_reassigns)
+
+             if sp.issparse(X) and not sp.issparse(centers):
+                 assign_rows_csr(
+                     X, new_centers.astype(np.intp, copy=False),
+                     np.where(to_reassign)[0].astype(np.intp, copy=False),
+                     centers)
+             else:
+                 centers[to_reassign] = X[new_centers]
+         # reset counts of reassigned centers, but don't reset them too small
+         # to avoid instant reassignment. This is a pretty dirty hack as it
+         # also modifies the learning rates.
+         weight_sums[to_reassign] = np.min(weight_sums[~to_reassign])
+
+     # implementation for the sparse CSR representation completely written in
+     # cython
+     if sp.issparse(X):
+         return inertia, sklearn.cluster.k_means_._k_means._mini_batch_update_csr(
+             X, sample_weight, x_squared_norms, centers, weight_sums,
+             nearest_center, old_center_buffer, compute_squared_diff)
+
+     # dense variant in mostly numpy (not as memory efficient though)
+     k = centers.shape[0]
+     squared_diff = 0.0
+     for center_idx in range(k):
+         # find points from minibatch that are assigned to this center
+         center_mask = nearest_center == center_idx
+         wsum = sample_weight[center_mask].sum()
+
+         if wsum > 0:
+             if compute_squared_diff:
+                 old_center_buffer[:] = centers[center_idx]
+
+             # inplace remove previous count scaling
+             centers[center_idx] *= weight_sums[center_idx]
+
+             # inplace sum with new points members of this cluster
+             centers[center_idx] += \
+                 np.sum(X[center_mask] *
+                        sample_weight[center_mask, np.newaxis], axis=0)
+
+             # unit-normalize for spherical k-means
+             centers[center_idx] = normalize(centers[center_idx, None])[:, 0]
+
+             # update the squared diff if necessary
+             if compute_squared_diff:
+                 diff = centers[center_idx].ravel() - old_center_buffer.ravel()
+                 squared_diff += np.dot(diff, diff)
+
+     return inertia, squared_diff
+
+
+ class MiniBatchSphericalKMeans(MiniBatchKMeans):
+
+     def fit(self, X, y=None, sample_weight=None):
+         """Compute the centroids on X by chunking it into mini-batches.
+         Parameters
+         ----------
+         X : array-like or sparse matrix, shape=(n_samples, n_features)
+             Training instances to cluster. It must be noted that the data
+             will be converted to C ordering, which will cause a memory copy
+             if the given data is not C-contiguous.
+         y : Ignored
+             Not used, present here for API consistency by convention.
+         sample_weight : array-like, shape (n_samples,), optional
+             The weights for each observation in X. If None, all observations
+             are assigned equal weight (default: None).
+         Returns
+         -------
+         self
+         """
+         random_state = check_random_state(self.random_state)
+         # unit-normalize for spherical k-means
+         X = normalize(X)
+         X = check_array(X, accept_sparse="csr", order='C',
+                         dtype=[np.float64, np.float32])
+         n_samples, n_features = X.shape
+         if n_samples < self.n_clusters:
+             raise ValueError("n_samples=%d should be >= n_clusters=%d"
+                              % (n_samples, self.n_clusters))
+
+         sample_weight = _check_normalize_sample_weight(sample_weight, X)
+
+         n_init = self.n_init
+         if hasattr(self.init, '__array__'):
+             self.init = np.ascontiguousarray(self.init, dtype=X.dtype)
+             if n_init != 1:
+                 warnings.warn(
+                     'Explicit initial center position passed: '
+                     'performing only one init in MiniBatchKMeans instead of '
+                     'n_init=%d'
+                     % self.n_init, RuntimeWarning, stacklevel=2)
+                 n_init = 1
+
+         x_squared_norms = row_norms(X, squared=True)
+
+         if self.tol > 0.0:
+             tol = _tolerance(X, self.tol)
+
+             # using tol-based early stopping needs the allocation of a
+             # dedicated buffer, which can be expensive for high dim data:
+             # hence we allocate it outside of the main loop
+             old_center_buffer = np.zeros(n_features, dtype=X.dtype)
+         else:
+             tol = 0.0
+             # no need for the center buffer if tol-based early stopping is
+             # disabled
+             old_center_buffer = np.zeros(0, dtype=X.dtype)
+
+         distances = np.zeros(self.batch_size, dtype=X.dtype)
+         n_batches = int(np.ceil(float(n_samples) / self.batch_size))
+         n_iter = int(self.max_iter * n_batches)
+
+         init_size = self.init_size
+         if init_size is None:
+             init_size = 3 * self.batch_size
+         if init_size > n_samples:
+             init_size = n_samples
+         self.init_size_ = init_size
+
+         validation_indices = random_state.randint(0, n_samples, init_size)
+         X_valid = X[validation_indices]
+         sample_weight_valid = sample_weight[validation_indices]
+         x_squared_norms_valid = x_squared_norms[validation_indices]
+
+         # perform several inits with random sub-sets
+         best_inertia = None
+         for init_idx in range(n_init):
+             if self.verbose:
+                 print("Init %d/%d with method: %s"
+                       % (init_idx + 1, n_init, self.init))
+             weight_sums = np.zeros(self.n_clusters, dtype=sample_weight.dtype)
+
+             # TODO: once the `k_means` function works with sparse input we
+             # should refactor the following init to use it instead.
+
+             # Initialize the centers using only a fraction of the data as we
+             # expect n_samples to be very large when using MiniBatchKMeans
+             cluster_centers = _init_centroids(
+                 X, self.n_clusters, self.init,
+                 random_state=random_state,
+                 x_squared_norms=x_squared_norms,
+                 init_size=init_size)
+
+             cluster_centers = normalize(cluster_centers)
+
+             # Compute the label assignment on the init dataset
+             _mini_batch_step(
+                 X_valid, sample_weight_valid,
+                 x_squared_norms[validation_indices], cluster_centers,
+                 weight_sums, old_center_buffer, False, distances=None,
+                 verbose=self.verbose)
+
+             cluster_centers = normalize(cluster_centers)
+
+             # Keep only the best cluster centers across independent inits on
+             # the common validation set
+             _, inertia = _labels_inertia(X_valid, sample_weight_valid,
+                                          x_squared_norms_valid,
+                                          cluster_centers)
+             if self.verbose:
+                 print("Inertia for init %d/%d: %f"
+                       % (init_idx + 1, n_init, inertia))
+             if best_inertia is None or inertia < best_inertia:
+                 self.cluster_centers_ = cluster_centers
+                 self.counts_ = weight_sums
+                 best_inertia = inertia
+
+         # Empty context to be used inplace by the convergence check routine
+         convergence_context = {}
+
+         # Perform the iterative optimization until the final convergence
+         # criterion
+         for iteration_idx in range(n_iter):
+             # Sample a minibatch from the full dataset
+             minibatch_indices = random_state.randint(
+                 0, n_samples, self.batch_size)
+
+             # Perform the actual update step on the minibatch data
+             self.cluster_centers_ = normalize(self.cluster_centers_)
+             batch_inertia, centers_squared_diff = _mini_batch_step(
+                 X[minibatch_indices], sample_weight[minibatch_indices],
+                 x_squared_norms[minibatch_indices],
+                 self.cluster_centers_, self.counts_,
+                 old_center_buffer, tol > 0.0, distances=distances,
+                 # Here we randomly choose whether to perform
+                 # random reassignment: the choice is done as a function
+                 # of the iteration index, and the minimum number of
+                 # counts, in order to force this reassignment to happen
+                 # every once in a while
+                 random_reassign=((iteration_idx + 1)
+                                  % (10 + int(self.counts_.min())) == 0),
+                 random_state=random_state,
+                 reassignment_ratio=self.reassignment_ratio,
+                 verbose=self.verbose)
+             self.cluster_centers_ = normalize(self.cluster_centers_)
+
+             # Monitor convergence and do early stopping if necessary
+             if _mini_batch_convergence(
+                     self, iteration_idx, n_iter, tol, n_samples,
+                     centers_squared_diff, batch_inertia, convergence_context,
+                     verbose=self.verbose):
+                 break
+
+         self.n_iter_ = iteration_idx + 1
+
+         if self.compute_labels:
+             self.labels_, self.inertia_ = \
+                 self._labels_inertia_minibatch(X, sample_weight)
+
+         return self
+
+     def partial_fit(self, X, y=None, sample_weight=None):
+         """Update k means estimate on a single mini-batch X.
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Coordinates of the data points to cluster. It must be noted that
+             X will be copied if it is not C-contiguous.
+         y : Ignored
+             Not used, present here for API consistency by convention.
+         sample_weight : array-like, shape (n_samples,), optional
+             The weights for each observation in X. If None, all observations
+             are assigned equal weight (default: None).
+         Returns
+         -------
+         self
+         """
+
+         X = check_array(X, accept_sparse="csr", order="C",
+                         dtype=[np.float64, np.float32])
+         n_samples, n_features = X.shape
+         if hasattr(self.init, '__array__'):
+             self.init = np.ascontiguousarray(self.init, dtype=X.dtype)
+
+         if n_samples == 0:
+             return self
+
+         # unit-normalize for spherical k-means
+         X = normalize(X)
+
+         sample_weight = _check_normalize_sample_weight(sample_weight, X)
+
+         x_squared_norms = row_norms(X, squared=True)
+         self.random_state_ = getattr(self, "random_state_",
+                                      check_random_state(self.random_state))
+         if (not hasattr(self, 'counts_')
+                 or not hasattr(self, 'cluster_centers_')):
+             # this is the first call partial_fit on this object:
+             # initialize the cluster centers
+             self.cluster_centers_ = _init_centroids(
+                 X, self.n_clusters, self.init,
+                 random_state=self.random_state_,
+                 x_squared_norms=x_squared_norms, init_size=self.init_size)
+
+             self.counts_ = np.zeros(self.n_clusters,
+                                     dtype=sample_weight.dtype)
+             random_reassign = False
+             distances = None
+         else:
+             # The lower the minimum count is, the more we do random
+             # reassignment, however, we don't want to do random
+             # reassignment too often, to allow for building up counts
+             random_reassign = self.random_state_.randint(
+                 10 * (1 + self.counts_.min())) == 0
+             distances = np.zeros(X.shape[0], dtype=X.dtype)
+
+         self.cluster_centers_ = normalize(self.cluster_centers_)
+
+         _mini_batch_spherical_step(X, sample_weight, x_squared_norms,
+                                    self.cluster_centers_, self.counts_,
+                                    np.zeros(0, dtype=X.dtype), 0,
+                                    random_reassign=random_reassign, distances=distances,
+                                    random_state=self.random_state_,
+                                    reassignment_ratio=self.reassignment_ratio,
+                                    verbose=self.verbose)
+         self.cluster_centers_ = normalize(self.cluster_centers_)
+
+         if self.compute_labels:
+             self.labels_, self.inertia_ = _labels_inertia(
+                 X, sample_weight, x_squared_norms, self.cluster_centers_)
+
+         return self
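A minimal usage sketch for the class above: it clusters unit-normalized vectors, so centers end up on the unit sphere and assignments effectively follow cosine similarity. The data and hyperparameters below are placeholders, and the module relies on sklearn's old private k_means_ API, so it only runs on an older scikit-learn release where sklearn.cluster.k_means_ is importable.

    import numpy as np
    from spherical_kmeans import MiniBatchSphericalKMeans

    # 1000 random 64-dim feature vectors; fit() normalizes them to the unit sphere
    X = np.random.RandomState(0).randn(1000, 64).astype(np.float32)

    km = MiniBatchSphericalKMeans(n_clusters=10, batch_size=100, random_state=0)
    km.fit(X)

    print(km.cluster_centers_.shape)   # (10, 64); each row has unit L2 norm
    print(km.labels_[:10])             # cluster assignment per sample
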