import numpy as np
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import torch
from umap import UMAP
import PIL.Image  # importing the submodule explicitly; `import PIL` alone does not guarantee PIL.Image is available

def get_separation_space(type_bin, annotations, df, samples=100, method='LR', C=0.1):
    """Fit a linear classifier (SVM or logistic regression) in the 512-dimensional
    latent space that separates the `samples` lowest-scoring images from the
    `samples` highest-scoring images for the annotation column `type_bin`.
    Returns the classifier coefficients (the separation vector), the number of
    latent dimensions with |coefficient| > 0.2, and the indices of those dimensions."""
    abstracts = np.array([float(ann) for ann in df[type_bin]])
    abstract_idxs = list(np.argsort(abstracts))[:samples]   # lowest-scoring samples (labeled 1)
    repr_idxs = list(np.argsort(abstracts))[-samples:]      # highest-scoring samples (labeled 0)
    X = np.array([annotations['z_vectors'][i] for i in abstract_idxs + repr_idxs])
    X = X.reshape((2 * samples, 512))
    y = np.array([1] * samples + [0] * samples)
    x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    if method == 'SVM':
        svc = SVC(gamma='auto', kernel='linear', random_state=0, C=C)
        svc.fit(x_train, y_train)
        print('Val performance SVM', svc.score(x_val, y_val))
        imp_features = (np.abs(svc.coef_) > 0.2).sum()
        imp_nodes = np.where(np.abs(svc.coef_) > 0.2)[1]
        return svc.coef_, imp_features, imp_nodes
    elif method == 'LR':
        clf = LogisticRegression(random_state=0, C=C)
        clf.fit(x_train, y_train)
        print('Val performance logistic regression', clf.score(x_val, y_val))
        imp_features = (np.abs(clf.coef_) > 0.2).sum()
        imp_nodes = np.where(np.abs(clf.coef_) > 0.2)[1]
        return clf.coef_, imp_features, imp_nodes
    else:
        raise ValueError(f"Unknown method '{method}', expected 'SVM' or 'LR'")


def regenerate_images(model, z, decision_boundary, min_epsilon=-3, max_epsilon=3, count=5):
    """Shift the latent code `z` along `decision_boundary` by `count` evenly spaced
    multipliers in [min_epsilon, max_epsilon] and generate one image per shifted code.
    Returns the list of PIL images and the multipliers used."""
    device = torch.device('cpu')
    G = model.to(device) # type: ignore

    # Labels (all-zero class vector, i.e. unconditional generation).
    label = torch.zeros([1, G.c_dim], device=device)

    z = torch.from_numpy(z.copy()).to(device)
    decision_boundary = torch.from_numpy(decision_boundary.copy()).to(device)

    lambdas = np.linspace(min_epsilon, max_epsilon, count)
    images = []
    # Generate one image per step along the separation direction.
    for lambda_ in lambdas:
        z_0 = z + lambda_ * decision_boundary
        # Construct an inverse rotation/translation matrix and pass it to the generator.
        # The generator expects this matrix as an inverse to avoid potentially failing
        # numerical operations in the network.
        #if hasattr(G.synthesis, 'input'):
            #m = make_transform(translate, rotate)
            #m = np.linalg.inv(m)
            #G.synthesis.input.transform.copy_(torch.from_numpy(m))

        img = G(z_0, label, truncation_psi=0.7, noise_mode='random')
        # Convert from NCHW floats in [-1, 1] to HWC uint8 in [0, 255].
        img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
        images.append(PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB'))

    return images, lambdas

def generate_original_image(z, model):
    """Generate the image corresponding to the unmodified latent code `z`."""
    device = torch.device('cpu')
    G = model.to(device) # type: ignore
    # Labels (all-zero class vector, i.e. unconditional generation).
    label = torch.zeros([1, G.c_dim], device=device)
    z = torch.from_numpy(z.copy()).to(device)
    img = G(z, label, truncation_psi=0.7, noise_mode='random')
    img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    return PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB')


def get_concepts_vectors(concepts, annotations, df, samples=100, method='LR', C=0.1):
    """Compute one separation vector per concept, stack them into a
    (len(concepts), 512) matrix, project the vectors to 3D with UMAP, and return
    the vectors, the projection, and the latent dimensions important to every concept."""
    important_nodes = []
    vectors = np.zeros((len(concepts), 512))
    for i, conc in enumerate(concepts):
        vec, _, imp_nodes = get_separation_space(conc, annotations, df, samples=samples, method=method, C=C)
        vectors[i, :] = vec
        important_nodes.append(set(imp_nodes))

    reducer = UMAP(n_neighbors=3,   # default 15, size of the local neighborhood used for manifold approximation
                   n_components=3,  # default 2, dimension of the space to embed into
                   min_dist=0.1,    # default 0.1, effective minimum distance between embedded points
                   spread=2.0,      # default 1.0, effective scale of embedded points; with min_dist controls how clustered the embedding is
                   random_state=0,  # default None, fixed seed for a reproducible embedding
                   )

    projection = reducer.fit_transform(vectors)
    nodes_in_common = set.intersection(*important_nodes)
    return vectors, projection, nodes_in_common
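

# Usage sketch (illustrative only): a minimal example of how these helpers might be
# combined, assuming a StyleGAN-style generator `G` loaded elsewhere, a DataFrame `df`
# with one score column per concept, and an `annotations` dict whose 'z_vectors' entry
# holds the 512-dimensional latent code of each row in `df`. All names below are
# hypothetical placeholders, not definitions made in this module.
#
# if __name__ == '__main__':
#     concepts = ['Abstract', 'Colorful']  # hypothetical concept columns in df
#     vectors, projection, shared_nodes = get_concepts_vectors(
#         concepts, annotations, df, samples=100, method='LR', C=0.1)
#     z = np.array(annotations['z_vectors'][0]).reshape(1, 512)
#     images, lambdas = regenerate_images(
#         G, z, vectors[0:1, :], min_epsilon=-3, max_epsilon=3, count=5)
#     original = generate_original_image(z, G)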