import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score
from torch.distributions import MultivariateNormal, LogNormal, Normal, Chi2
from torch.distributions.distribution import Distribution


# Kernel density estimate of the data distribution, built from Gaussian
# kernels centered on each data point.
class GaussianKDE(Distribution):
    def __init__(self, X, bw):
        """
        X : tensor (n, d)
          `n` points with `d` dimensions to which KDE will be fit
        bw : numeric
          bandwidth for Gaussian kernel
        """
        self.X = X
        self.bw = bw
        self.dims = X.shape[-1]
        self.n = X.shape[0]
        # Standard multivariate normal kernel, scaled by `bw` so the bandwidth
        # parameter actually controls the kernel width.
        self.mvn = MultivariateNormal(loc=torch.zeros(self.dims),
                                      scale_tril=self.bw * torch.eye(self.dims))

    def sample(self, num_samples):
        """
        Draw samples by picking data points uniformly at random and perturbing
        each with Gaussian noise of scale `bw`.

        :param num_samples: the number of samples to draw from the KDE
        :return: a sample of size num_samples from the KDE
        """
        idxs = np.random.randint(0, self.n, num_samples)
        norm = Normal(loc=self.X[idxs], scale=self.bw)
        return norm.sample()

    def score_samples(self, Y, X=None):
        """Returns the kernel density estimates of each point in `Y`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated
        X : tensor (n, d), optional
          `n` points with `d` dimensions to which KDE will be fit. Provided to
          allow batch calculations in `log_prob`. By default, `X` is None and
          all points used to initialize the estimator are included.


        Returns
        -------
        log_probs : tensor (m)
          log probability densities for each of the queried points in `Y`
        """
        if X is None:
            X = self.X
        # Pairwise differences have shape (n, m, d); scoring them under the
        # kernel gives (n, m), and summing over the n data points yields one
        # log density per query point.
        log_probs = self.mvn.log_prob(X.unsqueeze(1) - Y).sum(dim=0)

        return log_probs

    def log_prob(self, Y):
        """Returns the total log probability of one or more points, `Y`, using
        a Multivariate Normal kernel fit to `X` and scaled using `bw`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated

        Returns
        -------
        log_prob : numeric
          total log probability density for the queried points, `Y`
        """

        # Accumulate in chunks so the (n, m, d) intermediate stays bounded;
        # the 1000-row chunk size is a heuristic, not a tuned value.
        log_prob = 0.0

        for x in torch.split(self.X, 1000):
            for y in torch.split(Y, 1000):
                log_prob += self.score_samples(y, x).sum(dim=0)

        return log_prob
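

def _demo_gaussian_kde():
    """A minimal, illustrative-only sketch of the GaussianKDE API; the point
    count, dimensionality, and bandwidth here are arbitrary placeholders."""
    points = torch.randn(100, 2)                # (n, d) data to fit
    kde = GaussianKDE(points, bw=0.1)
    draws = kde.sample(10)                      # (10, d) draws near the data
    total = kde.log_prob(torch.randn(5, 2))     # scalar total log density
    return draws, total
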
    
class Chi2KDE(Distribution):
    def __init__(self, X, bw):
        """
        X : tensor (n, d)
          `n` points with `d` dimensions to which KDE will be fit
        bw : numeric
          bandwidth for the log-normal sampling kernel (scoring uses a
          chi-squared kernel with `d` degrees of freedom)
        """
        self.X = X
        self.bw = bw
        self.dims = X.shape[-1]
        self.n = X.shape[0]
        self.mvn = Chi2(self.dims)

    def sample(self, num_samples):
        """
        Draw samples by picking data points uniformly at random and perturbing
        each with log-normal noise of scale `bw`, keeping samples positive.
        """
        idxs = np.random.randint(0, self.n, num_samples)
        norm = LogNormal(loc=self.X[idxs], scale=self.bw)
        return norm.sample()

    def score_samples(self, Y, X=None):
        """Returns the kernel density estimates of each point in `Y`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated
        X : tensor (n, d), optional
          `n` points with `d` dimensions to which KDE will be fit. Provided to
          allow batch calculations in `log_prob`. By default, `X` is None and
          all points used to initialize the estimator are included.


        Returns
        -------
        log_probs : tensor (m)
          log probability densities for each of the queried points in `Y`
        """
        if X is None:
            X = self.X
        # Chi2 has support on [0, inf), so score absolute pairwise differences;
        # sum over the data points (dim 0) and feature dimensions (dim 2) to
        # get one log density per query point, as documented above.
        log_probs = self.mvn.log_prob(torch.abs(X.unsqueeze(1) - Y)).sum(dim=(0, 2))

        return log_probs

    def log_prob(self, Y):
        """Returns the total log probability of one or more points, `Y`, using
        a Multivariate Normal kernel fit to `X` and scaled using `bw`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated

        Returns
        -------
        log_prob : numeric
          total log probability density for the queried points, `Y`
        """

        # Same chunked accumulation as GaussianKDE.log_prob.
        log_prob = 0.0

        for x in torch.split(self.X, 1000):
            for y in torch.split(Y, 1000):
                log_prob += self.score_samples(y, x).sum(dim=0)

        return log_prob
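
# Note: Chi2KDE mirrors GaussianKDE but scores absolute pairwise differences
# under a chi-squared kernel and samples log-normally, so it assumes
# non-negative inputs; nflow.compile() below min-max scales the data to
# (0, 1) whenever a Chi2 distribution is requested.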
    
    
class PlanarFlow(nn.Module):
    """
    A single planar flow; computes T(x) and log|det(jac_T)|.
    """
    def __init__(self, D):
        super(PlanarFlow, self).__init__()
        self.u = nn.Parameter(torch.Tensor(1, D), requires_grad=True)
        self.w = nn.Parameter(torch.Tensor(1, D), requires_grad=True)
        self.b = nn.Parameter(torch.Tensor(1), requires_grad=True)
        self.h = torch.tanh
        self.init_params()

    def init_params(self):
        self.w.data.uniform_(0.4, 1)
        self.b.data.uniform_(0.4, 1)
        self.u.data.uniform_(0.4, 1)
        

    def forward(self, z):
        linear_term = torch.mm(z, self.w.T) + self.b
        return z + self.u * self.h(linear_term)

    def h_prime(self, x):
        """
        Derivative of tanh
        """
        return (1 - self.h(x) ** 2)

    def psi(self, z):
        inner = torch.mm(z, self.w.T) + self.b
        return self.h_prime(inner) * self.w

    def log_det(self, z):
        inner = 1 + torch.mm(self.psi(z), self.u.T)
        return torch.log(torch.abs(inner))
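
# For reference (Rezende & Mohamed, 2015), the planar transform is
#
#   f(z)      = z + u * tanh(w^T z + b)
#   psi(z)    = (1 - tanh^2(w^T z + b)) * w
#   |det J_f| = |1 + u^T psi(z)|
#
# so PlanarFlow.log_det returns log|1 + u^T psi(z)| for each row of z.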


# A normalizing flow: a stack of planar flows that transforms base samples
# while accumulating the log-determinants of their Jacobians.
class NormalizingFlow(nn.Module):
    """
    A normalizing flow composed of a sequence of planar flows.
    """
    def __init__(self, D, n_flows=2):
        """
        The function takes in two arguments, D and n_flows. D is the dimension of the data, and n_flows
        is the number of flows. The function then creates a list of PlanarFlow objects, where the number
        of PlanarFlow objects is equal to n_flows
        
        :param D: the dimensionality of the data
        :param n_flows: number of flows to use, defaults to 2 (optional)
        """
        super(NormalizingFlow, self).__init__()
        self.flows = nn.ModuleList(
            [PlanarFlow(D) for _ in range(n_flows)])

    def sample(self, base_samples):
        """
        Transform samples from a simple base distribution
        by passing them through a sequence of Planar flows.
        """
        samples = base_samples
        for flow in self.flows:
            samples = flow(samples)
        return samples

    def forward(self, x):
        """
        Computes and returns the sum of log_det_jacobians
        and the transformed samples T(x).
        """
        sum_log_det = 0
        transformed_sample = x

        for flow in self.flows:
            sum_log_det = sum_log_det + flow.log_det(transformed_sample)
            transformed_sample = flow(transformed_sample)

        return transformed_sample, sum_log_det
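

def _demo_flow():
    """Illustrative-only sketch: transform base samples with a small flow.
    `random_normal_samples` is defined just below; the sizes are arbitrary."""
    flow = NormalizingFlow(D=2, n_flows=4)
    z0 = random_normal_samples(64, dim=2)
    z_k, sum_log_det = flow(z0)   # z_k: (64, 2), sum_log_det: (64, 1)
    return z_k, sum_log_det
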
    
def random_normal_samples(n, dim=2):
    """Draw `n` samples from the base distribution N(0, 1.5^2 I)."""
    return torch.zeros(n, dim).normal_(mean=0, std=1.5)




class nflow:
    def __init__(self, dim=2, latent=16, batchsize: int = 1, dataset=None):
        """
        Set up a normalizing-flow model over `dim`-dimensional data.

        :param dim: The dimension of the data, defaults to 2 (optional)
        :param latent: The number of planar flows in the model, defaults to 16 (optional)
        :param batchsize: The number of samples to generate at a time, defaults to 1
        :type batchsize: int (optional)
        :param dataset: path to a CSV dataset, e.g. data/dataset.csv
        :type dataset: str (optional)
        """
        self.dim = dim
        self.batchsize = batchsize
        self.model = NormalizingFlow(dim, latent)
        self.dataset = dataset

    def compile(self, optim: torch.optim = torch.optim.Adam, distribution: str = 'GaussianKDE',
                lr: float = 0.00015, bw: float = 0.1, wd: float = 0.0015):
        """
        Build the optimizer, scale the dataset, and fit the target density.

        :param optim: the optimizer class to use
        :type optim: torch.optim
        :param distribution: the target density to fit ('GaussianKDE' or 'Chi2KDE'), defaults to GaussianKDE
        :type distribution: str (optional)
        :param lr: learning rate
        :type lr: float
        :param bw: bandwidth for the KDE
        :type bw: float
        :param wd: weight decay for the optimizer; pass a falsy value to disable
        :type wd: float
        """
        if wd:
            self.opt = optim(
                params=self.model.parameters(),
                lr=lr,
                weight_decay=wd
            )
        else:
            self.opt = optim(
                params=self.model.parameters(),
                lr=lr
            )
        self.scaler = StandardScaler()
        self.scaler_mm = MinMaxScaler(feature_range=(0, 1))

        # Drop the index column, then scale: Chi2KDE needs non-negative inputs,
        # so it gets min-max scaling; GaussianKDE gets standardization.
        df = pd.read_csv(self.dataset)
        df = df.iloc[:, 1:]

        if 'Chi2' in distribution:
            self.scaled = self.scaler_mm.fit_transform(df)
        else:
            self.scaled = self.scaler.fit_transform(df)

        # Look the KDE class up by name (GaussianKDE or Chi2KDE, defined above).
        self.density = globals()[distribution](X=torch.tensor(self.scaled, dtype=torch.float32), bw=bw)

        self.scheduler = ReduceLROnPlateau(self.opt, patience=10000)
        self.losses = []

    def train(self, iters: int = 1000):
        """
        Sample from the base distribution, pass the samples through the flow,
        and minimize the reverse-KL loss against the target density.

        Note: this method is a generator; iterate over it to drive the
        training loop (it yields (iteration, loss) every 100 steps).

        :param iters: number of iterations to train for, defaults to 1000
        :type iters: int (optional)
        """
        for idx in range(iters):
            if idx % 100 == 0:
                print("Iteration {}".format(idx))

            # Plain tensors suffice here; torch.autograd.Variable is deprecated.
            samples = random_normal_samples(self.batchsize, self.dim)

            z_k, sum_log_det = self.model(samples)
            log_p_x = self.density.log_prob(z_k)
            # Reverse KL since we can evaluate target density but can't sample
            loss = (-sum_log_det - log_p_x).mean() / self.density.n

            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            self.scheduler.step(loss)

            self.losses.append(loss.item())

            if idx % 100 == 0:
                print("Loss {}".format(loss.item()))
                yield idx, loss.item()
        
    def performance(self):
        """
        Push the scaled dataset through the flow and print the r2 score
        between the transformed samples and the scaled data.
        """
        samples = self.model.sample(torch.tensor(self.scaled).float()).detach().numpy()

        print('r2', r2_score(self.scaled, samples))
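

# A hedged end-to-end sketch. The CSV path, dimensionality, batch size, and
# iteration count below are placeholders that must match your dataset; they
# are not values prescribed by this module.
if __name__ == "__main__":
    model = nflow(dim=2, latent=16, batchsize=256, dataset="data/dataset.csv")
    model.compile(distribution="GaussianKDE", lr=0.00015, bw=0.1)
    # train() is a generator; iterating it drives the optimization loop.
    for step, loss in model.train(iters=1000):
        pass
    model.performance()
    # Inspect convergence.
    plt.plot(model.losses)
    plt.xlabel("iteration")
    plt.ylabel("loss")
    plt.show()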