Spaces:

ldkong
/

TranSVAE

Build error

App Files Files Community

ldkong committed on Jul 28, 2022

Commit

af4f972

•

1 Parent(s): 3c3a705

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -157

app.py CHANGED Viewed

@@ -59,18 +59,6 @@ class RelationModuleMultiScale(torch.nn.Module):
         return list(itertools.combinations([i for i in range(num_frames)], num_frames_relation))
-class GradReverse(Function):
-    @staticmethod
-    def forward(ctx, x, beta):
-        ctx.beta = beta
-        return x.view_as(x)
-    @staticmethod
-    def backward(ctx, grad_output):
-        grad_input = grad_output.neg() * ctx.beta
-        return grad_input, None
 class TransferVAE_Video(nn.Module):
     def __init__(self):
@@ -133,86 +121,18 @@ class TransferVAE_Video(nn.Module):
             self.relation_domain_classifier_all += [relation_domain_classifier]
         self.pred_classifier_video = nn.Linear(self.feat_aggregated_dim, self.num_class)
         self.fc_feature_domain_latent = nn.Linear(self.f_dim, self.f_dim)
         self.fc_classifier_doamin_latent = nn.Linear(self.f_dim, 2)
-    def domain_classifier_frame(self, feat, beta):
-        feat_fc_domain_frame = GradReverse.apply(feat, beta)
-        feat_fc_domain_frame = self.fc_feature_domain_frame(feat_fc_domain_frame)
-        feat_fc_domain_frame = self.relu(feat_fc_domain_frame)
-        pred_fc_domain_frame = self.fc_classifier_domain_frame(feat_fc_domain_frame)
-        return pred_fc_domain_frame
-    def domain_classifier_video(self, feat_video, beta):
-        feat_fc_domain_video = GradReverse.apply(feat_video, beta)
-        feat_fc_domain_video = self.fc_feature_domain_video(feat_fc_domain_video)
-        feat_fc_domain_video = self.relu(feat_fc_domain_video)
-        pred_fc_domain_video = self.fc_classifier_domain_video(feat_fc_domain_video)
-        return pred_fc_domain_video
-    def domain_classifier_latent(self, f):
-        feat_fc_domain_latent = self.fc_feature_domain_latent(f)
-        feat_fc_domain_latent = self.relu(feat_fc_domain_latent)
-        pred_fc_domain_latent = self.fc_classifier_doamin_latent(feat_fc_domain_latent)
-        return pred_fc_domain_latent
-    def domain_classifier_relation(self, feat_relation, beta):
-        pred_fc_domain_relation_video = None
-        for i in range(len(self.relation_domain_classifier_all)):
-            feat_relation_single = feat_relation[:,i,:].squeeze(1)
-            feat_fc_domain_relation_single = GradReverse.apply(feat_relation_single, beta)
-            pred_fc_domain_relation_single = self.relation_domain_classifier_all[i](feat_fc_domain_relation_single)
-            if pred_fc_domain_relation_video is None:
-                pred_fc_domain_relation_video = pred_fc_domain_relation_single.view(-1,1,2)
-            else:
-                pred_fc_domain_relation_video = torch.cat((pred_fc_domain_relation_video, pred_fc_domain_relation_single.view(-1,1,2)), 1)
-        pred_fc_domain_relation_video = pred_fc_domain_relation_video.view(-1,2)
-        return pred_fc_domain_relation_video
-    def get_trans_attn(self, pred_domain):
-        softmax = nn.Softmax(dim=1)
-        logsoftmax = nn.LogSoftmax(dim=1)
-        entropy = torch.sum(-softmax(pred_domain) * logsoftmax(pred_domain), 1)
-        weights = 1 - entropy
-        return weights
-    def get_general_attn(self, feat):
-        num_segments = feat.size()[1]
-        feat = feat.view(-1, feat.size()[-1]) # reshape features: 128x4x256 --> (128x4)x256
-        weights = self.attn_layer(feat) # e.g. (128x4)x1
-        weights = weights.view(-1, num_segments, weights.size()[-1]) # reshape attention weights: (128x4)x1 --> 128x4x1
-        weights = F.softmax(weights, dim=1)  # softmax over segments ==> 128x4x1
-        return weights
-    def get_attn_feat_relation(self, feat_fc, pred_domain, num_segments):
-        weights_attn = self.get_trans_attn(pred_domain)
-        weights_attn = weights_attn.view(-1, num_segments-1, 1).repeat(1,1,feat_fc.size()[-1]) # reshape & repeat weights (e.g. 16 x 4 x 256)
-        feat_fc_attn = (weights_attn+1) * feat_fc
-        return feat_fc_attn, weights_attn[:,:,0]
     def encode_and_sample_post(self, x):
         if isinstance(x, list):
             conv_x = self.encoder_frame(x[0])
         else:
             conv_x = self.encoder_frame(x)
-        # pass the bidirectional lstm
         lstm_out, _ = self.z_lstm(conv_x)
-        # get f:
         backward = lstm_out[:, 0, self.hidden_dim:2 * self.hidden_dim]
         frontal = lstm_out[:, self.frames - 1, 0:self.hidden_dim]
         lstm_out_f = torch.cat((frontal, backward), dim=1)
@@ -220,7 +140,6 @@ class TransferVAE_Video(nn.Module):
         f_logvar = self.f_logvar(lstm_out_f)
         f_post = self.reparameterize(f_mean, f_logvar, random_sampling=False)
-        # pass to one direction rnn
         features, _ = self.z_rnn(lstm_out)
         z_mean = self.z_mean(features)
         z_logvar = self.z_logvar(features)
@@ -232,7 +151,6 @@ class TransferVAE_Video(nn.Module):
             for t in range(1,3,1):
                 conv_x = self.encoder_frame(x[t])
                 lstm_out, _ = self.z_lstm(conv_x)
-                # get f:
                 backward = lstm_out[:, 0, self.hidden_dim:2 * self.hidden_dim]
                 frontal = lstm_out[:, self.frames - 1, 0:self.hidden_dim]
                 lstm_out_f = torch.cat((frontal, backward), dim=1)
@@ -243,7 +161,6 @@ class TransferVAE_Video(nn.Module):
                 f_post_list.append(f_post)
             f_mean = f_mean_list
             f_post = f_post_list
-        # f_mean and f_post are list if triple else not
         return f_mean, f_logvar, f_post, z_mean, z_logvar, z_post
@@ -260,7 +177,6 @@ class TransferVAE_Video(nn.Module):
     def reparameterize(self, mean, logvar, random_sampling=True):
-        # Reparametrization occurs only if random sampling is set to true, otherwise mean is returned
         if random_sampling is True:
             eps = torch.randn_like(logvar)
             std = torch.exp(0.5 * logvar)
@@ -269,88 +185,20 @@ class TransferVAE_Video(nn.Module):
         else:
             return mean
-    def sample_z_prior_train(self, z_post, random_sampling=True):
-        z_out = None
-        z_means = None
-        z_logvars = None
-        batch_size = z_post.shape[0]
-        z_t = torch.zeros(batch_size, self.z_dim).cpu()
-        h_t_ly1 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        c_t_ly1 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        h_t_ly2 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        c_t_ly2 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        for i in range(self.frames):
-            # two layer LSTM and two one-layer FC
-            h_t_ly1, c_t_ly1 = self.z_prior_lstm_ly1(z_t, (h_t_ly1, c_t_ly1))
-            h_t_ly2, c_t_ly2 = self.z_prior_lstm_ly2(h_t_ly1, (h_t_ly2, c_t_ly2))
-            z_mean_t = self.z_prior_mean(h_t_ly2)
-            z_logvar_t = self.z_prior_logvar(h_t_ly2)
-            z_prior = self.reparameterize(z_mean_t, z_logvar_t, random_sampling)
-            if z_out is None:
-                # If z_out is none it means z_t is z_1, hence store it in the format [batch_size, 1, z_dim]
-                z_out = z_prior.unsqueeze(1)
-                z_means = z_mean_t.unsqueeze(1)
-                z_logvars = z_logvar_t.unsqueeze(1)
-            else:
-                # If z_out is not none, z_t is not the initial z and hence append it to the previous z_ts collected in z_out
-                z_out = torch.cat((z_out, z_prior.unsqueeze(1)), dim=1)
-                z_means = torch.cat((z_means, z_mean_t.unsqueeze(1)), dim=1)
-                z_logvars = torch.cat((z_logvars, z_logvar_t.unsqueeze(1)), dim=1)
-            z_t = z_post[:,i,:]
-        return z_means, z_logvars, z_out
-    # If random sampling is true, reparametrization occurs else z_t is just set to the mean
-    def sample_z(self, batch_size, random_sampling=True):
-        z_out = None  # This will ultimately store all z_s in the format [batch_size, frames, z_dim]
-        z_means = None
-        z_logvars = None
-        # All states are initially set to 0, especially z_0 = 0
-        z_t = torch.zeros(batch_size, self.z_dim).cpu()
-        # z_mean_t = torch.zeros(batch_size, self.z_dim)
-        # z_logvar_t = torch.zeros(batch_size, self.z_dim)
-        h_t_ly1 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        c_t_ly1 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        h_t_ly2 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        c_t_ly2 = torch.zeros(batch_size, self.hidden_dim).cpu()
-        for _ in range(self.frames):
-            # h_t, c_t = self.z_prior_lstm(z_t, (h_t, c_t))
-            # two layer LSTM and two one-layer FC
-            h_t_ly1, c_t_ly1 = self.z_prior_lstm_ly1(z_t, (h_t_ly1, c_t_ly1))
-            h_t_ly2, c_t_ly2 = self.z_prior_lstm_ly2(h_t_ly1, (h_t_ly2, c_t_ly2))
-            z_mean_t = self.z_prior_mean(h_t_ly2)
-            z_logvar_t = self.z_prior_logvar(h_t_ly2)
-            z_t = self.reparameterize(z_mean_t, z_logvar_t, random_sampling)
-            if z_out is None:
-                # If z_out is none it means z_t is z_1, hence store it in the format [batch_size, 1, z_dim]
-                z_out = z_t.unsqueeze(1)
-                z_means = z_mean_t.unsqueeze(1)
-                z_logvars = z_logvar_t.unsqueeze(1)
-            else:
-                # If z_out is not none, z_t is not the initial z and hence append it to the previous z_ts collected in z_out
-                z_out = torch.cat((z_out, z_t.unsqueeze(1)), dim=1)
-                z_means = torch.cat((z_means, z_mean_t.unsqueeze(1)), dim=1)
-                z_logvars = torch.cat((z_logvars, z_logvar_t.unsqueeze(1)), dim=1)
-        return z_means, z_logvars, z_out
     def forward(self, x, beta):
         """Encode ``x`` and reconstruct it from the posterior latents.

         Args:
             x: Input passed unchanged to ``encode_and_sample_post``.
             beta: Not read anywhere in this method.

         Returns:
             Tuple ``(f_post, z_post, recon_x)``. ``f_post`` is returned exactly
             as the encoder produced it (it may be a list); ``recon_x`` is the
             output of ``decoder_frame``.
         """
         # Only the third and sixth encoder outputs (f_post, z_post) are used here.
         _, _, f_post, _, _, z_post = self.encode_and_sample_post(x)
         # When f_post is a list, only its first element feeds the decoder.
         if isinstance(f_post, list):
             f_expand = f_post[0].unsqueeze(1).expand(-1, self.frames, self.f_dim)
         else:
             f_expand = f_post.unsqueeze(1).expand(-1, self.frames, self.f_dim)
         # f_expand has size (N, self.frames, self.f_dim); join it with z_post
         # along dim 2. Presumably z_post is (N, self.frames, z_dim) -- TODO confirm.
         zf = torch.cat((z_post, f_expand), dim=2)
         recon_x = self.decoder_frame(zf)
         return f_post, z_post, recon_x
 def name2seq(file_name):
     images = []
@@ -520,7 +368,7 @@ def run(domain_source, action_source, hair_source, top_source, bottom_source, do
     # == Forward ==
     with torch.no_grad():
-    f_post, z_post, recon_x = model(x, [0]*3)
     src_orig_sample = x[0, :, :, :, :]
     src_recon_sample = recon_x[0, :, :, :, :]

         return list(itertools.combinations([i for i in range(num_frames)], num_frames_relation))
 class TransferVAE_Video(nn.Module):
     def __init__(self):
             self.relation_domain_classifier_all += [relation_domain_classifier]
         self.pred_classifier_video = nn.Linear(self.feat_aggregated_dim, self.num_class)
         self.fc_feature_domain_latent = nn.Linear(self.f_dim, self.f_dim)
         self.fc_classifier_doamin_latent = nn.Linear(self.f_dim, 2)
     def encode_and_sample_post(self, x):
         if isinstance(x, list):
             conv_x = self.encoder_frame(x[0])
         else:
             conv_x = self.encoder_frame(x)
         lstm_out, _ = self.z_lstm(conv_x)
         backward = lstm_out[:, 0, self.hidden_dim:2 * self.hidden_dim]
         frontal = lstm_out[:, self.frames - 1, 0:self.hidden_dim]
         lstm_out_f = torch.cat((frontal, backward), dim=1)
         f_logvar = self.f_logvar(lstm_out_f)
         f_post = self.reparameterize(f_mean, f_logvar, random_sampling=False)
         features, _ = self.z_rnn(lstm_out)
         z_mean = self.z_mean(features)
         z_logvar = self.z_logvar(features)
             for t in range(1,3,1):
                 conv_x = self.encoder_frame(x[t])
                 lstm_out, _ = self.z_lstm(conv_x)
                 backward = lstm_out[:, 0, self.hidden_dim:2 * self.hidden_dim]
                 frontal = lstm_out[:, self.frames - 1, 0:self.hidden_dim]
                 lstm_out_f = torch.cat((frontal, backward), dim=1)
                 f_post_list.append(f_post)
             f_mean = f_mean_list
             f_post = f_post_list
         return f_mean, f_logvar, f_post, z_mean, z_logvar, z_post
     def reparameterize(self, mean, logvar, random_sampling=True):
         if random_sampling is True:
             eps = torch.randn_like(logvar)
             std = torch.exp(0.5 * logvar)
         else:
             return mean
     def forward(self, x, beta):
         """Encode ``x`` and reconstruct it from the posterior latents.

         Args:
             x: Input passed unchanged to ``encode_and_sample_post``.
             beta: Not read anywhere in this method.

         Returns:
             Tuple ``(f_post, z_post, recon_x)``. ``f_post`` is returned exactly
             as the encoder produced it (it may be a list); ``recon_x`` is the
             output of ``decoder_frame``.
         """
         # Only the third and sixth encoder outputs (f_post, z_post) are used here.
         _, _, f_post, _, _, z_post = self.encode_and_sample_post(x)
         # When f_post is a list, only its first element feeds the decoder.
         if isinstance(f_post, list):
             f_expand = f_post[0].unsqueeze(1).expand(-1, self.frames, self.f_dim)
         else:
             f_expand = f_post.unsqueeze(1).expand(-1, self.frames, self.f_dim)
         # f_expand has size (N, self.frames, self.f_dim); join it with z_post
         # along dim 2. Presumably z_post is (N, self.frames, z_dim) -- TODO confirm.
         zf = torch.cat((z_post, f_expand), dim=2)
         recon_x = self.decoder_frame(zf)
         return f_post, z_post, recon_x
 def name2seq(file_name):
     images = []
     # == Forward ==
     with torch.no_grad():
+        f_post, z_post, recon_x = model(x, [0]*3)
     src_orig_sample = x[0, :, :, :, :]
     src_recon_sample = recon_x[0, :, :, :, :]