import gradio as gr import argparse import cv2 import imageio import math from math import ceil import matplotlib.pyplot as plt import matplotlib.animation as animation import numpy as np from PIL import Image import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Function class RelationModuleMultiScale(torch.nn.Module): def __init__(self, img_feature_dim, num_bottleneck, num_frames): super(RelationModuleMultiScale, self).__init__() self.subsample_num = 3 self.img_feature_dim = img_feature_dim self.scales = [i for i in range(num_frames, 1, -1)] self.relations_scales = [] self.subsample_scales = [] for scale in self.scales: relations_scale = self.return_relationset(num_frames, scale) self.relations_scales.append(relations_scale) self.subsample_scales.append(min(self.subsample_num, len(relations_scale))) self.num_frames = num_frames self.fc_fusion_scales = nn.ModuleList() # high-tech modulelist for i in range(len(self.scales)): scale = self.scales[i] fc_fusion = nn.Sequential(nn.ReLU(), nn.Linear(scale * self.img_feature_dim, num_bottleneck), nn.ReLU()) self.fc_fusion_scales += [fc_fusion] def forward(self, input): act_scale_1 = input[:, self.relations_scales[0][0] , :] act_scale_1 = act_scale_1.view(act_scale_1.size(0), self.scales[0] * self.img_feature_dim) act_scale_1 = self.fc_fusion_scales[0](act_scale_1) act_scale_1 = act_scale_1.unsqueeze(1) act_all = act_scale_1.clone() for scaleID in range(1, len(self.scales)): act_relation_all = torch.zeros_like(act_scale_1) num_total_relations = len(self.relations_scales[scaleID]) num_select_relations = self.subsample_scales[scaleID] idx_relations_evensample = [int(ceil(i * num_total_relations / num_select_relations)) for i in range(num_select_relations)] for idx in idx_relations_evensample: act_relation = input[:, self.relations_scales[scaleID][idx], :] act_relation = act_relation.view(act_relation.size(0), self.scales[scaleID] * self.img_feature_dim) act_relation = self.fc_fusion_scales[scaleID](act_relation) act_relation = act_relation.unsqueeze(1) act_relation_all += act_relation act_all = torch.cat((act_all, act_relation_all), 1) return act_all def return_relationset(self, num_frames, num_frames_relation): import itertools return list(itertools.combinations([i for i in range(num_frames)], num_frames_relation)) parser = argparse.ArgumentParser() parser.add_argument('--dataset', default='Sprite', help='datasets') parser.add_argument('--data_root', default='dataset', help='root directory for data') parser.add_argument('--num_class', type=int, default=15, help='the number of class for jester dataset') parser.add_argument('--input_type', default='image', choices=['feature', 'image'], help='the type of input') parser.add_argument('--src', default='domain_1', help='source domain') parser.add_argument('--tar', default='domain_2', help='target domain') parser.add_argument('--num_segments', type=int, default=8, help='the number of frame segment') parser.add_argument('--backbone', type=str, default="dcgan", choices=['dcgan', 'resnet101', 'I3Dpretrain','I3Dfinetune'], help='backbone') parser.add_argument('--channels', default=3, type=int, help='input channels for image inputs') parser.add_argument('--add_fc', default=1, type=int, metavar='M', help='number of additional fc layers (excluding the last fc layer) (e.g. 0, 1, 2)') parser.add_argument('--fc_dim', type=int, default=1024, help='dimension of added fc') parser.add_argument('--frame_aggregation', type=str, default='trn', choices=[ 'rnn', 'trn'], help='aggregation of frame features (none if baseline_type is not video)') parser.add_argument('--dropout_rate', default=0.5, type=float, help='dropout ratio for frame-level feature (default: 0.5)') parser.add_argument('--f_dim', type=int, default=512, help='dim of f') parser.add_argument('--z_dim', type=int, default=512, help='dimensionality of z_t') parser.add_argument('--f_rnn_layers', type=int, default=1, help='number of layers (content lstm)') parser.add_argument('--use_bn', type=str, default='none', choices=['none', 'AdaBN', 'AutoDIAL'], help='normalization-based methods') parser.add_argument('--prior_sample', type=str, default='random', choices=['random', 'post'], help='how to sample prior') parser.add_argument('--batch_size', default=128, type=int, help='-batch size') parser.add_argument('--use_attn', type=str, default='TransAttn', choices=['none', 'TransAttn', 'general'], help='attention-mechanism') parser.add_argument('--data_threads', type=int, default=5, help='number of data loading threads') opt = parser.parse_args(args=[]) def display_gif(file_name, save_name): images = [] for frame in range(8): frame_name = '%d' % (frame) image_filename = file_name + frame_name + '.png' images.append(imageio.imread(image_filename)) gif_filename = 'avatar_source.gif' return imageio.mimsave(gif_filename, images) def display_gif_pad(file_name, save_name): images = [] for frame in range(8): frame_name = '%d' % (frame) image_filename = file_name + frame_name + '.png' image = imageio.imread(image_filename) image = image[:, :, :3] image_pad = cv2.copyMakeBorder(image, 0, 0, 125, 125, cv2.BORDER_CONSTANT, value=0) images.append(image_pad) return imageio.mimsave(save_name, images) def display_image(file_name): image_filename = file_name + '0' + '.png' print(image_filename) image = imageio.imread(image_filename) imageio.imwrite('image.png', image) def run(domain_source, action_source, hair_source, top_source, bottom_source, domain_target, action_target, hair_target, top_target, bottom_target): # == Source Avatar == # body body_source = '0' # hair if hair_source == "green": hair_source = '0' elif hair_source == "yellow": hair_source = '2' elif hair_source == "rose": hair_source = '4' elif hair_source == "red": hair_source = '7' elif hair_source == "wine": hair_source = '8' # top if top_source == "brown": top_source = '0' elif top_source == "blue": top_source = '1' elif top_source == "white": top_source = '2' # bottom if bottom_source == "white": bottom_source = '0' elif bottom_source == "golden": bottom_source = '1' elif bottom_source == "red": bottom_source = '2' elif bottom_source == "silver": bottom_source = '3' file_name_source = './Sprite/frames/domain_1/' + action_source + '/' file_name_source = file_name_source + 'front' + '_' + str(body_source) + str(bottom_source) + str(top_source) + str(hair_source) + '_' gif = display_gif_pad(file_name_source, 'avatar_source.gif') # == Target Avatar == # body body_target = '1' # hair if hair_target == "violet": hair_target = '1' elif hair_target == "silver": hair_target = '3' elif hair_target == "purple": hair_target = '5' elif hair_target == "grey": hair_target = '6' elif hair_target == "golden": hair_target = '9' # top if top_target == "grey": top_target = '3' elif top_target == "khaki": top_target = '4' elif top_target == "linen": top_target = '5' elif top_target == "ocre": top_target = '6' # bottom if bottom_target == "denim": bottom_target = '4' elif bottom_target == "olive": bottom_target = '5' elif bottom_target == "brown": bottom_target = '6' file_name_target = './Sprite/frames/domain_2/' + action_target + '/' file_name_target = file_name_target + 'front' + '_' + str(body_target) + str(bottom_target) + str(top_target) + str(hair_target) + '_' gif_target = display_gif_pad(file_name_target, 'avatar_target.gif') return 'demo.gif' gr.Interface( run, inputs=[ gr.Textbox(value="Source Avatar - Human", interactive=False), gr.Radio(choices=["slash", "spellcard", "walk"], value="slash"), gr.Radio(choices=["green", "yellow", "rose", "red", "wine"], value="green"), gr.Radio(choices=["brown", "blue", "white"], value="brown"), gr.Radio(choices=["white", "golden", "red", "silver"], value="white"), gr.Textbox(value="Target Avatar - Alien", interactive=False), gr.Radio(choices=["slash", "spellcard", "walk"], value="walk"), gr.Radio(choices=["violet", "silver", "purple", "grey", "golden"], value="golden"), gr.Radio(choices=["grey", "khaki", "linen", "ocre"], value="ocre"), gr.Radio(choices=["denim", "olive", "brown"], value="brown"), ], outputs=[ gr.components.Image(type="file", label="Domain Disentanglement"), ], live=True, title="TransferVAE for Unsupervised Video Domain Adaptation", ).launch()