ethanNeuralImage committed
Commit: 6fa3e0e
Parent(s): 838d7c7

fix GPU usage to be optional
Files changed:
- hyperstyle_global_directions/edit.py +6 -6
- hyperstyle_global_directions/global_direction.py +6 -5
- hyperstyle_global_directions/stylespace_utils.py +2 -2
- models/hyperstyle/hypernetworks/hypernetwork.py +1 -1
- models/hyperstyle/hypernetworks/shared_weights_hypernet.py +6 -5
- models/hyperstyle/hyperstyle.py +1 -1
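
Every change below follows the same pattern: a hard-coded .cuda() call is replaced by .to(device), with the device threaded in through args, opts, or a constructor parameter. The commit does not show where that device value is first resolved; a minimal sketch of the usual resolution (resolve_device is a hypothetical helper name, not code from this repo) would be:

import torch

def resolve_device(requested: str = "cuda") -> torch.device:
    """Fall back to CPU when CUDA is requested but unavailable."""
    if requested.startswith("cuda") and not torch.cuda.is_available():
        return torch.device("cpu")  # GPU is now optional
    return torch.device(requested)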
hyperstyle_global_directions/edit.py
CHANGED

@@ -41,18 +41,18 @@ def parse_args(args_list=None):
 
 
 def load_direction_calculator(args):
-    delta_i_c = torch.from_numpy(np.load(args.delta_i_c)).float().cuda()
+    delta_i_c = torch.from_numpy(np.load(args.delta_i_c)).float().to(args.device)
     with open(args.s_statistics, "rb") as channels_statistics:
         _, s_std = pickle.load(channels_statistics)
-    s_std = [torch.from_numpy(s_i).float().cuda() for s_i in s_std]
+    s_std = [torch.from_numpy(s_i).float().to(args.device) for s_i in s_std]
     with open(args.text_prompt_templates, "r") as templates:
         text_prompt_templates = templates.readlines()
-    global_direction_calculator = StyleCLIPGlobalDirection(delta_i_c, s_std, text_prompt_templates)
+    global_direction_calculator = StyleCLIPGlobalDirection(delta_i_c, s_std, text_prompt_templates, args.device)
     return global_direction_calculator
 
 
 def load_stylegan_generator(args):
-    stylegan_model = Generator(args.stylegan_size, 512, 8, channel_multiplier=2).cuda()
+    stylegan_model = Generator(args.stylegan_size, 512, 8, channel_multiplier=2).to(args.device)
     checkpoint = torch.load(args.stylegan_weights)
     stylegan_model.load_state_dict(checkpoint['g_ema'])
     return stylegan_model

@@ -72,7 +72,7 @@ def run():
         if args.n_images is not None and idx >= args.n_images:
             break
         weight_deltas = np.load(os.path.join(args.weight_deltas_path, image_name.split(".")[0] + ".npy"), allow_pickle=True)
-        weight_deltas = [torch.from_numpy(w).cuda() if w is not None else None for w in weight_deltas]
+        weight_deltas = [torch.from_numpy(w).to(args.device) if w is not None else None for w in weight_deltas]
         latent = torch.from_numpy(latent)
         results, results_latent, source_img = edit_image(image_name, latent, stylegan_model, global_direction_calculator, args, weight_deltas)
         torchvision.utils.save_image(results, f"{args.output_path}/{image_name.split('.')[0]}.jpg",

@@ -80,7 +80,7 @@ def run():
 
 
 def edit_image(image_name, latent, stylegan_model, global_direction_calculator, args, weight_deltas=None):
-    latent_code = latent.cuda()
+    latent_code = latent.to(args.device)
     truncation = 1
     mean_latent = None
     input_is_latent = True
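
edit.py now reads args.device in four places, so the argument parser must expose a matching flag. The parse_args hunk itself is not part of this diff; a hypothetical flag consistent with the usage above would look like:

import argparse
import torch

parser = argparse.ArgumentParser()
# Hypothetical flag: the diff consumes args.device but does not show its definition.
parser.add_argument("--device",
                    default="cuda" if torch.cuda.is_available() else "cpu",
                    help="torch device for the generator, weight deltas, and latents")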
hyperstyle_global_directions/global_direction.py
CHANGED

@@ -7,12 +7,13 @@ from hyperstyle_global_directions.stylespace_utils import features_channels_to_s
 
 class StyleCLIPGlobalDirection:
 
-    def __init__(self, delta_i_c, s_std, text_prompts_templates):
+    def __init__(self, delta_i_c, s_std, text_prompts_templates, device='cuda'):
         super(StyleCLIPGlobalDirection, self).__init__()
+        self.device = device
         self.delta_i_c = delta_i_c
         self.s_std = s_std
         self.text_prompts_templates = text_prompts_templates
-        self.clip_model, _ = clip.load("ViT-B/32", device="cuda")
+        self.clip_model, _ = clip.load("ViT-B/32", device=device)
 
     def get_delta_s(self, neutral_text, target_text, beta):
         delta_i = self.get_delta_i([target_text, neutral_text]).float()

@@ -23,7 +24,7 @@ class StyleCLIPGlobalDirection:
         max_channel_value = torch.abs(delta_s).max()
         if max_channel_value > 0:
             delta_s /= max_channel_value
-        direction = features_channels_to_s(delta_s, self.s_std)
+        direction = features_channels_to_s(delta_s, self.s_std, self.device)
         return direction
 
     def get_delta_i(self, text_prompts):

@@ -37,11 +38,11 @@ class StyleCLIPGlobalDirection:
         text_features_list = []
         for text_prompt in text_prompts:
             formatted_text_prompts = [template.format(text_prompt) for template in self.text_prompts_templates]  # format with class
-            formatted_text_prompts = clip.tokenize(formatted_text_prompts).cuda()  # tokenize
+            formatted_text_prompts = clip.tokenize(formatted_text_prompts).to(self.device)  # tokenize
             text_embeddings = self.clip_model.encode_text(formatted_text_prompts)  # embed with text encoder
             text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True)
             text_embedding = text_embeddings.mean(dim=0)
             text_embedding /= text_embedding.norm()
             text_features_list.append(text_embedding)
-        text_features = torch.stack(text_features_list, dim=1).cuda()
+        text_features = torch.stack(text_features_list, dim=1).to(self.device)
         return text_features.t()
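
Passing device through to clip.load is what actually frees this class from the GPU: CLIP loads its weights onto the requested device, and the tokenized prompts follow it via .to(self.device). A quick standalone check of that behavior (assumes the clip package from openai/CLIP is installed):

import clip
import torch

device = "cpu"
model, _ = clip.load("ViT-B/32", device=device)  # weights land on CPU
tokens = clip.tokenize(["a photo of a smiling face"]).to(device)
with torch.no_grad():
    features = model.encode_text(tokens)
assert features.device.type == "cpu"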
hyperstyle_global_directions/stylespace_utils.py
CHANGED

@@ -5,7 +5,7 @@ STYLESPACE_DIMENSIONS = [512 for _ in range(15)] + [256, 256, 256] + [128, 128,
 TORGB_INDICES = list(range(1, len(STYLESPACE_DIMENSIONS), 3))
 STYLESPACE_INDICES_WITHOUT_TORGB = [i for i in range(len(STYLESPACE_DIMENSIONS)) if i not in TORGB_INDICES][:11]
 
-def features_channels_to_s(s_without_torgb, s_std):
+def features_channels_to_s(s_without_torgb, s_std, device='cuda'):
     s = []
     start_index_features = 0
     for c in range(len(STYLESPACE_DIMENSIONS)):

@@ -14,7 +14,7 @@ def features_channels_to_s(s_without_torgb, s_std):
             s_i = s_without_torgb[start_index_features:end_index_features] * s_std[c]
             start_index_features = end_index_features
         else:
-            s_i = torch.zeros(STYLESPACE_DIMENSIONS[c]).cuda()
+            s_i = torch.zeros(STYLESPACE_DIMENSIONS[c]).to(device)
         s_i = s_i.view(1, 1, -1, 1, 1)
         s.append(s_i)
     return s
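
One small aside on the pattern kept here: torch.zeros(n).to(device) allocates on CPU and then copies. Passing the device at construction is behaviorally identical and skips the staging copy; a sketch of the equivalent form:

import torch

# Equivalent to torch.zeros(STYLESPACE_DIMENSIONS[c]).to(device),
# but allocates directly on the target device with no intermediate CPU tensor.
s_i = torch.zeros(512, device="cpu")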
models/hyperstyle/hypernetworks/hypernetwork.py
CHANGED

@@ -34,7 +34,7 @@ class SharedWeightsHyperNetResNet(Module):
         self.layers_to_tune = [int(l) for l in opts.layers_to_tune.split(',')]
 
         self.shared_layers = [0, 2, 3, 5, 6, 8, 9, 11, 12]
-        self.shared_weight_hypernet = SharedWeightsHypernet(in_size=512, out_size=512, mode=None)
+        self.shared_weight_hypernet = SharedWeightsHypernet(in_size=512, out_size=512, mode=None, device=opts.device)
 
         self.refinement_blocks = nn.ModuleList()
         self.n_outputs = opts.n_hypernet_outputs
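
The hypernet now expects a device attribute on its options object. Illustrative only (the values below are placeholders, not taken from this repo's configs):

from argparse import Namespace

# Hypothetical minimal opts for SharedWeightsHyperNetResNet after this commit;
# device is the newly required field.
opts = Namespace(layers_to_tune="0,1,2,3,4", n_hypernet_outputs=26, device="cpu")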
models/hyperstyle/hypernetworks/shared_weights_hypernet.py
CHANGED

@@ -5,8 +5,9 @@ from torch.nn.parameter import Parameter
 
 class SharedWeightsHypernet(nn.Module):
 
-    def __init__(self, f_size=3, z_dim=512, out_size=512, in_size=512, mode=None):
+    def __init__(self, f_size=3, z_dim=512, out_size=512, in_size=512, mode=None, device='cuda'):
         super(SharedWeightsHypernet, self).__init__()
+        self.device = device
         self.mode = mode
         self.z_dim = z_dim
         self.f_size = f_size

@@ -15,11 +16,11 @@ class SharedWeightsHypernet(nn.Module):
         self.out_size = out_size
         self.in_size = in_size
 
-        self.w1 = Parameter(torch.fmod(torch.randn((self.z_dim, self.out_size * self.f_size * self.f_size)).cuda() / 40, 2))
-        self.b1 = Parameter(torch.fmod(torch.randn((self.out_size * self.f_size * self.f_size)).cuda() / 40, 2))
+        self.w1 = Parameter(torch.fmod(torch.randn((self.z_dim, self.out_size * self.f_size * self.f_size)).to(self.device) / 40, 2))
+        self.b1 = Parameter(torch.fmod(torch.randn((self.out_size * self.f_size * self.f_size)).to(self.device) / 40, 2))
 
-        self.w2 = Parameter(torch.fmod(torch.randn((self.z_dim, self.in_size * self.z_dim)).cuda() / 40, 2))
-        self.b2 = Parameter(torch.fmod(torch.randn((self.in_size * self.z_dim)).cuda() / 40, 2))
+        self.w2 = Parameter(torch.fmod(torch.randn((self.z_dim, self.in_size * self.z_dim)).to(self.device) / 40, 2))
+        self.b2 = Parameter(torch.fmod(torch.randn((self.in_size * self.z_dim)).to(self.device) / 40, 2))
 
     def forward(self, z):
         batch_size = z.shape[0]
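
Moving the randn tensor to the device before wrapping it in Parameter creates the parameters directly on that device instead of migrating them later with a module-wide .to(). A standalone check of the same initialization scheme (bounded Gaussian noise via torch.fmod, scaled by 1/40):

import torch
from torch.nn.parameter import Parameter

device = "cpu"  # GPU optional
w1 = Parameter(torch.fmod(torch.randn((512, 512 * 3 * 3)).to(device) / 40, 2))
assert w1.device.type == "cpu"
assert w1.abs().max() < 2  # fmod(x, 2) bounds every entry to (-2, 2)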
models/hyperstyle/hyperstyle.py
CHANGED

@@ -145,7 +145,7 @@ class HyperStyle(nn.Module):
         w_net = pSp(opts_w_encoder)
         w_net = w_net.encoder
         w_net.eval()
-        w_net.cuda()
+        w_net.to(self.opts.device)
         return w_net
 
     def __get_initial_inversion(self, x, resize=True):
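
Last, the PyTorch semantics that make the bare statement above valid: for nn.Module (unlike tensors) .to() moves parameters in place and returns the module itself, so w_net.to(self.opts.device) replaces w_net.cuda() one-for-one even with the return value discarded. A minimal demonstration:

import torch
import torch.nn as nn

net = nn.Linear(4, 4)
net.to("cpu")  # in-place for modules; the return value may be ignored
x = torch.randn(1, 4)
assert net(x).device.type == "cpu"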