Upload adversarial_training_clip_with_object_token.py
train/adversarial_training_clip_with_object_token.py
CHANGED
@@ -108,6 +108,8 @@ def main(args):
         assert str(args.start_step) in args.optimizer_state
         assert args.pretrained in ['', 'none']
         args.pretrained = args.optimizer_state.replace('_opt', '')
+        args.pretrained_proj_head = args.optimizer_state.replace('_opt', '_proj_head')
+
     model, _, _ = load_clip_model(args.clip_model_name, args.pretrained)
 
     # Remove the Normalize transform by creating a new Compose object
@@ -128,6 +130,9 @@ def main(args):
     cfg_dict = {'slot_dim': 256, 'num_slots': 10, 'token_num': 256, 'ISA': False, 'slot_att_iter': 3, 'query_opt': False}
     model_slots = DINOSAURpp(cfg_dict)
     proj_head = torch.nn.Linear(256, 1024)  # slot-num to slot-num
+    if args.optimizer_state != '':
+        proj_head.load_state_dict(torch.load(args.pretrained_proj_head))
+
 
 
     # get data
@@ -505,13 +510,13 @@ def train_one_epoch(
             wandb.log(log_data)
 
         # save 10 models over the course of training
-        if args.save_checkpoints and (step_total % (args.steps //
+        if args.save_checkpoints and (step_total % (args.steps // 10) == 0):
             # save model and optimizer state_dict
             torch.save(unwrap_model(model).model.state_dict(), f'{args.output_dir}/checkpoints/step_{step_total}.pt')
             torch.save(unwrap_model(proj_head).state_dict(), f'{args.output_dir}/checkpoints/step_{step_total}_proj_head.pt')
             torch.save(optimizer.state_dict(), f'{args.output_dir}/checkpoints/step_{step_total}_opt.pt')
         # every 200 steps, save a fallback model, which gets overwritten
-        if step_total %
+        if step_total % 2000 == 0:
             torch.save(unwrap_model(model).model.state_dict(), f'{args.output_dir}/checkpoints/fallback_{step_total}.pt')
             torch.save(unwrap_model(proj_head).state_dict(), f'{args.output_dir}/checkpoints/fallback_{step_total}_proj_head.pt')
             torch.save(optimizer.state_dict(), f'{args.output_dir}/checkpoints/fallback_{step_total}_opt.pt')
@@ -523,7 +528,7 @@ def train_one_epoch(
         if step_total >= args.steps:
             break
 
-        torch.cuda.empty_cache()
+        # torch.cuda.empty_cache()
     return step_total
 
 
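For context on the resume logic introduced above: checkpoints are written as triplets (`step_{n}.pt` for the CLIP weights, `step_{n}_proj_head.pt` for the slot projection head, `step_{n}_opt.pt` for the optimizer state), and the new code derives the projection-head path from `args.optimizer_state` by swapping the `_opt` suffix. A minimal sketch of that naming convention, assuming the same `Linear(256, 1024)` head as in the script; the helper and the example path are illustrative and not part of this commit:

import torch

def checkpoint_paths(optimizer_state_path):
    # Mirror the suffix convention used in the diff:
    #   step_{n}_opt.pt        -> optimizer state
    #   step_{n}.pt            -> CLIP model weights
    #   step_{n}_proj_head.pt  -> slot projection head weights
    model_path = optimizer_state_path.replace('_opt', '')
    proj_head_path = optimizer_state_path.replace('_opt', '_proj_head')
    return model_path, proj_head_path

# Hypothetical resume from step 2000:
opt_path = 'outputs/checkpoints/step_2000_opt.pt'
model_path, proj_head_path = checkpoint_paths(opt_path)

proj_head = torch.nn.Linear(256, 1024)
proj_head.load_state_dict(torch.load(proj_head_path, map_location='cpu'))
# The CLIP weights (model_path) and optimizer state (opt_path) are restored
# analogously via load_clip_model(...) and optimizer.load_state_dict(...).

On the save cadence: with an illustrative `args.steps = 20000`, the new condition `step_total % (args.steps // 10) == 0` fires every 2000 steps, producing the ten numbered checkpoints the comment describes, while the fallback triplet is written on the fixed `step_total % 2000 == 0` schedule.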