Spaces:

openlamm
/

LAMM

Runtime error

openlamm committed on Jul 24, 2023

Commit

705386d

1 Parent(s): edb16cb

Update model/openlamm.py

Files changed (1) hide show

model/openlamm.py CHANGED Viewed

@@ -160,7 +160,7 @@ class LAMMPEFTModel(nn.Module):
         encoder_pretrain = args['encoder_pretrain'] if 'encoder_pretrain' in args else 'clip'
         self.encoder_pretrain = encoder_pretrain
         assert encoder_pretrain in ['imagebind', 'clip', 'epcl'], f'Encoder_pretrain: {encoder_pretrain} Not Implemented'
-        encoder_ckpt_path = args['encoder_ckpt_path'] if not encoder_pretrain == 'clip' else '~/.cache/clip/ViT-L-14.pt'
         vicuna_ckpt_path = args['vicuna_ckpt_path']
         system_header = args['system_header'] if 'system_header' in args else False
@@ -176,7 +176,7 @@ class LAMMPEFTModel(nn.Module):
         # TODO: Make sure the number of vision tokens is correct
         if args['encoder_pretrain'].lower() == 'clip':
-            clip_encoder, self.visual_preprocess = load_clip('ViT-L/14', device=device)
             self.visual_encoder = clip_encoder.visual
             if self.vision_feature_type == 'global':          # global feature from CLIP
                 self.vision_hidden_size = 768

         encoder_pretrain = args['encoder_pretrain'] if 'encoder_pretrain' in args else 'clip'
         self.encoder_pretrain = encoder_pretrain
         assert encoder_pretrain in ['imagebind', 'clip', 'epcl'], f'Encoder_pretrain: {encoder_pretrain} Not Implemented'
+        encoder_ckpt_path = args['encoder_ckpt_path'] if not encoder_pretrain == 'clip' and not os.path.isfile(args['encoder_ckpt_path']) else '~/.cache/clip/ViT-L-14.pt'
         vicuna_ckpt_path = args['vicuna_ckpt_path']
         system_header = args['system_header'] if 'system_header' in args else False
         # TODO: Make sure the number of vision tokens is correct
         if args['encoder_pretrain'].lower() == 'clip':
+            clip_encoder, self.visual_preprocess = load_clip(encoder_ckpt_path, device=device)
             self.visual_encoder = clip_encoder.visual
             if self.vision_feature_type == 'global':          # global feature from CLIP
                 self.vision_hidden_size = 768