cwkuo committed
Commit · febd802
1 Parent(s): 7962ed0

trim checkpoint model weights
Files changed:
- app.py +2 -6
- model/utils.py +18 -0
app.py CHANGED
@@ -370,13 +370,9 @@ def build_model():
     _, image_trans = get_gptk_image_transform()
     topk = {"whole": 60, "five": 24, "nine": 16}
     gptk_model = get_gptk_model(d_knwl=d_knwl, topk=topk)
-    gptk_ckpt = "model/ckpt/
+    gptk_ckpt = "model/ckpt/gptk-vicuna7b.pt"
     gptk_ckpt = torch.load(gptk_ckpt, map_location="cpu")
-    gptk_ckpt = {
-        ".".join(k.split(".")[2:]): v
-        for k, v in gptk_ckpt["module"].items()
-    }
-    gptk_model.load_state_dict(gptk_ckpt)
+    gptk_model.load_state_dict(gptk_ckpt, strict=False)
     gptk_model = gptk_model.to(device).eval()
 
     return knwl_db, query_enc, query_trans, gptk_model, image_trans, topk, device
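Since the checkpoint is now trimmed to a subset of the model's parameters, `load_state_dict` is called with `strict=False` so the remaining (pretrained) weights keep their initialized values. A minimal sanity-check sketch, not part of the commit, using the names from the diff above:

# Hypothetical check: load_state_dict returns the names it could not match.
# With a trimmed checkpoint, "unexpected_keys" should be empty and
# "missing_keys" should only list weights that stay frozen/pretrained.
result = gptk_model.load_state_dict(gptk_ckpt, strict=False)
print(f"missing: {len(result.missing_keys)}, unexpected: {len(result.unexpected_keys)}")
assert not result.unexpected_keys, "trimmed checkpoint contains unknown parameters"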
model/utils.py CHANGED
@@ -1,9 +1,12 @@
 import os
+from pprint import pprint
+from tqdm import tqdm
 import torch
 import torch.distributed as dist
 import timm.models.hub as timm_hub
 
 
+
 def drop_sequence_mask(N, S, device, p=0.1, training=True):
     if training:
         mask = torch.rand((N, S), device=device)
@@ -77,3 +80,18 @@ def download_cached_file(url, check_hash=True, progress=False):
     dist.barrier()
 
     return get_cached_file_path()
+
+
+def trim_ckpt(ckpt_input, ckpt_output, extra_keys=()):
+    kept_keys = ('llm_proj', 'knwl', 'qformer', 'ln_vision', 'query_tokens') + extra_keys
+
+    ckpt = torch.load(ckpt_input, map_location="cpu")
+    ckpt = {
+        ".".join(n.split(".")[2:]): v
+        for n, v in tqdm(ckpt["module"].items(), dynamic_ncols=True)
+        if any([k in n for k in kept_keys])
+    }
+    print("Kept params:")
+    pprint(list(ckpt.keys()))
+
+    torch.save(ckpt, ckpt_output)
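The trimmed file loaded in app.py would be produced once, offline, with this helper. A minimal usage sketch, where the input path is an illustrative assumption rather than a value from the commit:

from model.utils import trim_ckpt

# Read the full training checkpoint (expected to carry a top-level "module"
# dict), keep only parameter names containing one of the kept_keys substrings,
# drop the first two components of each name, and write the much smaller
# state dict that app.py now loads directly.
trim_ckpt(
    ckpt_input="model/ckpt/full_training_ckpt.pt",  # hypothetical source checkpoint
    ckpt_output="model/ckpt/gptk-vicuna7b.pt",      # file loaded in app.py
)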