Spaces:

GT-RIPL
/

GPT-K

Runtime error

App Files Community

cwkuo committed on Sep 17, 2023

Commit

d8c6a57

1 Parent(s): 6855619

Reduce GPU memory usage by keeping only the necessary modules of query_enc

Browse files

Files changed (2) hide show

app.py +9 -6
model/gptk.py +1 -1

app.py CHANGED Viewed

@@ -4,8 +4,10 @@ import time
 import gradio as gr
 import requests
 import numpy as np
 import torch
 import open_clip
 import faiss
 from transformers import TextIteratorStreamer
@@ -96,7 +98,7 @@ def add_text(state: Conversation, text, image):
 def search(image, pos, topk, knwl_db, knwl_idx):
     with torch.cuda.amp.autocast():
         image = query_trans(image).unsqueeze(0).to(device)
-        query = query_enc.encode_image(image, normalize=True)
     query = query.cpu().numpy()
     _, I = knwl_idx.search(query, 4*topk)
@@ -372,15 +374,16 @@ def build_knowledge():
         "act": get_knwl('knowledge/(dataset-action)(clip-model-ViT-g-14)(dbscan)(eps-0.15)(ms-1)'),
         "attr": get_knwl('knowledge/(dataset-attribute)(clip-model-ViT-g-14)(dbscan)(eps-0.15)(ms-1)'),
     }
-    return knwl_db
 def build_query_model():
     query_enc, _, query_trans = open_clip.create_model_and_transforms(
-        "ViT-g-14", pretrained="laion2b_s34b_b88k", precision='fp16', device=device
     )
-    query_enc = query_enc.eval()
     return query_enc, query_trans
@@ -388,7 +391,7 @@ def build_query_model():
 def build_gptk_model():
     _, gptk_trans = get_gptk_image_transform()
     topk = {"whole": 60, "five": 24, "nine": 16}
-    gptk_model = get_gptk_model(d_knwl=1024, topk=topk)
     gptk_ckpt = "model/ckpt/gptk-vicuna7b.pt"
     gptk_ckpt = torch.load(gptk_ckpt, map_location="cpu")
     gptk_model.load_state_dict(gptk_ckpt, strict=False)
@@ -402,8 +405,8 @@ if torch.cuda.is_available():
 else:
     device = torch.device("cpu")
 gptk_model, gptk_trans, topk = build_gptk_model()
 query_enc, query_trans = build_query_model()
-knwl_db = build_knowledge()
 demo = build_demo()
 demo.queue().launch()

 import gradio as gr
 import requests
 import numpy as np
+from pathlib import Path
 import torch
+import torch.nn.functional as F
 import open_clip
 import faiss
 from transformers import TextIteratorStreamer
 def search(image, pos, topk, knwl_db, knwl_idx):
     with torch.cuda.amp.autocast():
         image = query_trans(image).unsqueeze(0).to(device)
+        query = F.normalize(query_enc(image), dim=-1)
     query = query.cpu().numpy()
     _, I = knwl_idx.search(query, 4*topk)
         "act": get_knwl('knowledge/(dataset-action)(clip-model-ViT-g-14)(dbscan)(eps-0.15)(ms-1)'),
         "attr": get_knwl('knowledge/(dataset-attribute)(clip-model-ViT-g-14)(dbscan)(eps-0.15)(ms-1)'),
     }
+    d_knwl = knwl_db["obj"][0].feature.shape[-1]
+    return knwl_db, d_knwl
 def build_query_model():
     query_enc, _, query_trans = open_clip.create_model_and_transforms(
+        "ViT-g-14", pretrained="laion2b_s34b_b88k", precision='fp16'
     )
+    query_enc = query_enc.visual.to(device).eval()
     return query_enc, query_trans
 def build_gptk_model():
     _, gptk_trans = get_gptk_image_transform()
     topk = {"whole": 60, "five": 24, "nine": 16}
+    gptk_model = get_gptk_model(d_knwl=d_knwl, topk=topk)
     gptk_ckpt = "model/ckpt/gptk-vicuna7b.pt"
     gptk_ckpt = torch.load(gptk_ckpt, map_location="cpu")
     gptk_model.load_state_dict(gptk_ckpt, strict=False)
 else:
     device = torch.device("cpu")
+knwl_db, d_knwl = build_knowledge()
 gptk_model, gptk_trans, topk = build_gptk_model()
 query_enc, query_trans = build_query_model()
 demo = build_demo()
 demo.queue().launch()

model/gptk.py CHANGED Viewed

@@ -49,7 +49,7 @@ class GPTK(nn.Module):
         llm_config.gradient_checkpointing = True
         llm_config.use_cache = True
         quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
             llm_int8_threshold=6.0,
             llm_int8_has_fp16_weight=False,
             bnb_4bit_compute_dtype=torch.float16,

         llm_config.gradient_checkpointing = True
         llm_config.use_cache = True
         quantization_config = BitsAndBytesConfig(
+            load_in_8bit=True,
             llm_int8_threshold=6.0,
             llm_int8_has_fp16_weight=False,
             bnb_4bit_compute_dtype=torch.float16,