add text embeddings and change prediction logits.
Files changed:
- app.py +32 -6
- data/jsons/cub_desc_idx2name.json +202 -0
- data/text_embeddings/cub_200_desc.pt +3 -0
- utils/predict.py +42 -25
app.py
CHANGED
@@ -1,8 +1,9 @@
 import io
 import os
-os.system("pip uninstall -y gradio")
-os.system("pip install gradio==3.41.0")
+# os.system("pip uninstall -y gradio")
+# os.system("pip install gradio==3.41.0")
 
+import torch
 import json
 import base64
 import random
@@ -24,6 +25,9 @@ XCLIP_DESC = json.load(open(XCLIP_DESC_PATH, "r"))
 PREPROCESS = lambda x: OWLVIT_PRECESSOR(images=x, return_tensors='pt')
 IMAGES_FOLDER = "data/images"
 XCLIP_RESULTS = json.load(open("data/jsons/xclip_org.json", "r"))
+CUB_DESC_EMBEDS = torch.load('data/text_embeddings/cub_200_desc.pt')
+CUB_IDX2NAME = json.load(open('data/jsons/cub_desc_idx2name.json', 'r'))
+CUB_IDX2NAME = {int(k): v for k, v in CUB_IDX2NAME.items()}
 # correct_predictions = [k for k, v in XCLIP_RESULTS.items() if v['prediction']]
 
 # get the intersection of sachit and xclip (revised)
@@ -225,7 +229,18 @@ def update_selected_image(event: gr.SelectData):
     gt_class.state = gt_label
 
     # --- for initial value only ---
-    out_dict = xclip_pred(new_desc=None,
+    out_dict = xclip_pred(new_desc=None,
+                          new_part_mask=None,
+                          new_class=None,
+                          org_desc=XCLIP_DESC_PATH,
+                          image=Image.open(os.path.join(IMAGES_FOLDER, 'org', current_image.state)).convert('RGB'),
+                          model=XCLIP,
+                          owlvit_processor=OWLVIT_PRECESSOR,
+                          device=DEVICE,
+                          image_name=current_image.state,
+                          cub_embeds=CUB_DESC_EMBEDS,
+                          cub_idx2name=CUB_IDX2NAME,
+                          descriptors=XCLIP_DESC)
     xclip_label = out_dict['pred_class']
     clip_pred_scores = out_dict['pred_score']
     xclip_part_scores = out_dict['pred_desc_scores']
@@ -298,7 +313,18 @@ def on_predict_button_click_xclip(textbox_input: str):
     descriptions_dict, part_mask, new_class_name = convert_input_text_to_xclip_format(textbox_input)
 
     # Get the new predictions and explanations
-    out_dict = xclip_pred(new_desc=descriptions_dict,
+    out_dict = xclip_pred(new_desc=descriptions_dict,
+                          new_part_mask=part_mask,
+                          new_class=new_class_name,
+                          org_desc=XCLIP_DESC_PATH,
+                          image=Image.open(os.path.join(IMAGES_FOLDER, 'org', current_image.state)).convert('RGB'),
+                          model=XCLIP,
+                          owlvit_processor=OWLVIT_PRECESSOR,
+                          device=DEVICE,
+                          image_name=current_image.state,
+                          cub_embeds=CUB_DESC_EMBEDS,
+                          cub_idx2name=CUB_IDX2NAME,
+                          descriptors=XCLIP_DESC)
     xclip_label = out_dict['pred_class']
     xclip_pred_score = out_dict['pred_score']
     xclip_part_scores = out_dict['pred_desc_scores']
@@ -403,5 +429,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="PEEB") as demo:
     xclip_edit_button.click(on_edit_button_click_xclip, inputs=[], outputs=[xclip_textbox, custom_explanation])
     xclip_predict_button.click(on_predict_button_click_xclip, inputs=[xclip_textbox], outputs=[xclip_textbox, xclip_pred_label, xclip_explanation, custom_pred_label, custom_explanation])
 
-
-demo.launch()
+demo.launch(server_port=5000, share=True)
+# demo.launch()
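Note: at startup, app.py now loads the precomputed CUB descriptor embeddings and the index-to-name map once and passes them into xclip_pred, so a prediction no longer re-encodes every class description. A minimal sketch of that startup step, assuming the two data files added in this commit; the commented shape is an assumption about the tensor layout (one row per class-part description), not something the diff states:

import torch

# Loaded once at import time, then passed to xclip_pred(cub_embeds=..., cub_idx2name=...).
CUB_DESC_EMBEDS = torch.load('data/text_embeddings/cub_200_desc.pt')
print(CUB_DESC_EMBEDS.shape)  # assumed: (200 * 12, embed_dim), one row per (class, part) description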
data/jsons/cub_desc_idx2name.json
ADDED
@@ -0,0 +1,202 @@
+{
+    "0": "Black-footed Albatross",
+    "1": "Laysan Albatross",
+    "2": "Sooty Albatross",
+    "3": "Groove-billed Ani",
+    "4": "Crested Auklet",
+    "5": "Least Auklet",
+    "6": "Parakeet Auklet",
+    "7": "Rhinoceros Auklet",
+    "8": "Brewer Blackbird",
+    "9": "Red-winged Blackbird",
+    "10": "Rusty Blackbird",
+    "11": "Yellow-headed Blackbird",
+    "12": "Bobolink",
+    "13": "Indigo Bunting",
+    "14": "Lazuli Bunting",
+    "15": "Painted Bunting",
+    "16": "Cardinal",
+    "17": "Spotted Catbird",
+    "18": "Gray Catbird",
+    "19": "Yellow-breasted Chat",
+    "20": "Eastern Towhee",
+    "21": "Chuck-will Widow",
+    "22": "Brandt Cormorant",
+    "23": "Red-faced Cormorant",
+    "24": "Pelagic Cormorant",
+    "25": "Bronzed Cowbird",
+    "26": "Shiny Cowbird",
+    "27": "Brown Creeper",
+    "28": "American Crow",
+    "29": "Fish Crow",
+    "30": "Black-billed Cuckoo",
+    "31": "Mangrove Cuckoo",
+    "32": "Yellow-billed Cuckoo",
+    "33": "Gray-crowned-Rosy Finch",
+    "34": "Purple Finch",
+    "35": "Northern Flicker",
+    "36": "Acadian Flycatcher",
+    "37": "Great-Crested Flycatcher",
+    "38": "Least Flycatcher",
+    "39": "Olive-sided Flycatcher",
+    "40": "Scissor-tailed Flycatcher",
+    "41": "Vermilion Flycatcher",
+    "42": "Yellow-bellied Flycatcher",
+    "43": "Frigatebird",
+    "44": "Northern Fulmar",
+    "45": "Gadwall",
+    "46": "American Goldfinch",
+    "47": "European Goldfinch",
+    "48": "Boat-tailed Grackle",
+    "49": "Eared Grebe",
+    "50": "Horned Grebe",
+    "51": "Pied-billed Grebe",
+    "52": "Western Grebe",
+    "53": "Blue Grosbeak",
+    "54": "Evening Grosbeak",
+    "55": "Pine Grosbeak",
+    "56": "Rose-breasted Grosbeak",
+    "57": "Pigeon Guillemot",
+    "58": "California Gull",
+    "59": "Glaucous-winged Gull",
+    "60": "Heermann Gull",
+    "61": "Herring Gull",
+    "62": "Ivory Gull",
+    "63": "Ring-billed Gull",
+    "64": "Slaty-backed Gull",
+    "65": "Western Gull",
+    "66": "Anna Hummingbird",
+    "67": "Ruby-throated Hummingbird",
+    "68": "Rufous Hummingbird",
+    "69": "Green Violetear",
+    "70": "Long-tailed Jaeger",
+    "71": "Pomarine Jaeger",
+    "72": "Blue Jay",
+    "73": "Florida Jay",
+    "74": "Green Jay",
+    "75": "Dark-eyed Junco",
+    "76": "Tropical Kingbird",
+    "77": "Gray Kingbird",
+    "78": "Belted Kingfisher",
+    "79": "Green Kingfisher",
+    "80": "Pied Kingfisher",
+    "81": "Ringed Kingfisher",
+    "82": "White-breasted Kingfisher",
+    "83": "Red-legged Kittiwake",
+    "84": "Horned Lark",
+    "85": "Pacific Loon",
+    "86": "Mallard",
+    "87": "Western Meadowlark",
+    "88": "Hooded Merganser",
+    "89": "Red-breasted Merganser",
+    "90": "Mockingbird",
+    "91": "Nighthawk",
+    "92": "Clark Nutcracker",
+    "93": "White-breasted Nuthatch",
+    "94": "Baltimore Oriole",
+    "95": "Hooded Oriole",
+    "96": "Orchard Oriole",
+    "97": "Scott Oriole",
+    "98": "Ovenbird",
+    "99": "Brown Pelican",
+    "100": "White Pelican",
+    "101": "Western-Wood Pewee",
+    "102": "Sayornis",
+    "103": "American Pipit",
+    "104": "Whip-poor Will",
+    "105": "Horned Puffin",
+    "106": "Common Raven",
+    "107": "White-necked Raven",
+    "108": "American Redstart",
+    "109": "Geococcyx",
+    "110": "Loggerhead Shrike",
+    "111": "Great-Grey Shrike",
+    "112": "Baird Sparrow",
+    "113": "Black-throated Sparrow",
+    "114": "Brewer Sparrow",
+    "115": "Chipping Sparrow",
+    "116": "Clay-colored Sparrow",
+    "117": "House Sparrow",
+    "118": "Field Sparrow",
+    "119": "Fox Sparrow",
+    "120": "Grasshopper Sparrow",
+    "121": "Harris Sparrow",
+    "122": "Henslow Sparrow",
+    "123": "Le-Conte Sparrow",
+    "124": "Lincoln Sparrow",
+    "125": "Nelson-Sharp-tailed Sparrow",
+    "126": "Savannah Sparrow",
+    "127": "Seaside Sparrow",
+    "128": "Song Sparrow",
+    "129": "Tree Sparrow",
+    "130": "Vesper Sparrow",
+    "131": "White-crowned Sparrow",
+    "132": "White-throated Sparrow",
+    "133": "Cape-Glossy Starling",
+    "134": "Bank Swallow",
+    "135": "Barn Swallow",
+    "136": "Cliff Swallow",
+    "137": "Tree Swallow",
+    "138": "Scarlet Tanager",
+    "139": "Summer Tanager",
+    "140": "Artic Tern",
+    "141": "Black Tern",
+    "142": "Caspian Tern",
+    "143": "Common Tern",
+    "144": "Elegant Tern",
+    "145": "Least Tern",
+    "146": "Green-tailed Towhee",
+    "147": "Brown Thrasher",
+    "148": "Sage Thrasher",
+    "149": "Black-capped Vireo",
+    "150": "Blue-headed Vireo",
+    "151": "Philadelphia Vireo",
+    "152": "Red-eyed Vireo",
+    "153": "Warbling Vireo",
+    "154": "White-eyed Vireo",
+    "155": "Yellow-throated Vireo",
+    "156": "Bay-breasted Warbler",
+    "157": "Black-and-white Warbler",
+    "158": "Black-throated-Blue Warbler",
+    "159": "Blue-winged Warbler",
+    "160": "Canada Warbler",
+    "161": "Cape-May Warbler",
+    "162": "Cerulean Warbler",
+    "163": "Chestnut-sided Warbler",
+    "164": "Golden-winged Warbler",
+    "165": "Hooded Warbler",
+    "166": "Kentucky Warbler",
+    "167": "Magnolia Warbler",
+    "168": "Mourning Warbler",
+    "169": "Myrtle Warbler",
+    "170": "Nashville Warbler",
+    "171": "Orange-crowned Warbler",
+    "172": "Palm Warbler",
+    "173": "Pine Warbler",
+    "174": "Prairie Warbler",
+    "175": "Prothonotary Warbler",
+    "176": "Swainson Warbler",
+    "177": "Tennessee Warbler",
+    "178": "Wilson Warbler",
+    "179": "Worm-eating Warbler",
+    "180": "Yellow Warbler",
+    "181": "Northern Waterthrush",
+    "182": "Louisiana Waterthrush",
+    "183": "Bohemian Waxwing",
+    "184": "Cedar Waxwing",
+    "185": "American-Three-toed Woodpecker",
+    "186": "Pileated Woodpecker",
+    "187": "Red-bellied Woodpecker",
+    "188": "Red-cockaded Woodpecker",
+    "189": "Red-headed Woodpecker",
+    "190": "Downy Woodpecker",
+    "191": "Bewick Wren",
+    "192": "Cactus Wren",
+    "193": "Carolina Wren",
+    "194": "House Wren",
+    "195": "Marsh Wren",
+    "196": "Rock Wren",
+    "197": "Winter Wren",
+    "198": "Common Yellowthroat",
+    "199": "Forsters Tern"
+}
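Note: json.load returns string keys, which is why app.py rebuilds this map with int keys before using it. A small usage sketch (the predicted index and custom class name below are hypothetical):

import json

with open('data/jsons/cub_desc_idx2name.json', 'r') as f:
    idx2name = {int(k): v for k, v in json.load(f).items()}

pred_idx = 9                       # hypothetical argmax from the classification head
print(idx2name[pred_idx])          # "Red-winged Blackbird"

# utils/predict.py reserves index 200 for a user-edited custom class:
idx2name = idx2name | {200: "My Custom Bird"}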
data/text_embeddings/cub_200_desc.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:059f1ec588c01a202417a09136c17f8026fec533213536b7f70b711ec40b575d
+size 4916405
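Note: this file is a Git LFS pointer, not the tensor itself; the binary payload is fetched on checkout. Its size is roughly consistent with 200 classes x 12 part descriptors of 512-dim float32 features, though that layout is an assumption, not stated anywhere in the diff:

# Back-of-the-envelope check (assumed layout: 200 * 12 float32 vectors of dim 512).
n_classes, n_parts, dim = 200, 12, 512
print(n_classes * n_parts * dim * 4)  # 4915200 bytes, close to the 4916405 bytes in the pointer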
utils/predict.py
CHANGED
@@ -40,7 +40,10 @@ def xclip_pred(new_desc: dict,
                device: str,
                return_img_embeds: bool = False,
                use_precompute_embeddings = True,
-               image_name: str = None,
+               image_name: str = None,
+               cub_embeds: torch.Tensor = None,
+               cub_idx2name: dict = None,
+               descriptors: dict = None):
     # reorder the new description and the mask
     if new_class is not None:
         new_desc_ = {k: new_desc[k] for k in ORG_PART_ORDER}
@@ -49,34 +52,49 @@
     else:
         desc_mask = [1] * 12
 
-
-
-
-        getprompt.
-
-
-
-
-
-
-
+    if cub_embeds is None:
+        # replace the description if the new class is in the description, otherwise add a new class
+        getprompt = GetPromptList(org_desc)
+        if new_class not in getprompt.desc and new_class is not None:
+            getprompt.name2idx[new_class] = len(getprompt.name2idx)
+        if new_class is not None:
+            getprompt.desc[new_class] = list(new_desc_.values())
+
+        idx2name = dict(zip(getprompt.name2idx.values(), getprompt.name2idx.keys()))
+        modified_class_idx = getprompt.name2idx[new_class] if new_class is not None else None
+
+        n_classes = len(getprompt.name2idx)
+        descs, class_idxs, class_mapping, org_desc_mapper, class_list = getprompt('chatgpt-no-template', max_len=12, pad=True)
+        query_embeds = encode_descs_xclip(owlvit_processor, model, descs, device)
+    else:
+        if new_class is not None:
+            if new_class in list(cub_idx2name.values()):
+                new_class = f"{new_class}_custom"
+            idx2name = cub_idx2name | {200: new_class}
+            descriptors |= {new_class: list(new_desc_.values())}
+            n_classes = 201
+            query_tokens = owlvit_processor(text=list(new_desc_.values()), padding="max_length", truncation=True, return_tensors="pt").to(device)
+            new_class_embed = model.owlvit.get_text_features(**query_tokens)
+            query_embeds = torch.cat([cub_embeds, new_class_embed], dim=0)
+            modified_class_idx = 200
+        else:
+            n_classes = 200
+            query_embeds = cub_embeds
+            idx2name = cub_idx2name
+            modified_class_idx = None
+
     model.cls_head.num_classes = n_classes
 
-    descs, class_idxs, class_mapping, org_desc_mapper, class_list = getprompt('chatgpt-no-template', max_len=12, pad=True)
-    query_embeds = encode_descs_xclip(owlvit_processor, model, descs, device)
-
     with torch.no_grad():
-        image_input = owlvit_processor(images=image, return_tensors='pt').to(device)
-        # image_input['pixel_values'] = image_input['pixel_values'].squeeze(1)
 
         part_embeds = owlvit_processor(text=[ORG_PART_ORDER], return_tensors="pt").to(device)
-        if return_img_embeds:
-            feature_map, _ = model.image_embedder(pixel_values = image_input['pixel_values'])
         if use_precompute_embeddings:
             image_embeds = torch.load(f'data/image_embeddings/{image_name}.pt').to(device)
-
+            image_input = owlvit_processor(images=image, return_tensors='pt').to(device)
         else:
-
+            image_embeds, _ = model.image_embedder(pixel_values = image_input['pixel_values'])
+
+        pred_logits, part_logits, output_dict = model(image_embeds, part_embeds, query_embeds, None)
 
     b, c, n = part_logits.shape
     mask = torch.tensor(desc_mask, dtype=float).unsqueeze(0).unsqueeze(0).repeat(b, c, 1).to(device)
@@ -100,18 +118,17 @@ def xclip_pred(new_desc: dict,
     else:
         modified_score = None
         modified_part_scores_dict = None
-        modified_part_scores_dict = None
 
     output_dict = {"pred_class": pred_class_name,
                    "pred_score": softmax_score_top1,
                    "pred_desc_scores": part_scores_dict,
-                   "descriptions":
+                   "descriptions": descriptors[pred_class_name],
                    "modified_class": new_class,
                    "modified_score": modified_score,
                    "modified_desc_scores": modified_part_scores_dict,
-                   "modified_descriptions":
+                   "modified_descriptions": descriptors.get(new_class),
                    }
-    return output_dict if
+    return (output_dict, image_embeds) if return_img_embeds else output_dict
 
 
 # def sachit_pred(new_desc: list,
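Note: the key change in xclip_pred is the fast path: when cub_embeds is supplied, only the 12 user-edited descriptions are run through the text encoder and concatenated onto the precomputed bank, instead of re-encoding all 200 x 12 CUB descriptions on every request. A minimal standalone sketch of that pattern, with a toy encoder standing in for model.owlvit.get_text_features (the function and variable names here are illustrative, not the app's API):

import torch

def extend_embedding_bank(cached_bank: torch.Tensor, new_descs: list, encode_fn) -> torch.Tensor:
    # Encode only the new class's descriptions and append them to the cached bank.
    new_embeds = encode_fn(new_descs)              # (12, D)
    return torch.cat([cached_bank, new_embeds], dim=0)

def toy_encoder(texts):                            # stand-in for the OWL-ViT text tower
    return torch.randn(len(texts), 512)

cached = torch.randn(200 * 12, 512)                # plays the role of CUB_DESC_EMBEDS
custom_descs = [f"part description {i}" for i in range(12)]
print(extend_embedding_bank(cached, custom_descs, toy_encoder).shape)  # torch.Size([2412, 512])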