Samuel Stevens committed
Commit 6e5adf0
Parent: d4005aa

add open-domain classification back

Files changed (5):
  1. .gitattributes +1 -1
  2. app.py +115 -112
  3. make_txt_embedding.py +21 -0
  4. txt_emb_species.json +3 -0
  5. txt_emb_species.npy +3 -0
.gitattributes CHANGED
@@ -34,6 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 
-*lookup.json filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
 *.jpeg filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,3 +1,5 @@
+import collections
+import heapq
 import json
 import os
 
@@ -8,15 +10,18 @@ import torch.nn.functional as F
 from open_clip import create_model, get_tokenizer
 from torchvision import transforms
 
-import lib
 from templates import openai_imagenet_template
 
 hf_token = os.getenv("HF_TOKEN")
 
 model_str = "hf-hub:imageomics/bioclip"
 tokenizer_str = "ViT-B-16"
-name_lookup_json = "name_lookup.json"
-txt_emb_npy = "txt_emb.npy"
+
+txt_emb_npy = "txt_emb_species.npy"
+txt_names_json = "txt_emb_species.json"
+
+min_prob = 1e-9
+k = 5
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -33,12 +38,12 @@ preprocess_img = transforms.Compose(
 
 ranks = ("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
 
-# open_domain_examples = [
-#     ["examples/Ursus-arctos.jpeg", "Species"],
-#     ["examples/Phoca-vitulina.png", "Species"],
-#     ["examples/Felis-catus.jpeg", "Genus"],
-#     ["examples/Sarcoscypha-coccinea.jpeg", "Order"],
-# ]
+open_domain_examples = [
+    ["examples/Ursus-arctos.jpeg", "Species"],
+    ["examples/Phoca-vitulina.png", "Species"],
+    ["examples/Felis-catus.jpeg", "Genus"],
+    ["examples/Sarcoscypha-coccinea.jpeg", "Order"],
+]
 zero_shot_examples = [
     [
         "examples/Ursus-arctos.jpeg",
@@ -73,6 +78,10 @@ zero_shot_examples = [
 ]
 
 
+def indexed(lst, indices):
+    return [lst[i] for i in indices]
+
+
 @torch.no_grad()
 def get_txt_features(classnames, templates):
     all_features = []
@@ -102,52 +111,38 @@ def zero_shot_classification(img, cls_str: str) -> dict[str, float]:
 
 
 @torch.no_grad()
-def open_domain_classification(img, rank: int) -> list[dict[str, float]]:
+def open_domain_classification(img, rank: int) -> dict[str, float]:
     """
-    Predicts from the top of the tree of life down to the species.
+    Predicts from the entire tree of life.
+    If targeting a higher rank than species, then this function predicts among all
+    species, then sums up species-level probabilities for the given rank.
     """
     img = preprocess_img(img).to(device)
     img_features = model.encode_image(img.unsqueeze(0))
     img_features = F.normalize(img_features, dim=-1)
 
-    outputs = []
-
-    name = []
-    for _ in range(rank + 1):
-        children = tuple(zip(*name_lookup.children(name)))
-        if not children:
-            break
-        values, indices = children
-        txt_features = txt_emb[:, indices].to(device)
-        logits = (model.logit_scale.exp() * img_features @ txt_features).view(-1)
-
-        probs = F.softmax(logits, dim=0).to("cpu").tolist()
-        parent = " ".join(name)
-        outputs.append(
-            {f"{parent} {value}": prob for value, prob in zip(values, probs)}
-        )
-
-        top = values[logits.argmax()]
-        name.append(top)
+    logits = (model.logit_scale.exp() * img_features @ txt_emb).squeeze()
+    probs = F.softmax(logits, dim=0)
 
-    while len(outputs) < 7:
-        outputs.append({})
+    # If predicting species, no need to sum probabilities.
+    if rank + 1 == len(ranks):
+        topk = probs.topk(k)
+        return {
+            " ".join(txt_names[i]): prob for i, prob in zip(topk.indices, topk.values)
+        }
 
-    return list(reversed(outputs))
+    # Sum up by the rank
+    output = collections.defaultdict(float)
+    for i in torch.nonzero(probs > min_prob).squeeze():
+        output[" ".join(txt_names[i][: rank + 1])] += probs[i]
 
+    topk_names = heapq.nlargest(k, output, key=output.get)
 
-def change_output(choice):
-    return [
-        gr.Label(
-            num_top_classes=5, label=rank, show_label=True, visible=(6 - i <= choice)
-        )
-        for i, rank in enumerate(reversed(ranks))
-    ]
+    return {name: output[name] for name in topk_names}
 
 
-def get_name_lookup(path):
-    with open(path) as fd:
-        return lib.TaxonomicTree.from_dict(json.load(fd))
+def change_output(choice):
+    return gr.Label(num_top_classes=k, label=ranks[choice], show_label=True, value=None)
 
 
 if __name__ == "__main__":
@@ -161,8 +156,9 @@ if __name__ == "__main__":
 
     tokenizer = get_tokenizer(tokenizer_str)
 
-    name_lookup = get_name_lookup(name_lookup_json)
-    txt_emb = torch.from_numpy(np.load(txt_emb_npy, mmap_mode="r"))
+    txt_emb = torch.from_numpy(np.load(txt_emb_npy, mmap_mode="r")).to(device)
+    with open(txt_names_json) as fd:
+        txt_names = json.load(fd)
 
     done = txt_emb.any(axis=0).sum().item()
     total = txt_emb.shape[1]
@@ -173,69 +169,76 @@ if __name__ == "__main__":
     with gr.Blocks() as app:
         img_input = gr.Image(height=512)
 
-        # with gr.Tab("Open-Ended"):
-        #     with gr.Row():
-        #         with gr.Column():
-        #             rank_dropdown = gr.Dropdown(
-        #                 label="Taxonomic Rank",
-        #                 info="Which taxonomic rank to predict. Fine-grained ranks (genus, species) are more challenging.",
-        #                 choices=ranks,
-        #                 value="Species",
-        #                 type="index",
-        #             )
-        #             open_domain_btn = gr.Button("Submit", variant="primary")
-        #             gr.Examples(
-        #                 examples=open_domain_examples,
-        #                 inputs=[img_input, rank_dropdown],
-        #             )
-
-        #         with gr.Column():
-        #             open_domain_outputs = [
-        #                 gr.Label(num_top_classes=5, label=rank, show_label=True)
-        #                 for rank in reversed(ranks)
-        #             ]
-        #             open_domain_flag_btn = gr.Button("Flag Mistake", variant="primary")
-
-        #     open_domain_callback = gr.HuggingFaceDatasetSaver(
-        #         hf_token, "imageomics/bioclip-demo-open-domain-mistakes", private=True
-        #     )
-        #     open_domain_callback.setup(
-        #         [img_input, *open_domain_outputs], flagging_dir="logs/flagged"
-        #     )
-        #     open_domain_flag_btn.click(
-        #         lambda *args: open_domain_callback.flag(args),
-        #         [img_input, *open_domain_outputs],
-        #         None,
-        #         preprocess=False,
-        #     )
-
-        # with gr.Tab("Zero-Shot"):
-        with gr.Row():
-            with gr.Column():
-                classes_txt = gr.Textbox(
-                    placeholder="Canis familiaris (dog)\nFelis catus (cat)\n...",
-                    lines=3,
-                    label="Classes",
-                    show_label=True,
-                    info="Use taxonomic names where possible; include common names if possible.",
-                )
-                zero_shot_btn = gr.Button("Submit", variant="primary")
-
-            with gr.Column():
-                zero_shot_output = gr.Label(
-                    num_top_classes=5, label="Prediction", show_label=True
-                )
-                zero_shot_flag_btn = gr.Button("Flag Mistake", variant="primary")
-
-        with gr.Row():
-            gr.Examples(
-                examples=zero_shot_examples,
-                inputs=[img_input, classes_txt],
-                cache_examples=True,
-                fn=zero_shot_classification,
-                outputs=[zero_shot_output],
-            )
+        with gr.Tab("Open-Ended"):
+            with gr.Row():
+                with gr.Column():
+                    rank_dropdown = gr.Dropdown(
+                        label="Taxonomic Rank",
+                        info="Which taxonomic rank to predict. Fine-grained ranks (genus, species) are more challenging.",
+                        choices=ranks,
+                        value="Species",
+                        type="index",
+                    )
+                    open_domain_btn = gr.Button("Submit", variant="primary")
+                with gr.Column():
+                    open_domain_output = gr.Label(
+                        num_top_classes=k,
+                        label="Prediction",
+                        show_label=True,
+                        value=None,
+                    )
+                    open_domain_flag_btn = gr.Button("Flag Mistake", variant="primary")
+
+            with gr.Row():
+                gr.Examples(
+                    examples=open_domain_examples,
+                    inputs=[img_input, rank_dropdown],
+                    cache_examples=True,
+                    fn=open_domain_classification,
+                    outputs=[open_domain_output],
+                )
+
+            open_domain_callback = gr.HuggingFaceDatasetSaver(
+                hf_token, "imageomics/bioclip-demo-open-domain-mistakes", private=True
+            )
+            open_domain_callback.setup(
+                [img_input, rank_dropdown, open_domain_output],
+                flagging_dir="logs/flagged",
+            )
+            open_domain_flag_btn.click(
+                lambda *args: open_domain_callback.flag(args),
+                [img_input, rank_dropdown, open_domain_output],
+                None,
+                preprocess=False,
+            )
+
+        with gr.Tab("Zero-Shot"):
+            with gr.Row():
+                with gr.Column():
+                    classes_txt = gr.Textbox(
+                        placeholder="Canis familiaris (dog)\nFelis catus (cat)\n...",
+                        lines=3,
+                        label="Classes",
+                        show_label=True,
+                        info="Use taxonomic names where possible; include common names if possible.",
+                    )
+                    zero_shot_btn = gr.Button("Submit", variant="primary")
+
+                with gr.Column():
+                    zero_shot_output = gr.Label(
+                        num_top_classes=k, label="Prediction", show_label=True
+                    )
+                    zero_shot_flag_btn = gr.Button("Flag Mistake", variant="primary")
+
+            with gr.Row():
+                gr.Examples(
+                    examples=zero_shot_examples,
+                    inputs=[img_input, classes_txt],
+                    cache_examples=True,
+                    fn=zero_shot_classification,
+                    outputs=[zero_shot_output],
+                )
+
         zero_shot_callback = gr.HuggingFaceDatasetSaver(
             hf_token, "imageomics/bioclip-demo-zero-shot-mistakes", private=True
        )
@@ -249,15 +252,15 @@ if __name__ == "__main__":
             preprocess=False,
         )
 
-        # rank_dropdown.change(
-        #     fn=change_output, inputs=rank_dropdown, outputs=open_domain_outputs
-        # )
+        rank_dropdown.change(
+            fn=change_output, inputs=rank_dropdown, outputs=[open_domain_output]
+        )
 
-        # open_domain_btn.click(
-        #     fn=open_domain_classification,
-        #     inputs=[img_input, rank_dropdown],
-        #     outputs=open_domain_outputs,
-        # )
+        open_domain_btn.click(
+            fn=open_domain_classification,
+            inputs=[img_input, rank_dropdown],
+            outputs=[open_domain_output],
+        )
 
         zero_shot_btn.click(
             fn=zero_shot_classification,
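
Note: the rewritten open_domain_classification scores the image against every species embedding once, then, for ranks above species, pools the species probabilities that share a name prefix. Below is a minimal self-contained sketch of just that pooling step; the names and logits are hypothetical stand-ins for the app's globals, not values from this repo.

import collections
import heapq

import torch
import torch.nn.functional as F

k = 5  # predictions to return
min_prob = 1e-9  # ignore species below this probability when pooling

# Hypothetical stand-ins for txt_names: one 7-tuple of ranks per species.
txt_names = [
    ("Animalia", "Chordata", "Mammalia", "Carnivora", "Ursidae", "Ursus", "arctos"),
    ("Animalia", "Chordata", "Mammalia", "Carnivora", "Ursidae", "Ursus", "americanus"),
    ("Animalia", "Chordata", "Mammalia", "Carnivora", "Felidae", "Felis", "catus"),
    ("Fungi", "Ascomycota", "Pezizomycetes", "Pezizales", "Sarcoscyphaceae", "Sarcoscypha", "coccinea"),
]
logits = torch.tensor([3.0, 2.5, 1.0, -2.0])  # made-up image-text similarities
probs = F.softmax(logits, dim=0)

rank = 4  # 0=Kingdom ... 6=Species; 4 targets Family

# Pool species probabilities by their name prefix up to the target rank.
output = collections.defaultdict(float)
for i in torch.nonzero(probs > min_prob).squeeze(1):
    output[" ".join(txt_names[i][: rank + 1])] += probs[i].item()

for name in heapq.nlargest(k, output, key=output.get):
    print(f"{name}: {output[name]:.3f}")
# The two Ursus species pool into a single "... Ursidae" prediction.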
make_txt_embedding.py CHANGED
@@ -112,6 +112,26 @@ def convert_txt_features_to_avgs(name_lookup):
     )
 
 
+def convert_txt_features_to_species_only(name_lookup):
+    assert os.path.isfile(args.out_path)
+
+    all_features = np.load(args.out_path)
+    logger.info("Loaded text features from disk.")
+
+    species = [(d, i) for d, i in name_lookup.descendants() if len(d) == 7]
+    species_features = np.zeros((512, len(species)), dtype=np.float32)
+    species_names = [""] * len(species)
+
+    for new_i, (name, old_i) in enumerate(tqdm(species)):
+        species_features[:, new_i] = all_features[:, old_i]
+        species_names[new_i] = name
+
+    out_path, ext = os.path.splitext(args.out_path)
+    np.save(f"{out_path}_species{ext}", species_features)
+    with open(f"{out_path}_species.json", "w") as fd:
+        json.dump(species_names, fd, indent=2)
+
+
 def get_name_lookup(catalog_path, cache_path):
     if os.path.isfile(cache_path):
         with open(cache_path) as fd:
@@ -170,3 +190,4 @@ if __name__ == "__main__":
     tokenizer = get_tokenizer(tokenizer_str)
     write_txt_features(name_lookup)
     convert_txt_features_to_avgs(name_lookup)
+    convert_txt_features_to_species_only(name_lookup)
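
The species-only artifacts this new step writes are a (512, n_species) float32 matrix and a parallel JSON list of 7-part names, which app.py loads as txt_emb and txt_names. A quick sanity check one might run on the pair; the shapes are inferred from the code above, not guaranteed by the repo:

import json

import numpy as np

emb = np.load("txt_emb_species.npy", mmap_mode="r")  # expected shape: (512, n_species)
with open("txt_emb_species.json") as fd:
    names = json.load(fd)  # names[i] labels column emb[:, i]

assert emb.shape[1] == len(names)
assert all(len(name) == 7 for name in names)  # full kingdom-to-species names
print(emb.shape, " ".join(names[0]))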
txt_emb_species.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71babd1b7bc275a1dbb12fd36e6329bcc2487784c0b7be10c2f4d0031d34211
+size 50445969
txt_emb_species.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91ce02dff2433222e3138b8bf7eefa1dd74b30f4d406c16cd3301f66d65ab4ed
+size 787435648