Spaces · Build error

Sujit Pal committed
Commit a78bf29 · 1 Parent(s): 96ac3ab

fix: added feature finder and small usability changes

- app.py +3 -1
- dashboard_featurefinder.py +151 -0
- dashboard_image2image.py +28 -7
- dashboard_text2image.py +2 -2
- demo-images/st_tropez_1.png +0 -0
- demo-images/st_tropez_2.png +0 -0
app.py
CHANGED
@@ -1,11 +1,13 @@
 import dashboard_text2image
 import dashboard_image2image
+import dashboard_featurefinder
 
 import streamlit as st
 
 PAGES = {
     "Text to Image": dashboard_text2image,
-    "Image to Image": dashboard_image2image
+    "Image to Image": dashboard_image2image,
+    "Feature in Image": dashboard_featurefinder,
 }
 st.sidebar.title("Navigation")
 
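The hunk above only registers the new page in the PAGES dict; the navigation code that follows st.sidebar.title() is not shown on this page. A minimal sketch of how such a PAGES dict is typically wired up in Streamlit follows; the radio label and the PAGES[selection].app() call are assumptions based on the common multi-page pattern, not part of this commit.

# Hypothetical continuation of app.py (not part of the commit):
# pick a page in the sidebar and render it via its app() entry point.
selection = st.sidebar.radio("Go to", list(PAGES.keys()))
page = PAGES[selection]
page.app()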
dashboard_featurefinder.py
ADDED
import jax
import flax
import matplotlib.pyplot as plt
import nmslib
import numpy as np
import os
import streamlit as st

from tempfile import NamedTemporaryFile
from torchvision.transforms import Compose, Resize, ToPILImage
from transformers import CLIPProcessor, FlaxCLIPModel
from PIL import Image


BASELINE_MODEL = "openai/clip-vit-base-patch32"
# MODEL_PATH = "/home/shared/models/clip-rsicd/bs128x8-lr5e-6-adam/ckpt-1"
MODEL_PATH = "flax-community/clip-rsicd-v2"

# IMAGE_VECTOR_FILE = "/home/shared/data/vectors/test-baseline.tsv"
# IMAGE_VECTOR_FILE = "/home/shared/data/vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"
IMAGE_VECTOR_FILE = "./vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"

# IMAGES_DIR = "/home/shared/data/rsicd_images"
IMAGES_DIR = "./images"


# @st.cache(allow_output_mutation=True)
# def load_index():
#     filenames, image_vecs = [], []
#     fvec = open(IMAGE_VECTOR_FILE, "r")
#     for line in fvec:
#         cols = line.strip().split('\t')
#         filename = cols[0]
#         image_vec = np.array([float(x) for x in cols[1].split(',')])
#         filenames.append(filename)
#         image_vecs.append(image_vec)
#     V = np.array(image_vecs)
#     index = nmslib.init(method='hnsw', space='cosinesimil')
#     index.addDataPointBatch(V)
#     index.createIndex({'post': 2}, print_progress=True)
#     return filenames, index


@st.cache(allow_output_mutation=True)
def load_model():
    # model = FlaxCLIPModel.from_pretrained(MODEL_PATH)
    # processor = CLIPProcessor.from_pretrained(BASELINE_MODEL)
    model = FlaxCLIPModel.from_pretrained("flax-community/clip-rsicd-v2")
    processor = CLIPProcessor.from_pretrained("flax-community/clip-rsicd-v2")
    return model, processor


def split_image(X):
    # crop to a multiple of 224 in each dimension, then cut into
    # 224x224 tiles in row-major order
    num_rows = X.shape[0] // 224
    num_cols = X.shape[1] // 224
    Xc = X[0 : num_rows * 224, 0 : num_cols * 224, :]
    patches = []
    for j in range(num_rows):
        for i in range(num_cols):
            patches.append(Xc[j * 224 : (j + 1) * 224,
                              i * 224 : (i + 1) * 224,
                              :])
    return num_rows, num_cols, patches


def get_patch_probabilities(patches, searched_feature,
                            image_preprocessor,
                            model, processor):
    # score every tile against the text prompt; the softmax over tiles
    # gives each tile's probability of containing the searched feature
    images = [image_preprocessor(patch) for patch in patches]
    text = "An aerial image of {:s}".format(searched_feature)
    inputs = processor(images=images,
                       text=text,
                       return_tensors="jax",
                       padding=True)
    outputs = model(**inputs)
    probs = jax.nn.softmax(outputs.logits_per_text, axis=-1)
    probs_np = np.asarray(probs)[0]
    return probs_np


def get_image_ranks(probs):
    # dense ranks of the tiles, 0 = most likely
    temp = np.argsort(-probs)
    ranks = np.empty_like(temp)
    ranks[temp] = np.arange(len(probs))
    return ranks


def app():
    model, processor = load_model()

    st.title("Find Features in Images")
    st.markdown("""
    The CLIP model from OpenAI is trained in a self-supervised manner using
    contrastive learning to project images and caption text onto a common
    embedding space. We have fine-tuned the model using the RSICD dataset
    (10k images and ~50k captions from the remote sensing domain).

    This demo shows the ability of the model to find specific features
    (specified as text queries) in the image. As an example, say you wish to
    find the parts of the following image that contain a `beach`, `houses`,
    or `ships`. We partition the image into tiles of (224, 224) and report
    how likely each of them is to contain each text feature.
    """)
    st.image("demo-images/st_tropez_1.png")
    st.image("demo-images/st_tropez_2.png")
    st.markdown("""
    For this image and the queries listed above, our model reports that the
    two left tiles are most likely to contain a `beach`, the two top right
    tiles are most likely to contain `houses`, and the two bottom right tiles
    are likely to contain `boats`.

    You can try it yourself with your own photographs.
    [Unsplash](https://unsplash.com/s/photos/aerial-view) has some good
    aerial photographs. You will need to download the image from Unsplash to
    your computer and upload it to the demo app.
    """)
    with st.form(key="form_3"):
        buf = st.file_uploader("Upload Image for Analysis")
        searched_feature = st.text_input(label="Feature to find")
        submit_button = st.form_submit_button("Find")

    if submit_button:
        ftmp = NamedTemporaryFile()
        ftmp.write(buf.getvalue())
        image = plt.imread(ftmp.name)
        if len(image.shape) != 3 or image.shape[2] != 3:
            st.error("Image should be an RGB image")
        if image.shape[0] < 224 or image.shape[1] < 224:
            st.error("Image should be at least (224 x 224)")
        st.image(image, caption="Input Image")
        st.markdown("---")
        num_rows, num_cols, patches = split_image(image)
        image_preprocessor = Compose([
            ToPILImage(),
            Resize(224)
        ])
        patch_probs = get_patch_probabilities(
            patches,
            searched_feature,
            image_preprocessor,
            model,
            processor)
        patch_ranks = get_image_ranks(patch_probs)
        for i in range(num_rows):
            row_patches = patches[i * num_cols : (i + 1) * num_cols]
            row_probs = patch_probs[i * num_cols : (i + 1) * num_cols]
            row_ranks = patch_ranks[i * num_cols : (i + 1) * num_cols]
            captions = ["p({:s})={:.3f}, rank={:d}".format(searched_feature, p, r + 1)
                        for p, r in zip(row_probs, row_ranks)]
            st.image(row_patches, caption=captions)
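For readers who want to exercise the new module outside the Streamlit app, here is a minimal sketch of the tiling-and-scoring flow it defines. The image file name and the query "beach" are placeholders; the sketch assumes dashboard_featurefinder.py is importable and the input is an RGB image of at least 224 x 224 pixels.

# Minimal offline sketch of the feature-finder flow (file name and query
# are assumptions; this is not part of the commit).
import numpy as np
from PIL import Image
from torchvision.transforms import Compose, Resize, ToPILImage

import dashboard_featurefinder as ff

model, processor = ff.load_model()
image = np.asarray(Image.open("my_aerial_photo.jpg").convert("RGB"))

num_rows, num_cols, patches = ff.split_image(image)
preprocessor = Compose([ToPILImage(), Resize(224)])
probs = ff.get_patch_probabilities(patches, "beach", preprocessor, model, processor)

best = int(np.argmax(probs))  # index of the most likely tile (row-major)
print("tile ({}, {}) p(beach) = {:.3f}".format(best // num_cols, best % num_cols, probs[best]))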
dashboard_image2image.py
CHANGED
@@ -44,9 +44,27 @@ def load_model():
     return model, processor
 
 
+@st.cache(allow_output_mutation=True)
+def load_example_images():
+    example_images = {}
+    image_names = os.listdir(IMAGES_DIR)
+    for image_name in image_names:
+        if image_name.find("_") < 0:
+            continue
+        image_class = image_name.split("_")[0]
+        if image_class in example_images.keys():
+            example_images[image_class].append(image_name)
+        else:
+            example_images[image_class] = [image_name]
+    return example_images
+
+
 def app():
     filenames, index = load_index()
     model, processor = load_model()
+    example_images = load_example_images()
+    example_image_list = sorted([v[np.random.randint(0, len(v))]
+                                 for k, v in example_images.items()][0:10])
 
     st.title("Image to Image Retrieval")
     st.markdown("""
@@ -63,13 +81,16 @@ def app():
     Our fine-tuned CLIP model was previously used to generate image vectors for
     our demo, and NMSLib was used for fast vector access.
 
-    ...
-    ...
-    ...
 
-    ...
-    ...
-        image = Image.fromarray(plt.imread(os.path.join(IMAGES_DIR, image_file)))
+    Here are some randomly generated image files from our corpus. You can
+    copy paste one of these below or use one from the results of a text to
+    image search -- {:s}
+    """.format(", ".join("`{:s}`".format(example) for example in example_image_list)))
+
+    image_name = st.text_input("Provide an Image File Name")
+    submit_button = st.button("Find Similar")
 
+    if submit_button:
+        image = Image.fromarray(plt.imread(os.path.join(IMAGES_DIR, image_name)))
         inputs = processor(images=image, return_tensors="jax", padding=True)
         query_vec = model.get_image_features(**inputs)
         query_vec = np.asarray(query_vec)
@@ -77,7 +98,7 @@ def app():
         result_filenames = [filenames[id] for id in ids]
         images, captions = [], []
         for result_filename, score in zip(result_filenames, distances):
-            if result_filename == image_file:
+            if result_filename == image_name:
                 continue
             images.append(
                 plt.imread(os.path.join(IMAGES_DIR, result_filename)))
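The app() shown here depends on load_index() and on the ids/distances returned by a nearest-neighbour query, neither of which appears in this diff. Below is a hedged sketch of that lookup, adapted from the commented-out load_index() in dashboard_featurefinder.py; the vector file path and k=11 are assumptions.

# Hypothetical sketch of the index build and query that produce `ids` and
# `distances` used above; not part of this commit.
import nmslib
import numpy as np

def load_index(vector_file="./vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv"):
    filenames, image_vecs = [], []
    with open(vector_file, "r") as fvec:
        for line in fvec:
            # each row: <filename> TAB <comma-separated image vector>
            filename, vec_str = line.strip().split("\t")
            filenames.append(filename)
            image_vecs.append(np.array([float(x) for x in vec_str.split(",")]))
    index = nmslib.init(method="hnsw", space="cosinesimil")
    index.addDataPointBatch(np.array(image_vecs))
    index.createIndex({"post": 2}, print_progress=False)
    return filenames, index

# inside app(), after query_vec = np.asarray(model.get_image_features(**inputs)):
# ids, distances = index.knnQuery(query_vec[0], k=11)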
dashboard_text2image.py
CHANGED
@@ -62,8 +62,8 @@ def app():
     Our fine-tuned CLIP model was previously used to generate image vectors for
     our demo, and NMSLib was used for fast vector access.
 
-    Some suggested queries to start you off with --
-
+    Some suggested queries to start you off with -- `ships`, `school house`,
+    `military installations`, `mountains`, `beaches`, `airports`, `lakes`, etc.
     """)
 
     query = st.text_input("Text Query:")
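The two changed lines above only touch the suggested-queries help text; the text-to-image retrieval itself happens further down in the file and is not shown here. A rough sketch of that step, assuming the same fine-tuned CLIP model and nmslib index used by the other dashboards (variable names are placeholders):

# Hypothetical sketch of embedding a text query and looking it up in the
# image index; not part of this commit.
import numpy as np
from transformers import CLIPProcessor, FlaxCLIPModel

model = FlaxCLIPModel.from_pretrained("flax-community/clip-rsicd-v2")
processor = CLIPProcessor.from_pretrained("flax-community/clip-rsicd-v2")

query = "ships"  # e.g. one of the suggested queries above
inputs = processor(text=[query], return_tensors="jax", padding=True)
query_vec = np.asarray(model.get_text_features(**inputs))
# ids, distances = index.knnQuery(query_vec[0], k=10)   # index from load_index()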
demo-images/st_tropez_1.png
ADDED
demo-images/st_tropez_2.png
ADDED