jaketae committed
Commit 48a1fa8
1 Parent(s): bf9c2d9

features: overall ui cleanup

Files changed (3):
  1. app.py +2 -2
  2. text2image.py +17 -13
  3. most_relevant_part.py → text2patch.py +34 -31
app.py CHANGED

```diff
@@ -1,13 +1,13 @@
 import streamlit as st
 
 import image2text
-import most_relevant_part
 import text2image
+import text2patch
 
 PAGES = {
     "Text to Image": text2image,
     "Image to Text": image2text,
-    "Most Relevant Part of Image": most_relevant_part,
+    "Patch Importance Ranking": text2patch,
 }
 
 st.sidebar.title("Navigation")
```
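The part of app.py that consumes this `PAGES` dict lies below the hunk and is not shown. As a hedged sketch of the usual Streamlit pattern, assuming a sidebar radio widget and that each page module exposes an `app(model_name)` entry point:

```python
# Hypothetical continuation after st.sidebar.title("Navigation"); these
# lines are not part of the diff, so the widget choice and the
# app(model_name) contract are assumptions inferred from the page modules.
selection = st.sidebar.radio("Go to", list(PAGES.keys()))
model_name = "koclip-base"  # assumed model id; pages prefix it with "koclip/"
PAGES[selection].app(model_name)
```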
text2image.py CHANGED

```diff
@@ -33,16 +33,20 @@ def app(model_name):
 
     query = st.text_input("한글 질문을 적어주세요 (Korean Text Query) :", value="컴퓨터하는 고양이")
     if st.button("질문 (Query)"):
-        proc = processor(text=[query], images=None, return_tensors="jax", padding=True)
-        vec = np.asarray(model.get_text_features(**proc))
-        ids, dists = index.knnQuery(vec, k=10)
-        result_files = map(lambda id: files[id], ids)
-        result_imgs, result_captions = [], []
-        for file, dist in zip(result_files, dists):
-            result_imgs.append(plt.imread(os.path.join(images_directory, file)))
-            result_captions.append("Score: {:.3f}".format(1.0 - dist))
-
-        st.image(result_imgs[:3], caption=result_captions[:3], width=200)
-        st.image(result_imgs[3:6], caption=result_captions[3:6], width=200)
-        st.image(result_imgs[6:9], caption=result_captions[6:9], width=200)
-        st.image(result_imgs[9:], caption=result_captions[9:], width=200)
+        st.markdown("""---""")
+        with st.spinner("Computing..."):
+            proc = processor(
+                text=[query], images=None, return_tensors="jax", padding=True
+            )
+            vec = np.asarray(model.get_text_features(**proc))
+            ids, dists = index.knnQuery(vec, k=10)
+            result_files = map(lambda id: files[id], ids)
+            result_imgs, result_captions = [], []
+            for file, dist in zip(result_files, dists):
+                result_imgs.append(plt.imread(os.path.join(images_directory, file)))
+                result_captions.append("Score: {:.3f}".format(1.0 - dist))
+
+            st.image(result_imgs[:3], caption=result_captions[:3], width=200)
+            st.image(result_imgs[3:6], caption=result_captions[3:6], width=200)
+            st.image(result_imgs[6:9], caption=result_captions[6:9], width=200)
+            st.image(result_imgs[9:], caption=result_captions[9:], width=200)
```
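For orientation, the query path above relies on an `index`, `files`, and `images_directory` that are set up earlier in text2image.py, outside this hunk. A minimal sketch of the assumed nmslib index construction (`build_index` and its argument are hypothetical names):

```python
# Assumed setup for the knnQuery call above; not part of this diff.
import nmslib
import numpy as np

def build_index(image_vecs: np.ndarray):  # hypothetical helper
    # Cosine-similarity index over precomputed CLIP image embeddings.
    # nmslib's "cosinesimil" space reports distances d = 1 - cos(u, v),
    # which is why the app displays 1.0 - dist as the match score.
    index = nmslib.init(method="hnsw", space="cosinesimil")
    index.addDataPointBatch(image_vecs)
    index.createIndex({"post": 2})
    return index
```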
most_relevant_part.py → text2patch.py RENAMED

```diff
@@ -22,21 +22,10 @@ def split_image(im, num_rows=3, num_cols=3):
     return tiles
 
 
-# def split_image(X):
-#     num_rows = X.shape[0] // 224
-#     num_cols = X.shape[1] // 224
-#     Xc = X[0:num_rows * 224, 0:num_cols * 224, :]
-#     patches = []
-#     for j in range(num_rows):
-#         for i in range(num_cols):
-#             patches.append(Xc[j * 224:(j + 1) * 224, i * 224:(i + 1) * 224, :])
-#     return patches
-
-
 def app(model_name):
     model, processor = load_model(f"koclip/{model_name}")
 
-    st.title("Most Relevant Part of Image")
+    st.title("Patch-based Relevance Retrieval")
     st.markdown(
         """
         Given a piece of text, the CLIP model finds the part of an image that best explains the text.
@@ -60,29 +49,43 @@ def app(model_name):
         "Enter query to find most relevant part of image ",
         value="이건 서울의 경복궁 사진이다.",
     )
-    num_rows = st.slider("Number of rows", min_value=1, max_value=5, value=3, step=1)
-    num_cols = st.slider("Number of columns", min_value=1, max_value=5, value=3, step=1)
+
+    col1, col2 = st.beta_columns(2)
+    with col1:
+        num_rows = st.slider(
+            "Number of rows", min_value=1, max_value=5, value=3, step=1
+        )
+    with col2:
+        num_cols = st.slider(
+            "Number of columns", min_value=1, max_value=5, value=3, step=1
+        )
 
     if st.button("질문 (Query)"):
         if not any([query1, query2]):
             st.error("Please upload an image or paste an image URL.")
         else:
-            image_data = (
-                query2 if query2 is not None else requests.get(query1, stream=True).raw
-            )
-            image = Image.open(image_data)
-            st.image(image)
+            st.markdown("""---""")
+            with st.spinner("Computing..."):
+                image_data = (
+                    query2
+                    if query2 is not None
+                    else requests.get(query1, stream=True).raw
+                )
+                image = Image.open(image_data)
+                st.image(image)
 
-            images = split_image(image, num_rows, num_cols)
+                images = split_image(image, num_rows, num_cols)
 
-            inputs = processor(
-                text=captions, images=images, return_tensors="jax", padding=True
-            )
-            inputs["pixel_values"] = jnp.transpose(
-                inputs["pixel_values"], axes=[0, 2, 3, 1]
-            )
-            outputs = model(**inputs)
-            probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
-            for idx, prob in sorted(enumerate(probs), key=lambda x: x[1], reverse=True):
-                st.text(f"Score: {prob[0]:.3f}")
-                st.image(images[idx])
+                inputs = processor(
+                    text=captions, images=images, return_tensors="jax", padding=True
+                )
+                inputs["pixel_values"] = jnp.transpose(
+                    inputs["pixel_values"], axes=[0, 2, 3, 1]
+                )
+                outputs = model(**inputs)
+                probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
+                for idx, prob in sorted(
+                    enumerate(probs), key=lambda x: x[1], reverse=True
+                ):
+                    st.text(f"Score: {prob[0]:.3f}")
+                    st.image(images[idx])
```
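The body of `split_image` sits above the first hunk, so only its `return tiles` tail is visible here. Read together with the deleted commented-out variant, a plausible reconstruction (an assumption, not the committed code) is:

```python
import numpy as np

def split_image(im, num_rows=3, num_cols=3):
    # Reconstruction from the visible signature and "return tiles";
    # the committed body may differ.
    im = np.asarray(im)  # PIL image -> H x W x C array
    tile_h = im.shape[0] // num_rows
    tile_w = im.shape[1] // num_cols
    # Row-major grid of num_rows * num_cols tiles.
    tiles = [
        im[r * tile_h : (r + 1) * tile_h, c * tile_w : (c + 1) * tile_w]
        for r in range(num_rows)
        for c in range(num_cols)
    ]
    return tiles
```

Note the `axis=0` in the softmax over `logits_per_image`: with shape `(num_patches, num_captions)`, normalizing along axis 0 spreads probability mass across patches for each caption, so `prob[0]` scores how strongly each tile matches the (presumably single) query relative to the other tiles.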