ampehta committed on
Commit
503acf7
1 Parent(s): 98b26e2

Revert "Merge branch 'main' of https://huggingface.co/spaces/flax-community/koclip into main"


This reverts commit 98b26e2c6b300d6deec2c1f5a119ad6089b11224, reversing
changes made to 699df87c9ee261cf6dfc69f6a6276d3e99bfbc3e.

Files changed (5)
  1. app.py +1 -0
  2. embed.py +11 -16
  3. image2text.py +2 -4
  4. text2image.py +8 -11
  5. utils.py +13 -11
app.py CHANGED
@@ -3,6 +3,7 @@ import streamlit as st
 import image2text
 import text2image
 
+
 PAGES = {"Text to Image": text2image, "Image to Text": image2text}
 
 st.sidebar.title("Navigation")
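
Note: the hunk above only re-adds a blank line; the PAGES dict and sidebar title are unchanged context. For orientation, a plausible sketch of how such a PAGES mapping is typically dispatched in a Streamlit app follows. The radio label, the selection flow, and the "koclip-base" argument are assumptions, not part of this commit.

import streamlit as st
import image2text
import text2image

PAGES = {"Text to Image": text2image, "Image to Text": image2text}

st.sidebar.title("Navigation")
# Hypothetical dispatch: pick a page in the sidebar and hand a model name to its
# app() entry point (both image2text.py and text2image.py define app(model_name)).
selection = st.sidebar.radio("Go to", list(PAGES.keys()))
PAGES[selection].app("koclip-base")  # placeholder model name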
embed.py CHANGED
@@ -2,20 +2,21 @@ import argparse
 import csv
 import os
 
-import jax.numpy as jnp
 from PIL import Image
-from tqdm import tqdm
 
 from utils import load_model
+import jax.numpy as jnp
+from jax import jit
+
+from tqdm import tqdm
 
 
 def main(args):
     root = args.image_path
     files = list(os.listdir(root))
     for f in files:
-        assert f[-4:] == ".jpg"
+        assert(f[-4:] == ".jpg")
     for model_name in ["koclip-base", "koclip-large"]:
-        # for model_name in ["koclip-large"]:
         model, processor = load_model(f"koclip/{model_name}")
         with tqdm(total=len(files)) as pbar:
             for counter in range(0, len(files), args.batch_size):
@@ -23,34 +24,28 @@ def main(args):
                 image_ids = []
                 for idx in range(counter, min(len(files), counter + args.batch_size)):
                     file_ = files[idx]
-                    image = Image.open(os.path.join(root, file_)).convert("RGB")
+                    image = Image.open(os.path.join(root, file_)).convert('RGB')
                     images.append(image)
                     image_ids.append(file_)
 
                 pbar.update(args.batch_size)
                 try:
-                    inputs = processor(
-                        text=[""], images=images, return_tensors="jax", padding=True
-                    )
+                    inputs = processor(text=[""], images=images, return_tensors="jax", padding=True)
                 except:
                     print(image_ids)
                     break
-                inputs["pixel_values"] = jnp.transpose(
-                    inputs["pixel_values"], axes=[0, 2, 3, 1]
-                )
+                inputs['pixel_values'] = jnp.transpose(inputs['pixel_values'], axes=[0, 2, 3, 1])
                 features = model(**inputs).image_embeds
                 with open(os.path.join(args.out_path, f"{model_name}.tsv"), "a+") as f:
                     writer = csv.writer(f, delimiter="\t")
                     for image_id, feature in zip(image_ids, features):
-                        writer.writerow(
-                            [image_id, ",".join(map(lambda x: str(x), feature))]
-                        )
+                        writer.writerow([image_id, ",".join(map(lambda x: str(x), feature))])
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--batch_size", default=16)
-    parser.add_argument("--image_path", default="images/val2017")
-    parser.add_argument("--out_path", default="features/val2017")
+    parser.add_argument("--image_path", default="images")
+    parser.add_argument("--out_path", default="features")
     args = parser.parse_args()
    main(args)
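
Note: the hunks above restore the pre-merge, single-line formatting of the embed.py batch loop. A condensed, self-contained sketch of the per-image path is below, assuming the load_model() helper from utils.py and a placeholder .jpg filename; the channels-last comment is an inference from the transpose in the code, not something stated in the commit.

import os

import jax.numpy as jnp
from PIL import Image

from utils import load_model  # cached FlaxHybridCLIP + CLIPProcessor loader from utils.py

model, processor = load_model("koclip/koclip-base")
# "sample.jpg" is a placeholder; embed.py iterates over every *.jpg under --image_path.
images = [Image.open(os.path.join("images", "sample.jpg")).convert("RGB")]
inputs = processor(text=[""], images=images, return_tensors="jax", padding=True)
# The processor emits pixel_values as NCHW; the transpose to NHWC mirrors the line
# restored above and matches what the Flax hybrid-CLIP vision tower expects.
inputs["pixel_values"] = jnp.transpose(inputs["pixel_values"], axes=[0, 2, 3, 1])
features = model(**inputs).image_embeds  # one embedding row per input image
row = ["sample.jpg", ",".join(str(x) for x in features[0])]  # the TSV row embed.py appends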
image2text.py CHANGED
@@ -7,8 +7,6 @@ def app(model_name):
     model, processor = load_model(model_name)
 
     st.title("Image to Text")
-    st.markdown(
-        """
+    st.markdown("""
     Some text goes in here.
-    """
-    )
+    """)
text2image.py CHANGED
@@ -1,22 +1,21 @@
 import os
 
-import matplotlib.pyplot as plt
-import numpy as np
 import streamlit as st
 
-from utils import load_index, load_model
+from utils import load_model, load_index
+import numpy as np
+import matplotlib.pyplot as plt
 
 
 def app(model_name):
-    images_directory = "images/val2017"
-    features_directory = f"features/val2017/{model_name}.tsv"
+    images_directory = 'images/val2017'
+    features_directory = f'features/val2017/{model_name}.tsv'
 
     files, index = load_index(features_directory)
-    model, processor = load_model(f"koclip/{model_name}")
+    model, processor = load_model(f'koclip/{model_name}')
 
     st.title("Text to Image Search Engine")
-    st.markdown(
-        """
+    st.markdown("""
     This demonstration explores capability of KoCLIP as a Korean-language Image search engine. Embeddings for each of
     5000 images from [MSCOCO](https://cocodataset.org/#home) 2017 validation set was generated using trained KoCLIP
     vision model. They are ranked based on cosine similarity distance from input Text query embeddings and top 10 images
@@ -28,11 +27,9 @@ def app(model_name):
     Larger model `koclip-large` uses `klue/roberta` as text encoder and bigger `google/vit-large-patch16-224` as image encoder.
 
     Example Queries : 아파트(Apartment), 자동차(Car), 컴퓨터(Computer)
-    """
-    )
+    """)
 
     query = st.text_input("한글 질문을 적어주세요 (Korean Text Query) :", value="아파트")
-
     if st.button("질문 (Query)"):
         proc = processor(text=[query], images=None, return_tensors="jax", padding=True)
         vec = np.asarray(model.get_text_features(**proc))
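
Note: the restored markdown describes the ranking (image embeddings indexed with nmslib, top 10 cosine-similarity neighbours of the text-query embedding), but the hunk stops right after the text embedding is computed. The retrieval step below is therefore a hedged sketch: the knnQuery call and k=10 are assumptions about how that ranking is implemented, and the display code is omitted.

import os

import numpy as np


def search(query, model, processor, files, index, images_directory, k=10):
    # Embed the Korean text query with the KoCLIP text tower (as in the hunk above).
    proc = processor(text=[query], images=None, return_tensors="jax", padding=True)
    vec = np.asarray(model.get_text_features(**proc))
    # Assumed retrieval step: nearest neighbours in the nmslib HNSW index built by
    # utils.load_index (cosinesimil space), mapped back to image paths.
    ids, dists = index.knnQuery(vec[0], k=k)
    return [os.path.join(images_directory, files[i]) for i in ids]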
utils.py CHANGED
@@ -1,28 +1,26 @@
 import nmslib
-import numpy as np
 import streamlit as st
-from transformers import AutoTokenizer, CLIPProcessor, ViTFeatureExtractor
+from transformers import CLIPProcessor, AutoTokenizer, ViTFeatureExtractor
+import numpy as np
 
 from koclip import FlaxHybridCLIP
 
-
 @st.cache(allow_output_mutation=True)
 def load_index(img_file):
     filenames, embeddings = [], []
     lines = open(img_file, "r")
     for line in lines:
-        cols = line.strip().split("\t")
+        cols = line.strip().split('\t')
         filename = cols[0]
-        embedding = [float(x) for x in cols[1].split(",")]
+        embedding = np.array([float(x) for x in cols[1].split(',')])
        filenames.append(filename)
        embeddings.append(embedding)
     embeddings = np.array(embeddings)
-    index = nmslib.init(method="hnsw", space="cosinesimil")
+    index = nmslib.init(method='hnsw', space='cosinesimil')
     index.addDataPointBatch(embeddings)
-    index.createIndex({"post": 2}, print_progress=True)
+    index.createIndex({'post': 2}, print_progress=True)
     return filenames, index
 
-
 @st.cache(allow_output_mutation=True)
 def load_model(model_name="koclip/koclip-base"):
     assert model_name in {"koclip/koclip-base", "koclip/koclip-large"}
@@ -30,7 +28,11 @@ def load_model(model_name="koclip/koclip-base"):
     processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
     processor.tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
     if model_name == "koclip/koclip-large":
-        processor.feature_extractor = ViTFeatureExtractor.from_pretrained(
-            "google/vit-large-patch16-224"
-        )
+        processor.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-large-patch16-224")
+    return model, processor
+
+@st.cache(allow_output_mutation=True)
+def load_model_v2(model_name="koclip/koclip"):
+    model = FlaxHybridCLIP.from_pretrained(model_name)
+    processor = CLIPProcessor.from_pretrained(model_name)
     return model, processor
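
Note: for reference, a minimal usage sketch of the two cached loaders touched above. The TSV path is hypothetical and must already exist (e.g. written by embed.py); nothing beyond the calls shown in this diff is implied.

from utils import load_index, load_model

# FlaxHybridCLIP model plus a CLIPProcessor whose tokenizer is swapped to klue/roberta-large.
model, processor = load_model("koclip/koclip-base")
# Filenames and an nmslib HNSW index over the image embeddings stored in the TSV
# ("features/val2017/koclip-base.tsv" is a hypothetical path produced by embed.py).
filenames, index = load_index("features/val2017/koclip-base.tsv")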