g8a9 committed on
Commit c5ad46a
1 Parent(s): 0789e97

[text2image] Add IR for the CC validation set

static/CC_val_urls.txt ADDED
The diff for this file is too large to render. See raw diff
 
static/features/{cc_features.npy → CC_val_embeddings.npy} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63f185e851ff9cd0a19c5b1877087d860ca53ec5fc9e6a7d608249b9aacb77df
-size 2050773120
+oid sha256:775803a42011b09e8f5d19fcbdd67123cc3447154e1f8e5990cae1bce4581662
+size 27369600
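
The embeddings file stays a Git LFS pointer, so only its oid and size change here. A minimal sanity-check sketch, assuming (this is not stated in the commit) that CC_val_embeddings.npy holds one L2-normalized float32 row per line of static/CC_val_urls.txt, in the same order:

import jax.numpy as jnp

# Load the renamed CC validation embeddings and the matching URL list
# (paths are relative to a local checkout of the Space).
image_features = jnp.load("static/features/CC_val_embeddings.npy")
with open("static/CC_val_urls.txt") as fp:
    urls = [l.strip() for l in fp.readlines()]

# Assumed invariants: one embedding per URL, rows already unit-normalized
# (precompute_image_features in utils.py normalizes before saving).
assert image_features.shape[0] == len(urls)
assert jnp.allclose(jnp.linalg.norm(image_features, axis=-1), 1.0, atol=1e-3)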
text2image.py CHANGED
@@ -22,9 +22,15 @@ def get_model():
     return FlaxHybridCLIP.from_pretrained("clip-italian/clip-italian")


-@st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None})
+@st.cache(
+    hash_funcs={
+        transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None
+    }
+)
 def get_tokenizer():
-    return AutoTokenizer.from_pretrained("dbmdz/bert-base-italian-xxl-uncased", cache_dir="./", use_fast=True)
+    return AutoTokenizer.from_pretrained(
+        "dbmdz/bert-base-italian-xxl-uncased", cache_dir="./", use_fast=True
+    )


 @st.cache(suppress_st_warning=True)
@@ -37,10 +43,14 @@ def download_images():
     photo_filename = "unsplash-25k-photos.zip"
     if not os.path.exists(photo_filename): # Download dataset if does not exist
         print(f"Downloading {photo_filename}...")
-        response = requests.get(f"http://sbert.net/datasets/{photo_filename}", stream=True)
-        total_size_in_bytes = int(response.headers.get('content-length', 0))
+        response = requests.get(
+            f"http://sbert.net/datasets/{photo_filename}", stream=True
+        )
+        total_size_in_bytes = int(response.headers.get("content-length", 0))
         block_size = 1024 # 1 Kb
-        progress_bar = stqdm(total=total_size_in_bytes) # , unit='iB', unit_scale=True
+        progress_bar = stqdm(
+            total=total_size_in_bytes
+        ) # , unit='iB', unit_scale=True
         content = io.BytesIO()
         for data in response.iter_content(block_size):
             progress_bar.update(len(data))
@@ -54,8 +64,21 @@ def download_images():


 @st.cache()
-def get_image_features():
-    return jnp.load("static/features/features.npy")
+def get_image_features(dataset_name):
+    if dataset_name == "Unsplash":
+        return jnp.load("static/features/features.npy")
+    else:
+        return jnp.load("static/features/CC_val_embeddings.npy")
+
+
+@st.cache()
+def load_urls(dataset_name):
+    if dataset_name == "CC":
+        with open("static/CC_val_urls.txt") as fp:
+            urls = [l.strip() for l in fp.readlines()]
+        return urls
+    else:
+        ValueError(f"{dataset_name} not supported here")


 def app():
@@ -73,7 +96,7 @@ def app():
     """
     )

-    if 'suggestion' not in st.session_state:
+    if "suggestion" not in st.session_state:
         st.session_state.suggestion = ""

     def update_query(value=""):
@@ -81,44 +104,61 @@ def app():

     col1, col2, col3, col4 = st.beta_columns(4)
     with col1:
-        st.button('Un gatto', on_click=update_query, kwargs=dict(value='Un gatto'))
+        st.button("Un gatto", on_click=update_query, kwargs=dict(value="Un gatto"))
     with col2:
-        st.button('Due gatti', on_click=update_query, kwargs=dict(value='Due gatti'))
+        st.button("Due gatti", on_click=update_query, kwargs=dict(value="Due gatti"))
     with col3:
-        st.button('Un fiore giallo', on_click=update_query, kwargs=dict(value='Un fiore giallo'))
+        st.button(
+            "Un fiore giallo",
+            on_click=update_query,
+            kwargs=dict(value="Un fiore giallo"),
+        )
     with col4:
-        st.button('Un fiore blu', on_click=update_query, kwargs=dict(value='Un fiore blu'))
+        st.button(
+            "Un fiore blu", on_click=update_query, kwargs=dict(value="Un fiore blu")
+        )

-    query = st.text_input('Insert an italian query text here...', st.session_state.suggestion)
+    col1, col2 = st.beta_columns([3, 1])
+    with col1:
+        query = st.text_input(
+            "Insert an italian query text here...", st.session_state.suggestion
+        )
+    with col2:
+        dataset_name = st.selectbox("IR dataset", ["Unsplash", "CC"])

     if query:
-        with st.spinner("Computing in progress..."):
+        with st.spinner("Computing..."):
+
             model = get_model()
-            download_images()

-            image_features = get_image_features()
+            if dataset_name == "Unsplash":
+                download_images()

+            image_features = get_image_features(dataset_name)
             model = get_model()
             tokenizer = get_tokenizer()

-            image_size = model.config.vision_config.image_size
-
-            val_preprocess = Compose(
-                [
-                    Resize([image_size], interpolation=InterpolationMode.BICUBIC),
-                    CenterCrop(image_size),
-                    ToTensor(),
-                    Normalize(
-                        (0.48145466, 0.4578275, 0.40821073),
-                        (0.26862954, 0.26130258, 0.27577711),
-                    ),
-                ]
-            )
-
-            dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
+            if dataset_name == "Unsplash":
+                image_size = model.config.vision_config.image_size
+                val_preprocess = Compose(
+                    [
+                        Resize([image_size], interpolation=InterpolationMode.BICUBIC),
+                        CenterCrop(image_size),
+                        ToTensor(),
+                        Normalize(
+                            (0.48145466, 0.4578275, 0.40821073),
+                            (0.26862954, 0.26130258, 0.27577711),
+                        ),
+                    ]
+                )
+                dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
+            elif dataset_name == "CC":
+                dataset = load_urls(dataset_name)
+            else:
+                raise ValueError()

             image_paths = utils.find_image(
-                query, model, dataset, tokenizer, image_features, n=2
+                query, model, dataset, tokenizer, image_features, 2, dataset_name
             )

             st.image(image_paths)
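
The app now branches on dataset_name: the "Unsplash" path downloads the photos and builds a local CustomDataSet, while the "CC" path only loads the precomputed embeddings plus the URL list, and find_image returns URLs that st.image can display directly (note that load_urls constructs, but does not raise, a ValueError for other dataset names). A rough sketch of the CC retrieval path outside Streamlit, using only the helpers added above; the query string is an arbitrary example, and st.cache may warn when these functions are called outside a Streamlit session:

import utils
from text2image import get_image_features, get_model, get_tokenizer, load_urls

model = get_model()
tokenizer = get_tokenizer()

# CC branch: precomputed embeddings plus one URL per row, no image download.
image_features = get_image_features("CC")
dataset = load_urls("CC")  # list of image URLs

# Top-2 CC images for an Italian query, returned as URLs.
image_urls = utils.find_image(
    "Un gatto", model, dataset, tokenizer, image_features, 2, "CC"
)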
utils.py CHANGED
@@ -45,20 +45,24 @@ def precompute_image_features(model, loader):
     image_features = []
     for i, (images) in enumerate(tqdm(loader)):
         images = images.permute(0, 2, 3, 1).numpy()
-        features = model.get_image_features(
-            images,
-        )
+        features = model.get_image_features(images,)
         features /= jnp.linalg.norm(features, axis=-1, keepdims=True)
         image_features.extend(features)
     return jnp.array(image_features)


-def find_image(text_query, model, dataset, tokenizer, image_features, n=1):
+def find_image(text_query, model, dataset, tokenizer, image_features, n, dataset_name):
     zeroshot_weights = text_encoder(text_query, model, tokenizer)
     zeroshot_weights /= jnp.linalg.norm(zeroshot_weights)
     distances = jnp.dot(image_features, zeroshot_weights.reshape(-1, 1))
     file_paths = []
     for i in range(1, n + 1):
         idx = jnp.argsort(distances, axis=0)[-i, 0]
-        file_paths.append("photos/" + dataset.get_image_name(idx))
+
+        if dataset_name == "Unsplash":
+            file_paths.append("photos/" + dataset.get_image_name(idx))
+        elif dataset_name == "CC":
+            file_paths.append(dataset[idx])
+        else:
+            raise ValueError(f"{dataset_name} not supported here")
     return file_paths
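
For completeness, precompute_image_features (unchanged above except for formatting) is what produces files such as static/features/features.npy. A sketch of regenerating the Unsplash features with it, assuming a standard torch DataLoader over the CustomDataSet and the same preprocessing as the Unsplash branch in text2image.py; the batch size is arbitrary:

import numpy as np
from torch.utils.data import DataLoader
from torchvision.transforms import (
    CenterCrop, Compose, InterpolationMode, Normalize, Resize, ToTensor,
)

import utils
from text2image import get_model

model = get_model()
image_size = model.config.vision_config.image_size

# Same preprocessing pipeline used for Unsplash retrieval in text2image.py.
val_preprocess = Compose(
    [
        Resize([image_size], interpolation=InterpolationMode.BICUBIC),
        CenterCrop(image_size),
        ToTensor(),
        Normalize(
            (0.48145466, 0.4578275, 0.40821073),
            (0.26862954, 0.26130258, 0.27577711),
        ),
    ]
)

dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
loader = DataLoader(dataset, batch_size=256, shuffle=False)

# One L2-normalized embedding per image, in dataset order.
features = utils.precompute_image_features(model, loader)
np.save("static/features/features.npy", np.asarray(features))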