Spaces:

clip-italian
/

clip-italian-demo

Running

App Files Files Community

Silvia Terragni committed on Jul 18, 2021

Commit

ce649db

1 Parent(s): f1abd41

updated template

Browse files

Files changed (4) hide show

app.py +8 -111
home.py +11 -0
image2text.py +0 -0
text2image.py +106 -0

app.py CHANGED Viewed

@@ -1,112 +1,9 @@
-import io
-import os
-import requests
-import zipfile
-import natsort
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-from pathlib import Path
-from stqdm import stqdm
 import streamlit as st
-from jax import numpy as jnp
-import transformers
-from transformers import AutoTokenizer
-from torchvision.transforms import Compose, CenterCrop, Normalize, Resize, ToTensor
-from torchvision.transforms.functional import InterpolationMode
-from modeling_hybrid_clip import FlaxHybridCLIP
-import utils
-@st.cache(hash_funcs={FlaxHybridCLIP: lambda _: None})
-def get_model():
-    return FlaxHybridCLIP.from_pretrained("clip-italian/clip-italian")
-@st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None})
-def get_tokenizer():
-    return AutoTokenizer.from_pretrained("dbmdz/bert-base-italian-xxl-uncased", cache_dir="./", use_fast=True)
-@st.cache(suppress_st_warning=True)
-def download_images():
-    # from sentence_transformers import SentenceTransformer, util
-    img_folder = "photos/"
-    if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
-        os.makedirs(img_folder, exist_ok=True)
-        photo_filename = "unsplash-25k-photos.zip"
-        if not os.path.exists(photo_filename):  # Download dataset if does not exist
-            print(f"Downloading {photo_filename}...")
-            response = requests.get(f"http://sbert.net/datasets/{photo_filename}", stream=True)
-            total_size_in_bytes= int(response.headers.get('content-length', 0))
-            block_size = 1024 #1 Kb
-            progress_bar = stqdm(total=total_size_in_bytes)  # , unit='iB', unit_scale=True
-            content = io.BytesIO()
-            for data in response.iter_content(block_size):
-                progress_bar.update(len(data))
-                content.write(data)
-            progress_bar.close()
-            z = zipfile.ZipFile(content)
-            # content.close()
-            print("Extracting the dataset...")
-            z.extractall(path=img_folder)
-    print("Done.")
-@st.cache()
-def get_image_features():
-    return jnp.load("static/features/features.npy")
-def read_markdown_file(markdown_file):
-    return Path(markdown_file).read_text()
-"""
-# 👋 Ciao!
-# CLIP Italian Demo
-## HF-Flax Community Week
-In this demo you can search for images in the Unsplash 25k Photos dataset.
-🤌 Italian mode on! 🤌
-"""
-query = st.text_input("Insert an italian query text here...")
-if query:
-    with st.spinner("Computing in progress..."):
-        model = get_model()
-        download_images()
-        image_features = get_image_features()
-        model = get_model()
-        tokenizer = get_tokenizer()
-        image_size = model.config.vision_config.image_size
-        val_preprocess = Compose(
-            [
-                Resize([image_size], interpolation=InterpolationMode.BICUBIC),
-                CenterCrop(image_size),
-                ToTensor(),
-                Normalize(
-                    (0.48145466, 0.4578275, 0.40821073),
-                    (0.26862954, 0.26130258, 0.27577711),
-                ),
-            ]
-        )
-        dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
-        image_paths = utils.find_image(
-            query, model, dataset, tokenizer, image_features, n=2
-        )
-    st.image(image_paths)
-intro_markdown = read_markdown_file("introduction.md")
-st.markdown(intro_markdown, unsafe_allow_html=True)

 import streamlit as st
+import image2text
+import text2image
+import home
+# Maps the label shown in the sidebar to the module implementing that page.
+PAGES = {"Home": home, "Text to Image": text2image, "Image to Text": image2text}
+st.sidebar.title("Navigation")
+page = st.sidebar.selectbox("Choose a task", list(PAGES.keys()))
+# image2text.py is still an empty module, so it has no app() yet; show a
+# notice instead of crashing with AttributeError when that page is selected.
+render_page = getattr(PAGES[page], "app", None)
+if render_page is None:
+    st.info(f"The '{page}' page is not available yet.")
+else:
+    render_page()

home.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from pathlib import Path
+import streamlit as st
+def read_markdown_file(markdown_file):
+    """Return the full text of the file at *markdown_file*.
+
+    The file is decoded as UTF-8 explicitly, so emoji and accented
+    characters read the same way regardless of the platform's default
+    encoding.
+    """
+    return Path(markdown_file).read_text(encoding="utf-8")
+def app():
+    """Render the home page from the contents of "introduction.md"."""
+    st.markdown(read_markdown_file("introduction.md"), unsafe_allow_html=True)

image2text.py ADDED Viewed

File without changes

text2image.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import io
+import os
+import requests
+import zipfile
+import natsort
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+from stqdm import stqdm
+import streamlit as st
+from jax import numpy as jnp
+import transformers
+from transformers import AutoTokenizer
+from torchvision.transforms import Compose, CenterCrop, Normalize, Resize, ToTensor
+from torchvision.transforms.functional import InterpolationMode
+from modeling_hybrid_clip import FlaxHybridCLIP
+import utils
+@st.cache(hash_funcs={FlaxHybridCLIP: lambda _: None})
+def get_model():
+    """Load the pretrained "clip-italian/clip-italian" FlaxHybridCLIP model.
+
+    Wrapped in st.cache; the hash function registered for FlaxHybridCLIP
+    always returns None.
+    """
+    pretrained_model = FlaxHybridCLIP.from_pretrained("clip-italian/clip-italian")
+    return pretrained_model
+@st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None})
+def get_tokenizer():
+    """Load the fast tokenizer for "dbmdz/bert-base-italian-xxl-uncased".
+
+    The tokenizer files are cached in the current directory ("./").
+    Wrapped in st.cache; the hash function registered for
+    BertTokenizerFast always returns None.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(
+        "dbmdz/bert-base-italian-xxl-uncased",
+        cache_dir="./",
+        use_fast=True,
+    )
+    return tokenizer
+@st.cache(suppress_st_warning=True)
+def download_images():
+    """Make sure the photo dataset is unpacked into the "photos/" folder.
+
+    Nothing is extracted when "photos/" already exists and is non-empty.
+    Otherwise the archive "unsplash-25k-photos.zip" is taken from the
+    working directory when present there, or downloaded from sbert.net into
+    memory, and then extracted into "photos/".
+
+    Raises:
+        requests.HTTPError: if the download answers with an error status.
+    """
+    img_folder = "photos/"
+    if not os.path.exists(img_folder) or not os.listdir(img_folder):
+        os.makedirs(img_folder, exist_ok=True)
+        photo_filename = "unsplash-25k-photos.zip"
+        if os.path.exists(photo_filename):
+            # A local copy of the archive exists: extract it instead of
+            # skipping extraction and leaving the photo folder empty.
+            archive = photo_filename
+        else:  # Download dataset if does not exist
+            print(f"Downloading {photo_filename}...")
+            response = requests.get(f"http://sbert.net/datasets/{photo_filename}", stream=True)
+            # Fail here on an HTTP error rather than handing an error page
+            # to zipfile, which would raise an unrelated BadZipFile.
+            response.raise_for_status()
+            total_size_in_bytes = int(response.headers.get('content-length', 0))
+            block_size = 1024  # 1 Kb
+            progress_bar = stqdm(total=total_size_in_bytes)
+            archive = io.BytesIO()
+            for data in response.iter_content(block_size):
+                progress_bar.update(len(data))
+                archive.write(data)
+            progress_bar.close()
+        print("Extracting the dataset...")
+        # The context manager closes the archive once extraction finishes.
+        with zipfile.ZipFile(archive) as z:
+            z.extractall(path=img_folder)
+    print("Done.")
+@st.cache()
+def get_image_features():
+    """Load the array stored in "static/features/features.npy" with jnp.load."""
+    features = jnp.load("static/features/features.npy")
+    return features
+def app():
+    """Render the "Text to Image" page.
+
+    Shows the demo heading and a text input. When a query is entered, the
+    model, tokenizer, photo dataset and stored image features are loaded and
+    the paths returned by utils.find_image (called with n=2) are displayed
+    with st.image.
+    """
+    # Render the heading explicitly: a bare string at the top of a function
+    # is a docstring, and Streamlit magic does not display it here.
+    st.markdown(
+        "# 👋 Ciao!\n"
+        "# CLIP Italian Demo\n"
+        "## HF-Flax Community Week\n"
+        "In this demo you can search for images in the Unsplash 25k Photos dataset.\n"
+        "\n"
+        "🤌 Italian mode on! 🤌"
+    )
+    query = st.text_input("Insert an italian query text here...")
+    if query:
+        with st.spinner("Computing in progress..."):
+            model = get_model()
+            download_images()
+            image_features = get_image_features()
+            tokenizer = get_tokenizer()
+            image_size = model.config.vision_config.image_size
+            val_preprocess = Compose(
+                [
+                    Resize([image_size], interpolation=InterpolationMode.BICUBIC),
+                    CenterCrop(image_size),
+                    ToTensor(),
+                    Normalize(
+                        (0.48145466, 0.4578275, 0.40821073),
+                        (0.26862954, 0.26130258, 0.27577711),
+                    ),
+                ]
+            )
+            dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
+            # The helper is utils.find_image, as in the previous app.py; the
+            # name "find_imageread_markdown_file" was a corrupted identifier.
+            image_paths = utils.find_image(
+                query, model, dataset, tokenizer, image_features, n=2
+            )
+        st.image(image_paths)