4rtemi5's picture
Try to add progress bar to image download.
b4605c3
raw history blame
No virus
3.27 kB
import io
import os
# import requests
import urllib.request
import zipfile
import natsort
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from pathlib import Path
import streamlit as st
from jax import numpy as jnp
import transformers
from transformers import AutoTokenizer
from torchvision.transforms import Compose, CenterCrop, Normalize, Resize, ToTensor
from torchvision.transforms.functional import InterpolationMode
from modeling_hybrid_clip import FlaxHybridCLIP
import utils
@st.cache(hash_funcs={FlaxHybridCLIP: lambda _: None})
def get_model():
    """Load the pretrained ``clip-italian/clip-italian`` hybrid CLIP model.

    The call is wrapped in ``st.cache``; the ``hash_funcs`` entry gives every
    ``FlaxHybridCLIP`` instance the same constant hash (``None``).

    Returns:
        The object produced by ``FlaxHybridCLIP.from_pretrained``.
    """
    checkpoint = "clip-italian/clip-italian"
    clip_model = FlaxHybridCLIP.from_pretrained(checkpoint)
    return clip_model
@st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None})
def get_tokenizer():
    """Load the fast tokenizer for ``dbmdz/bert-base-italian-xxl-uncased``.

    Tokenizer files are cached in the current directory (``cache_dir="./"``).
    The call is wrapped in ``st.cache``; the ``hash_funcs`` entry gives every
    ``BertTokenizerFast`` instance the same constant hash (``None``).

    Returns:
        The tokenizer produced by ``AutoTokenizer.from_pretrained``.
    """
    tokenizer_options = {"cache_dir": "./", "use_fast": True}
    italian_tokenizer = AutoTokenizer.from_pretrained(
        "dbmdz/bert-base-italian-xxl-uncased", **tokenizer_options
    )
    return italian_tokenizer
def _download_to_file(url, destination, block_size=1024 * 1024):
    """Stream *url* into the file *destination*, printing coarse progress.

    The payload is first written to ``destination + ".part"`` and only renamed
    to *destination* once the transfer has finished, so an interrupted
    download never leaves a truncated file under the final name.
    """
    partial_path = destination + ".part"
    with urllib.request.urlopen(url) as response, open(partial_path, "wb") as sink:
        # A missing Content-Length header simply disables the percentage output.
        total_size_in_bytes = int(response.headers.get("Content-Length", 0))
        received = 0
        next_report = 10  # next percentage at which a progress line is printed
        while True:
            chunk = response.read(block_size)
            if not chunk:
                break
            sink.write(chunk)
            received += len(chunk)
            if total_size_in_bytes:
                percent = received * 100 // total_size_in_bytes
                if percent >= next_report:
                    print(f"  {percent}% of {total_size_in_bytes} bytes")
                    next_report = percent - percent % 10 + 10
    os.replace(partial_path, destination)


@st.cache
def download_images():
    """Make sure the photo collection is unpacked under ``photos/``.

    Nothing happens when ``photos/`` already exists and is non-empty.
    Otherwise ``unsplash-25k-photos.zip`` is downloaded from sbert.net into
    the working directory (unless a copy is already there) and extracted
    into ``photos/``.

    The download is streamed to disk in chunks with ``urllib.request`` rather
    than buffered in memory, and the archive is opened from that file, so the
    existence check on the zip file is meaningful on later runs.

    Raises:
        urllib.error.URLError: if the archive cannot be downloaded.
        zipfile.BadZipFile: if the file on disk is not a valid zip archive.
    """
    img_folder = "photos/"
    if os.path.exists(img_folder) and os.listdir(img_folder):
        return
    os.makedirs(img_folder, exist_ok=True)

    photo_filename = "unsplash-25k-photos.zip"
    if not os.path.exists(photo_filename):  # Download dataset if does not exist
        print(f"Downloading {photo_filename}...")
        _download_to_file(
            f"http://sbert.net/datasets/{photo_filename}", photo_filename
        )

    print("Extracting the dataset...")
    with zipfile.ZipFile(photo_filename) as archive:
        archive.extractall(path=img_folder)
    print("Done.")
@st.cache()
def get_image_features():
    """Load the image feature array stored in ``static/features/features.npy``.

    The call is wrapped in ``st.cache``.

    Returns:
        The array returned by ``jnp.load`` for that file.
    """
    features_file = "static/features/features.npy"
    stored_features = jnp.load(features_file)
    return stored_features
def read_markdown_file(markdown_file):
    """Return the full text content of the file at *markdown_file*.

    The file is opened in text mode with the platform default encoding.
    """
    with Path(markdown_file).open("r") as handle:
        return handle.read()
"""
# 👋 Ciao!
# CLIP Italian Demo
## HF-Flax Community Week
In this demo you can search for images in the Unsplash 25k Photos dataset.
"""
# Free-text search box; everything below runs only once the user has typed a query.
query = st.text_input("Insert an italian query text here...")
if query:
    with st.spinner("Computing in progress..."):
        # Load the model once (the original fetched it a second time a few
        # lines later; the duplicate call has been dropped).
        model = get_model()
        # Make sure the photo collection is on disk before the dataset is built.
        download_images()

        image_features = get_image_features()
        tokenizer = get_tokenizer()

        # Preprocessing pipeline sized to the vision tower's expected input.
        image_size = model.config.vision_config.image_size
        val_preprocess = Compose(
            [
                Resize([image_size], interpolation=InterpolationMode.BICUBIC),
                CenterCrop(image_size),
                ToTensor(),
                # Per-channel mean, then per-channel std.
                Normalize(
                    (0.48145466, 0.4578275, 0.40821073),
                    (0.26862954, 0.26130258, 0.27577711),
                ),
            ]
        )
        dataset = utils.CustomDataSet("photos/", transform=val_preprocess)

        # n=2 is presumably the number of matches to return -- confirm in utils.find_image.
        image_paths = utils.find_image(
            query, model, dataset, tokenizer, image_features, n=2
        )

    st.image(image_paths)
# Show the contents of readme.md after the search UI; raw HTML in the file
# is passed through to the page (unsafe_allow_html=True).
intro_markdown = read_markdown_file("readme.md")
st.markdown(body=intro_markdown, unsafe_allow_html=True)