import io
import os
import requests
import zipfile
import natsort
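# Set before the tokenizer is used: silences the Hugging Face tokenizers fork/parallelism warning.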
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from pathlib import Path
from stqdm import stqdm
import streamlit as st
from jax import numpy as jnp
import transformers
from transformers import AutoTokenizer
from torchvision.transforms import Compose, CenterCrop, Normalize, Resize, ToTensor
from torchvision.transforms.functional import InterpolationMode
from modeling_hybrid_clip import FlaxHybridCLIP
import utils
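# st.cache cannot hash the Flax model or the fast tokenizer, so hash_funcs maps those types
# to a constant and the loaded objects are simply reused across Streamlit reruns.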
@st.cache(hash_funcs={FlaxHybridCLIP: lambda _: None})
def get_model():
    return FlaxHybridCLIP.from_pretrained("clip-italian/clip-italian")
@st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None})
def get_tokenizer():
    return AutoTokenizer.from_pretrained("dbmdz/bert-base-italian-xxl-uncased", cache_dir="./", use_fast=True)
@st.cache(suppress_st_warning=True)
def download_images():
    # from sentence_transformers import SentenceTransformer, util
    img_folder = "photos/"
    if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
        os.makedirs(img_folder, exist_ok=True)
        photo_filename = "unsplash-25k-photos.zip"
        if not os.path.exists(photo_filename):  # Download the dataset if it does not exist
            print(f"Downloading {photo_filename}...")
            response = requests.get(f"http://sbert.net/datasets/{photo_filename}", stream=True)
            total_size_in_bytes = int(response.headers.get("content-length", 0))
            block_size = 1024  # 1 KiB
            progress_bar = stqdm(total=total_size_in_bytes)  # , unit='iB', unit_scale=True
            # Stream the archive into memory, then extract it into the photos folder.
            content = io.BytesIO()
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                content.write(data)
            progress_bar.close()
            z = zipfile.ZipFile(content)
            # content.close()
            print("Extracting the dataset...")
            z.extractall(path=img_folder)
            print("Done.")
@st.cache()
def get_image_features():
    return jnp.load("static/features/features.npy")
def read_markdown_file(markdown_file):
    return Path(markdown_file).read_text()
"""
# 👋 Ciao!
# CLIP Italian Demo
## HF-Flax Community Week
In this demo you can search for images in the Unsplash 25k Photos dataset.
🤌 Italian mode on! 🤌
"""
query = st.text_input("Insert an Italian query here...")
if query:
    with st.spinner("Computing in progress..."):
        model = get_model()
        download_images()
        image_features = get_image_features()
        tokenizer = get_tokenizer()
        image_size = model.config.vision_config.image_size
        # Standard CLIP preprocessing: bicubic resize, center crop, and normalization
        # with the CLIP image mean/std.
        val_preprocess = Compose(
            [
                Resize([image_size], interpolation=InterpolationMode.BICUBIC),
                CenterCrop(image_size),
                ToTensor(),
                Normalize(
                    (0.48145466, 0.4578275, 0.40821073),
                    (0.26862954, 0.26130258, 0.27577711),
                ),
            ]
        )
        dataset = utils.CustomDataSet("photos/", transform=val_preprocess)
        # Rank the photos against the query and keep the two best matches.
        image_paths = utils.find_image(
            query, model, dataset, tokenizer, image_features, n=2
        )
    st.image(image_paths)
intro_markdown = read_markdown_file("readme.md")
st.markdown(intro_markdown, unsafe_allow_html=True)