File size: 2,553 Bytes
ae92333
 
b2c2198
5ddf50c
ae92333
b2c2198
ae92333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ddf50c
 
4bd9371
b2c2198
 
ae92333
 
b2c2198
4bd9371
b2c2198
 
 
4bd9371
cd52a4f
ae92333
 
 
 
 
 
6623aa6
 
 
 
4bd9371
 
26cbfcd
 
5ddf50c
7db14ed
 
26cbfcd
 
4bd9371
26cbfcd
 
4bd9371
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import json
import os
import numpy as np
import streamlit as st
from PIL import Image
from transformers import CLIPProcessor, FlaxCLIPModel
import nmslib


def load_index(image_vector_file):
    """Load precomputed image vectors from a TSV file and build an
    HNSW cosine-similarity index over them.

    Each line of the file is expected to look like:
        <filename>\t<comma-separated floats>

    :param image_vector_file: path to the TSV file of image vectors.
    :return: ``(filenames, index)`` where ``filenames[i]`` is the image
        corresponding to the i-th vector added to the nmslib index.
    """
    filenames, image_vecs = [], []
    # Context manager guarantees the file is closed even on a parse
    # error (the previous version leaked the handle).
    with open(image_vector_file, "r") as fvec:
        for line in fvec:
            cols = line.strip().split('\t')
            filenames.append(cols[0])
            image_vecs.append(np.array([float(x) for x in cols[1].split(',')]))
    V = np.array(image_vecs)
    index = nmslib.init(method='hnsw', space='cosinesimil')
    index.addDataPointBatch(V)
    # 'post': 2 enables extra index post-processing for better recall.
    index.createIndex({'post': 2}, print_progress=True)
    return filenames, index


def load_captions(caption_file):
    """Read a JSON-lines caption file and map each image filename to its
    list of captions.

    :param caption_file: path to a file with one JSON object per line,
        each having "filename" and "captions" keys.
    :return: dict mapping filename -> captions list.
    """
    with open(caption_file, "r") as fcap:
        records = (json.loads(raw.strip()) for raw in fcap)
        return {record["filename"]: record["captions"] for record in records}


def get_image(text, number):
    """Find the `number` images nearest to `text` in CLIP embedding
    space and render them with Streamlit.

    :param text: free-text query string.
    :param number: how many results to display.
    """
    # NOTE(review): model, processor, index and captions are reloaded on
    # every query; consider st.cache_resource for interactive latency.
    model = FlaxCLIPModel.from_pretrained("flax-community/clip-rsicd-v2")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    filenames, index = load_index("./vectors/test-bs128x8-lr5e-6-adam-ckpt-1.tsv")
    # Currently unused in the rendered output; kept for a future
    # per-image caption column.
    image2caption = load_captions("./images/test-captions.json")

    inputs = processor(text=[text], images=None, return_tensors="jax", padding=True)
    vector = np.asarray(model.get_text_features(**inputs))

    ids, distances = index.knnQuery(vector, k=number)
    # Use `i`, not `index`, as the loop variable: the original shadowed
    # the nmslib index object with an int.
    result_filenames = [filenames[i] for i in ids]
    for rank, (result_filename, score) in enumerate(zip(result_filenames, distances)):
        # nmslib cosinesimil returns a distance; 1 - distance is shown
        # as a similarity score.
        caption = "{:s} (score: {:.3f})".format(result_filename, 1.0 - score)
        col1, col2, col3 = st.columns([2, 10, 10])
        col1.markdown("{:d}.".format(rank + 1))
        col2.image(Image.open(os.path.join("./images", result_filename)),
                   caption=caption)
        st.markdown("---")


def app():
    """Streamlit entry point: collect a text query and a result count,
    then run the image search when the user clicks Search."""
    st.title("Welcome to Space Vector")
    st.text("You want search an image with given text.")

    query = st.text_input("Enter text: ")
    result_count = st.number_input("Enter number of images result: ", min_value=1, max_value=10)

    if st.button("Search"):
        get_image(query, result_count)