import streamlit as st
st.set_page_config(page_title='ITR', page_icon="🧊", layout='centered')
st.title("LCM-Independent for Pascal Dataset")
import faiss
import numpy as np
import pandas as pd
from PIL import Image
import json
import zipfile
import pickle
from transformers import AutoTokenizer, CLIPTextModelWithProjection

# loading the train dataset
with open('clip_train.pkl', 'rb') as f: 
    temp_d = pickle.load(f)
    # train_xv = temp_d['image'].astype(np.float64)   # Array of image features : np ndarray
    # train_xt = temp_d['text'].astype(np.float64)    # Array of text features : np ndarray
    # train_yv  = temp_d['label']                     # Array of labels 
    train_yt  = temp_d['label']                     # Array of labels 
    # ids = list(temp_d['ids'])                       # image names == len(images)

# loading the test dataset
with open('clip_test.pkl', 'rb') as f:
    temp_d = pickle.load(f)
    # test_xv = temp_d['image'].astype(np.float64)
    test_xt = temp_d['text'].astype(np.float64)
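    # precomputed CLIP text embeddings of the test captions; one of these is
    # later used as a stand-in query vector in T2Isearch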
    # test_yv = temp_d['label']
    # test_yt = temp_d['label']

# Map each image file name to its class label using the dataset CSV
image_map_name = 'pascal_dataset.csv'
df = pd.read_csv(image_map_name)
image_list = list(df['image'])
class_list = list(df['class'])

zip_path = "pascal_raw.zip"
zip_file = zipfile.ZipFile(zip_path)
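# The zip is expected to contain the raw Pascal images under pascal_raw/images/dataset/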

# text_model = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
# text_tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")

text_index = faiss.read_index("text_index.index")
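# text_index.index is assumed to have been built offline over the L2-normalized
# training text embeddings, e.g. with an IVF index (illustrative sketch only, not run here):
#   xt = temp_d['text'].astype(np.float32)
#   faiss.normalize_L2(xt)
#   quantizer = faiss.IndexFlatIP(xt.shape[1])
#   index = faiss.IndexIVFFlat(quantizer, xt.shape[1], 100, faiss.METRIC_INNER_PRODUCT)
#   index.train(xt)
#   index.add(xt)
#   faiss.write_index(index, "text_index.index")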

def T2Isearch(query, k=50):
    # Encode the text query
    # (the live CLIP text encoder is commented out; a precomputed test embedding
    #  is used as a stand-in, so the typed query does not affect the result yet)
    # inputs = text_tokenizer([query], padding=True, return_tensors="pt")
    # outputs = text_model(**inputs)
    # query_embedding = outputs.text_embeds
    query_embedding = test_xt[0]
    query_vector = np.array([query_embedding], dtype=np.float32)  # FAISS expects float32
    faiss.normalize_L2(query_vector)
    # text_index.nprobe = index.ntotal
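    # nprobe sets how many IVF cells are scanned per query; larger values trade
    # speed for recall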
    text_index.nprobe = 100
    
    # Search for the nearest neighbors in the FAISS text index
    D, I = text_index.search(query_vector, k)
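    # D: distances/similarities of the k nearest training captions, I: their row indices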

    # Rank all classes w.r.t. the query by voting over the multi-hot labels
    # of the retrieved training neighbours
    Y = train_yt
    neighbor_ys = Y[I[0]]                         # (k, num_classes) labels of the k nearest neighbours
    class_freq = np.zeros(Y.shape[1])
    for neighbor_y in neighbor_ys:
        classes = np.where(neighbor_y > 0.5)[0]   # class indices active in this neighbour's label vector
        for _class in classes:
            class_freq[_class] += 1

    # number of distinct classes that received at least one vote
    count = np.count_nonzero(class_freq)
    ranked_classes = np.argsort(-class_freq)             # classes ordered by vote count -- predicted label ranking for the query
    ranked_classes_after_knn = ranked_classes[:count]    # predicted sequence of top labels after the kNN search

    # The 20 Pascal VOC class names
    lis = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
    class_ = lis[ranked_classes_after_knn[0]-1]   # name of the top-ranked predicted class

    # Display all dataset images that belong to the predicted class
    for i in range(len(image_list)):
        if class_list[i] == class_ :            
            image_name = image_list[i]
            image_data = zip_file.open("pascal_raw/images/dataset/"+ image_name)
            image = Image.open(image_data)
            st.image(image, width=600)

query = st.text_input("Enter your search query here:")
if st.button("Search"):
    if query:
        T2Isearch(query)
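
# To try this locally (assuming the pickles, CSV, zip and FAISS index referenced above
# are in the working directory and this file is saved as app.py):
#   streamlit run app.py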