Spaces:

shivangibithel
/

LCMI_T2I

Sleeping

App Files Files Community

shivangibithel committed on Mar 15, 2023

Commit

25ae722

•

1 Parent(s): 86ba518

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -9

app.py CHANGED Viewed

@@ -11,6 +11,9 @@ import pickle
 import pickletools
 from transformers import AutoTokenizer, CLIPTextModelWithProjection
 from sklearn.preprocessing import normalize, OneHotEncoder
 # loading the train dataset
 with open('clip_train.pkl', 'rb') as f:
@@ -29,6 +32,10 @@ with open('clip_test.pkl', 'rb') as f:
     test_yv = temp_d['label']
     test_yt = temp_d['label']
 enc = OneHotEncoder(sparse=False)
 enc.fit(np.concatenate((train_yt, test_yt)).reshape((-1, 1)))
 train_yv = enc.transform(train_yv.reshape((-1, 1))).astype(np.float64)
@@ -36,6 +43,45 @@ test_yv = enc.transform(test_yv.reshape((-1, 1))).astype(np.float64)
 train_yt = enc.transform(train_yt.reshape((-1, 1))).astype(np.float64)
 test_yt = enc.transform(test_yt.reshape((-1, 1))).astype(np.float64)
 # Map the image ids to the corresponding image URLs
 image_map_name = 'pascal_dataset.csv'
 df = pd.read_csv(image_map_name)
@@ -51,14 +97,17 @@ d = 32
 text_index = faiss.index_factory(d, "Flat", faiss.METRIC_INNER_PRODUCT)
 text_index = faiss.read_index("text_index.index")
-def T2Isearch(query, k=50):
     # Encode the text query
-    inputs = text_tokenizer([query], padding=True, return_tensors="pt")
-    outputs = text_model(**inputs)
-    query_embedding = outputs.text_embeds
-    query_vector = query_embedding.detach().numpy()
     # query_vector = np.concatenate((query_vector[0], query_vector[1]), dtype=np.float32)
-    query_vector = query_vector.reshape(1,512)
     faiss.normalize_L2(query_vector)
     index.nprobe = index.ntotal
@@ -66,7 +115,7 @@ def T2Isearch(query, k=50):
     D, I = text_index.search(query_vector, k)
     # get rank of all classes wrt to query
-    classes_all = []
     Y = train_yt
     neighbor_ys = Y[I]
     class_freq = np.zeros(Y.shape[1])
@@ -98,7 +147,7 @@ def T2Isearch(query, k=50):
             if count == 5: break
 query = st.text_input("Enter your search query here:")
 if st.button("Search"):
     if query:
-        T2Isearch(query)

 import pickletools
 from transformers import AutoTokenizer, CLIPTextModelWithProjection
 from sklearn.preprocessing import normalize, OneHotEncoder
+import torch.nn as nn
+import torch.nn.functional as F
+import torch
 # loading the train dataset
 with open('clip_train.pkl', 'rb') as f:
     test_yv = temp_d['label']
     test_yt = temp_d['label']
+test_xt_proj = np.load("test_text_proj.npy")
+# test_xv_proj = np.load("test_image_proj.npy")
+# encoding the labels
 enc = OneHotEncoder(sparse=False)
 enc.fit(np.concatenate((train_yt, test_yt)).reshape((-1, 1)))
 train_yv = enc.transform(train_yv.reshape((-1, 1))).astype(np.float64)
 train_yt = enc.transform(train_yt.reshape((-1, 1))).astype(np.float64)
 test_yt = enc.transform(test_yt.reshape((-1, 1))).astype(np.float64)
+# # Model structure
+# torch.manual_seed(3074)
+# class imgModel(nn.Module):
+#     def __init__(self, in_features, out_features):
+#         super(imgModel, self).__init__()
+#         self.l1 = nn.Linear(in_features=in_features, out_features=256)
+#         self.bn1 = nn.BatchNorm1d(256)
+#         self.dl1 = nn.Dropout(p=0.2)
+#         self.l2 = nn.Linear(in_features=256, out_features=out_features)
+#     def forward(self, x):
+#         x = self.l1(x)
+#         x = torch.sigmoid(x)
+#         x = self.dl1(x)
+#         x = self.bn1(x)
+#         x = self.l2(x)
+#         x = torch.tanh(x)
+#         return x
+torch.manual_seed(3074)
+class txtModel(nn.Module):  # two-layer MLP: Linear -> sigmoid -> Dropout -> BatchNorm1d -> Linear -> tanh
+    def __init__(self, in_features, out_features):  # in_features / out_features: input and output widths of the network
+        super(txtModel, self).__init__()
+        self.l1 = nn.Linear(in_features=in_features, out_features=256)  # hidden layer with a fixed width of 256
+        self.bn1 = nn.BatchNorm1d(256)  # batch norm over the 256 hidden features
+        self.dl2= nn.Dropout(p=0.2)  # dropout with drop probability 0.2
+        self.l2 = nn.Linear(in_features=256, out_features=out_features)  # output projection: 256 -> out_features
+    def forward(self, x):  # returns the tanh-squashed projection of x
+        # print(x[0].shape)
+        x = self.l1(x)
+        x = torch.sigmoid(x)
+        x = self.dl2(x)  # NOTE(review): dropout runs before batch norm here - confirm this order is intended
+        x = self.bn1(x)
+        x = torch.tanh(self.l2(x))  # tanh bounds every output component to [-1, 1]
+        # print(x[0].shape)
+        return x
 # Map the image ids to the corresponding image URLs
 image_map_name = 'pascal_dataset.csv'
 df = pd.read_csv(image_map_name)
 text_index = faiss.index_factory(d, "Flat", faiss.METRIC_INNER_PRODUCT)
 text_index = faiss.read_index("text_index.index")
+def T2Isearch(query, i, k=50):
     # Encode the text query
+    # inputs = text_tokenizer([query], padding=True, return_tensors="pt")
+    # outputs = text_model(**inputs)
+    # query_embedding = outputs.text_embeds
+    # query_vector = query_embedding.detach().numpy()
     # query_vector = np.concatenate((query_vector[0], query_vector[1]), dtype=np.float32)
+    # query_vector = query_vector.reshape(1,512)
+    query_vector = test_xt_proj[i-1]
+    query_vector = query_vector.reshape(1,32)
     faiss.normalize_L2(query_vector)
     index.nprobe = index.ntotal
     D, I = text_index.search(query_vector, k)
     # get rank of all classes wrt to query
     Y = train_yt
     neighbor_ys = Y[I]
     class_freq = np.zeros(Y.shape[1])
             if count == 5: break
 query = st.text_input("Enter your search query here:")
+i = st.text_input("Enter the index of test set from 1 - 200")
 if st.button("Search"):
     if query:
+        T2Isearch(query, i)