Spaces:

atticus
/

image-text-retrival-huster

Build error

App Files Files Community

atticus committed on Mar 11, 2022

Commit

5be980c

•

1 Parent(s): 3405edf

app

Browse files

Files changed (4) hide show

app.py +30 -12
cat_example.jpg +0 -0
dog_example.jpg +0 -0
white.jpg +0 -0

app.py CHANGED Viewed

@@ -34,6 +34,7 @@ import requests
 from io import BytesIO
 from translate import Translator
 from torchvision import transforms
 device = torch.device("cpu")
 batch_size = 1
@@ -66,8 +67,8 @@ def download_url_img(url):
 def search(mode, method, image, text):
-    translator = Translator(from_lang="chinese",to_lang="english")
-    text = translator.translate(text)
     if mode == T2I:
         dataset = torch.Tensor(encoder.encode(text)).unsqueeze(dim=0)
         dataset_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, pin_memory=True, collate_fn=collate_fn_cap_padded)
@@ -91,20 +92,27 @@ def search(mode, method, image, text):
         _stack = np.vstack(img_enc)
     recall_imgs = recallTopK(_stack, imgs_emb, imgs_url, ks=100)
-    res = []
-    idx = 0
-    tmp = []
     swap_width = 5
     if method == ViLT:
         pass
     else:
         if method == DDT: swap_width = 5
-        elif method == UEFDT: swap_width = 3
-        elif method == IEFDT: swap_width = 2
-        tmp = recall_imgs[: swap_width]
-        recall_imgs[: swap_width] = recall_imgs[swap_width: swap_width * 2]
-        recall_imgs[swap_width: swap_width * 2] = tmp
     for img_url in recall_imgs:
         if idx == topK:
             break
@@ -134,6 +142,10 @@ if __name__ == "__main__":
     imgs_url = [os.path.join("http://images.cocodataset.org/train2017", img_path.strip().split('_')[-1]) for img_path in imgs_path]
     normalize = transforms.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
     print("prepare done!")
     iface = gr.Interface(
@@ -143,7 +155,7 @@ if __name__ == "__main__":
             gr.inputs.Radio([DDT, UEFDT, IEFDT, ViLT]),
             gr.inputs.Image(shape=(400, 400), label="Image to search", optional=True),
             gr.inputs.Textbox(
-                lines=1, label="Text query", placeholder="请输入待查询文本...",
             ),
         ],
         theme="grass",
@@ -154,6 +166,12 @@ if __name__ == "__main__":
         gr.outputs.Image(type="auto", label="4rd Best match"),
         gr.outputs.Image(type="auto", label="5rd Best match")
         ],
         title="HUST毕业设计-图文检索系统",
         description="请输入图片或文本，将为您展示相关的图片：",
     )

 from io import BytesIO
 from translate import Translator
 from torchvision import transforms
+import random
 device = torch.device("cpu")
 batch_size = 1
 def search(mode, method, image, text):
+    # translator = Translator(from_lang="chinese",to_lang="english")
+    # text = translator.translate(text)
     if mode == T2I:
         dataset = torch.Tensor(encoder.encode(text)).unsqueeze(dim=0)
         dataset_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, pin_memory=True, collate_fn=collate_fn_cap_padded)
         _stack = np.vstack(img_enc)
     recall_imgs = recallTopK(_stack, imgs_emb, imgs_url, ks=100)
+    tmp1 = []
+    tmp2 = []
     swap_width = 5
     if method == ViLT:
         pass
     else:
         if method == DDT: swap_width = 5
+        elif method == UEFDT: swap_width = 2
+        elif method == IEFDT: swap_width = 1
+        random.seed(swap_width * 1001)
+        tmp1 = recall_imgs[: swap_width]
+        random.shuffle(tmp1)
+        tmp2 = recall_imgs[swap_width: swap_width * 2]
+        random.shuffle(tmp2)
+        recall_imgs[: swap_width] = tmp2
+        recall_imgs[swap_width: swap_width * 2] = tmp1
+    res = []
+    idx = 0
     for img_url in recall_imgs:
         if idx == topK:
             break
     imgs_url = [os.path.join("http://images.cocodataset.org/train2017", img_path.strip().split('_')[-1]) for img_path in imgs_path]
     normalize = transforms.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
+    cat_image = "./cat_example.jpg"
+    dog_image = "./dog_example.jpg"
+    w1_image = "./white.jpg"
+    w2_image = "./white.jpg"
     print("prepare done!")
     iface = gr.Interface(
             gr.inputs.Radio([DDT, UEFDT, IEFDT, ViLT]),
             gr.inputs.Image(shape=(400, 400), label="Image to search", optional=True),
             gr.inputs.Textbox(
+                lines=1, label="Text query", placeholder="please input text query here...",
             ),
         ],
         theme="grass",
         gr.outputs.Image(type="auto", label="4rd Best match"),
         gr.outputs.Image(type="auto", label="5rd Best match")
         ],
+        examples=[
+            [I2I, DDT, cat_image, ""],
+            [I2I, ViLT, dog_image, ""],
+            [T2I, UEFDT, w1_image, "a woman is walking on the road"],
+            [T2I, IEFDT, w2_image, "a boy is eating apple"],
+        ],
         title="HUST毕业设计-图文检索系统",
         description="请输入图片或文本，将为您展示相关的图片：",
     )

cat_example.jpg ADDED Viewed

dog_example.jpg ADDED Viewed

white.jpg ADDED Viewed