atticus committed
Commit 953580c
1 Parent(s): 950c874
.vscode/sftp.json ADDED
@@ -0,0 +1,32 @@
+{
+    "name": "itr-ddt",
+    "host": "192.168.0.109",
+    "protocol": "sftp",
+    "port": 22,
+    "username": "atticus",
+    "password": "qs123",
+    "passphrase": "null",
+    "passive": false,
+    "interactiveAuth": true,
+    "remotePath": "/home/atticus/proj/matching/itr-ddt",
+    "context": "D:/Projects/MultiModal/itr-ddt",
+    "uploadOnSave": true,
+    "downloadOnOpen": true,
+    "syncMode": "update",
+    "ignore": [
+        "**/.vscode/**",
+        "**/.git/**",
+        "**/.DS_Store",
+        "**/*.tar",
+        "**/*.zip",
+        "**/*.pkl",
+        "**/*.json",
+        "**/*.npy"
+    ],
+    "watcher": {
+        "files": "*",
+        "autoUpload": false,
+        "autoDelete": false
+    }
+}
+
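This config drives a VS Code SFTP extension: with uploadOnSave enabled, every saved file is pushed from the Windows context directory to remotePath on 192.168.0.109, skipping the ignore globs. Purely as an illustration of what one such upload event amounts to, here is a standalone Python sketch; it assumes the paramiko library, which is not among this repo's dependencies:

# Rough standalone equivalent of a single uploadOnSave event (illustrative
# sketch only; paramiko is an assumption, not part of this repo).
import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect("192.168.0.109", port=22, username="atticus", password="qs123")
sftp = client.open_sftp()
# Mirror one saved file from the local context to the remotePath
sftp.put("D:/Projects/MultiModal/itr-ddt/app.py",
         "/home/atticus/proj/matching/itr-ddt/app.py")
sftp.close()
client.close()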
app.py CHANGED
@@ -32,18 +32,22 @@ import sys
 from misc.dataset import TextEncoder
 import requests
 import cv2
+from io import BytesIO
+from translate import Translator
+import cupy as cp
 
 
 device = torch.device("cuda")
-batch_size = 32
-
+batch_size = 1
+topK = 5
 
 T2I = "Text 2 Image"
-
 I2I = "Image 2 Image"
 model_path = "data/best_model.pth.tar"
 # model = SentenceTransformer("clip-ViT-B-32")
 
+img_folder = Path("./photos/")
+
 
 
 def download_url_img(url):
@@ -55,83 +59,68 @@ def download_url_img(url):
         return False, []
     if response is not None and response.status_code == 200:
         input_image_data = response.content
-        np_arr = np.asarray(bytearray(input_image_data), np.uint8).reshape(1, -1)
-        parsed_image = cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
-        return True, parsed_image
-
-def search(image, mode, text):
-
-    print("Loading model from:", model_path)
-    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
-
-    join_emb = joint_embedding(checkpoint['args_dict'])
-    join_emb.load_state_dict(checkpoint["state_dict"])
-
-    for param in join_emb.parameters():
-        param.requires_grad = False
-
-    join_emb.to(device)
-    join_emb.eval()
-
-    encoder = TextEncoder()
-    print("Loading model done")
-    # (4) design intersection mode.
-    print("Please input your description of the image that you wanna search >>>")
-
-    # with open(args.data_path, 'w') as cap_file:
-    #     cap_file.writelines(cap_str)
+        # np_arr = np.asarray(bytearray(input_image_data), np.uint8).reshape(1, -1)
+        # parsed_image = cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
+        image = Image.open(BytesIO(input_image_data))
+        return True, image
+    return False, []
+
+
+def search(mode, text):
+
+    # translator = Translator(from_lang="chinese", to_lang="english")
+    # text = translator.translate(text)
     dataset = torch.Tensor(encoder.encode(text)).unsqueeze(dim=0)
     dataset_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, pin_memory=True, collate_fn=collate_fn_cap_padded)
     caps_enc = list()
+
     for _, (caps, length) in enumerate(dataset_loader, 0):
         input_caps = caps.to(device)
         with torch.no_grad():
             _, caps_emb = join_emb(None, input_caps, length)
-        caps_enc.append(caps_emb.cpu().data.numpy())
-
-    caps_stack = np.vstack(caps_enc)
-
-    print("recall from resources ...")
-    # (1) load candidate imgs from saved embedding pkl file.
-    imgs_emb_file_path = "./coco_img_emb"
-    # imgs_emb(40775, 2400)
-    imgs_emb, imgs_path = load_obj(imgs_emb_file_path)
-    # (2) calculate the sim between cap and imgs.
-    # (3) rank imgs and display the searching result.
-    imgs_url = os.path.join("http://images.cocodataset.org/train2017", imgs_path.strip().split('_')[-1])
-
-    recall_imgs = recallTopK(caps_stack, imgs_emb, imgs_url, ks=5)
+        caps_enc.append(caps_emb)
+    caps_stack = cp.vstack(caps_enc)
+
+    imgs_url = [os.path.join("http://images.cocodataset.org/train2017", img_path.strip().split('_')[-1]) for img_path in imgs_path]
+
+    recall_imgs = recallTopK(caps_stack, imgs_emb, imgs_url, ks=100)
 
     # Cat image downloaded from https://www.flickr.com/photos/blacktigersdream/23119711630
    # cat_image = "./cat_example.jpg"
     # Dog example downloaded from https://upload.wikimedia.org/wikipedia/commons/1/18/Dog_Breeds.jpg
     # dog_image = "./dog_example.jpg"
-    res = [download_url_img(img_url)[1] for img_url in recall_imgs if download_url_img(img_url)[0] == True]
-
-    # logger.info(f"Mode {mode} selected")
-    # if mode == I2I:
-    #     logger.info(f"Processing image in mode {mode}")
-    #     emb = model.encode([Image.fromarray(image)], convert_to_tensor=True)
-    # elif mode == T2I:
-    #     logger.info(f"Processing text in mode {mode}")
-    #     emb = model.encode([text], convert_to_tensor=True)
-
-    # cos_sim = util.cos_sim(img_emb, emb)
-    # logger.info(f"Best match: {img_names[torch.argmax(cos_sim)]}")
-    # return [Image.open(img_folder / img_names[top_k_best_image]) for top_k_best_image in torch.topk(cos_sim, 5, 0).indices]
+    res = []
+    idx = 0
+    for img_url in recall_imgs:
+        if idx == topK:
+            break
+        b, img = download_url_img(img_url)
+        if b:
+            res.append(img)
+            idx += 1
     return res
 
 if __name__ == "__main__":
+    # print("Loading model from:", model_path)
+    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
+
+    join_emb = joint_embedding(checkpoint['args_dict'])
+    join_emb.load_state_dict(checkpoint["state_dict"])
+
+    for param in join_emb.parameters():
+        param.requires_grad = False
+
+    join_emb.to(device)
+    join_emb.eval()
+    encoder = TextEncoder()
+    imgs_emb_file_path = "./coco_img_emb"
+    imgs_emb, imgs_path = load_obj(imgs_emb_file_path)
+    imgs_emb = cp.asarray(imgs_emb)
+    print("prepare done!")
     iface = gr.Interface(
         fn=search,
         inputs=[
-            gr.inputs.Image(label="Image to search", optional=True),
-            gr.inputs.Radio([T2I, I2I]),
+            gr.inputs.Radio([T2I]),
             gr.inputs.Textbox(
                 lines=1, label="Text query", placeholder="Introduce the search text...",
             ),
@@ -147,4 +136,4 @@ if __name__ == "__main__":
         title="HUST毕业设计-图文检索系统",
         description="请输入图片或文本,将为您展示相关的图片:",
     )
-    iface.launch()
+    iface.launch(share=True)
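After this change, download_url_img returns a PIL image rather than an OpenCV array, which Gradio can render directly. A quick sanity check of the new path, assuming the repo's dependencies are installed (importing app pulls in torch, cupy, and gradio; the sample COCO URL is taken from tmp.py below):

# Sanity check for the PIL-based downloader in app.py (assumption: run from
# the repo root with dependencies installed; model loading stays behind the
# __main__ guard, so the import itself does not load the checkpoint).
from app import download_url_img

ok, img = download_url_img("http://images.cocodataset.org/train2017/000000146722.jpg")
if ok:
    print(img.size, img.mode)  # PIL attributes, e.g. (640, 480) RGB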
coco_img_emb.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43ca7d88339063c202638beab9203f1acbc86acaaf43aa7f61a87b2789070bdd
-size 1587836571
+oid sha256:012377f7e09f9f95cc15a391f2da541ede470d4c6d6c36f9239bb59def6ec269
+size 108068864
data/best_model.pth.tar CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1615e4ce1ee8f906ded31c29817945758f532e4793faba45c7df546a593efb3e
-size 1500259972
+oid sha256:f8ada75eacbe26ecf1c3507238b542e1db689254a1dac3825ffe4842443d2947
+size 108068864
data/utable.npy CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:790951d4b08e843e3bca0563570f4134ffd17b6bd4ab8d237d2e5ae15e4febb3
-size 2342138474
+oid sha256:8c8af23b32fcfb69ad00bc22f39c557e2926b66e2edb3275437157967b5f8257
+size 120258560
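The three CHANGED entries above are Git LFS pointer files, not the binaries themselves: each records only a spec version, a sha256 oid, and the payload size in bytes, and the commit swaps the pointers for smaller payloads. A minimal sketch of reading one such pointer (read_lfs_pointer is a hypothetical helper, not part of this repo):

# Parse a Git LFS pointer file into its version/oid/size fields
# (hypothetical helper, for illustration only).
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

print(read_lfs_pointer("data/utable.npy"))
# {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:8c8a...', 'size': '120258560'}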
flagged/1st Best match/0.png ADDED
flagged/2nd Best match/0.png ADDED
flagged/3rd Best match/0.png ADDED
flagged/4rd Best match/0.png ADDED
flagged/5rd Best match/0.png ADDED
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
+Image to search,Text query,1st Best match,2nd Best match,3rd Best match,4rd Best match,5rd Best match,timestamp
+,,,,,,,2022-03-10 00:37:13.783708
+,Text 2 Image,,1st Best match/0.png,2nd Best match/0.png,3rd Best match/0.png,4rd Best match/0.png,5rd Best match/0.png,2022-03-10 01:17:09.801153
misc/__pycache__/dataset.cpython-37.pyc CHANGED
Binary files a/misc/__pycache__/dataset.cpython-37.pyc and b/misc/__pycache__/dataset.cpython-37.pyc differ
 
misc/__pycache__/evaluation.cpython-37.pyc CHANGED
Binary files a/misc/__pycache__/evaluation.cpython-37.pyc and b/misc/__pycache__/evaluation.cpython-37.pyc differ
 
misc/dataset.py CHANGED
@@ -269,7 +269,7 @@ class TextEncoder(object):
     def __init__(self, word_dict_path=path["WORD_DICT"]):
 
         path_params = os.path.join(word_dict_path, 'utable.npy')
-        self.params = np.load(path_params, encoding='latin1')
+        self.params = np.load(path_params, encoding='latin1', allow_pickle=True)
         self.dico = _load_dictionary(word_dict_path)
 
     def encode(self, text):
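For context: utable.npy stores an object array, and np.load has defaulted to allow_pickle=False since NumPy 1.16.3, so the old call raises a ValueError on current NumPy versions. A minimal reproduction of the failure mode (obj.npy is a throwaway file for this demo):

# Minimal repro of why allow_pickle=True is required for object arrays.
import numpy as np

np.save("obj.npy", np.array({"word": 0}, dtype=object))
try:
    np.load("obj.npy")
except ValueError as e:
    print(e)  # Object arrays cannot be loaded when allow_pickle=False
print(np.load("obj.npy", allow_pickle=True))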
misc/evaluation.py CHANGED
@@ -23,26 +23,27 @@ Author: Martin Engilberge
 import numpy as np
 
 from misc.utils import flatten
+import cupy as cp
 
 def cosine_sim(A, B):
-    img_norm = np.linalg.norm(A, axis=1)
-    caps_norm = np.linalg.norm(B, axis=1)
+    img_norm = cp.linalg.norm(A, axis=1)
+    caps_norm = cp.linalg.norm(B, axis=1)
 
-    scores = np.dot(A, B.T)
+    scores = cp.dot(A, B.T)
 
-    norms = np.dot(np.expand_dims(img_norm, 1),
-                   np.expand_dims(caps_norm.T, 1).T)
+    norms = cp.dot(cp.expand_dims(img_norm, 1),
+                   cp.expand_dims(caps_norm.T, 1).T)
 
     scores = (scores / norms)
 
     return scores
 
-def recallTopK(cap_enc, imgs_enc, imgs_url, ks=10, scores=None):
+def recallTopK(cap_enc, imgs_enc, imgs_path, ks=10, scores=None):
 
     if scores is None:
         scores = cosine_sim(cap_enc, imgs_enc)
 
-    recall_imgs = [imgs_url[i] for i in np.argsort(scores, axis=1)[0][::-1][:ks]]
+    recall_imgs = [imgs_path[cp.asnumpy(i)] for i in cp.argsort(scores, axis=1)[0][::-1][:ks]]
 
     return recall_imgs
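To make the new CuPy ranking concrete, here is a tiny worked example; it assumes a CUDA-capable machine, and the embeddings and paths are made up for illustration:

# Worked example: one caption embedding ranked against three image embeddings.
import cupy as cp
from misc.evaluation import cosine_sim, recallTopK

cap = cp.asarray([[1.0, 0.0]])                           # (1, d) caption embedding
imgs = cp.asarray([[1.0, 0.0], [0.0, 1.0], [0.7, 0.7]])  # (3, d) image embeddings
paths = ["a.jpg", "b.jpg", "c.jpg"]

print(cosine_sim(cap, imgs))               # [[1.0, 0.0, 0.7071...]]
print(recallTopK(cap, imgs, paths, ks=2))  # ['a.jpg', 'c.jpg']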
requirements.txt ADDED
@@ -0,0 +1,16 @@
+cupy==10.2.0
+cupy_cuda101==9.6.0
+gradio==2.8.9
+matplotlib==2.2.2
+nltk==3.3
+numpy==1.21.5
+Pillow==9.0.1
+pycocotools==2.0.4
+requests==2.27.1
+scipy==1.1.0
+sru==2.6.0
+torch==1.10.2
+torchvision==0.2.1
+tqdm==4.63.0
+translate==3.6.1
+visual_genome==1.1.1
run.sh ADDED
@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "Welcome to image search system !"
+echo "Please enjoy your time !"
+
+python pred_retrieval.py -p "data/best_model.pth.tar" -d "data/cap_file.txt" -bs 1
run_train.sh ADDED
@@ -0,0 +1 @@
+python train.py -bs 160 -gpu 1,2,3
tmp.py ADDED
@@ -0,0 +1,23 @@
+import cv2
+import requests
+import numpy as np
+
+def download_url_img(url):
+    """
+    Download an image from a URL.
+    """
+
+    try:
+        response = requests.get(url, timeout=3)
+    except Exception as e:
+        print(str(e))
+        return False, []
+    if response is not None and response.status_code == 200:
+        input_image_data = response.content
+        np_arr = np.asarray(bytearray(input_image_data), np.uint8).reshape(1, -1)
+        parsed_image = cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
+        return True, parsed_image
+    return False, []  # non-200 responses also yield a (bool, image) pair
+
+download_url_img("http://images.cocodataset.org/train2017/000000146722.jpg")
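Note that this scratch script decodes with OpenCV, so a successful call typically returns a BGR array (for JPEGs), whereas the updated app.py returns an RGB PIL image. A small conversion sketch for anyone comparing the two paths, assuming the download above succeeded:

# Convert tmp.py's BGR result to RGB for libraries that expect RGB
# (e.g. matplotlib or PIL).
import cv2

ok, bgr = download_url_img("http://images.cocodataset.org/train2017/000000146722.jpg")
if ok and bgr is not None:
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)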