# Extracted from a web file viewer: file size 5,321 bytes, revision c4c7cee.
# (The viewer's status text and its 1-144 line-number gutter were removed.)
import os
import sys
import PIL
import json
import torch
import numpy as np
import pandas as pd
import operator
from PIL import Image
from itertools import cycle
from tqdm.auto import tqdm, trange
from os.path import join
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from backbone import get_backbone
from utils import haversine, get_filenames, get_match_values, compute_print_accuracy
def compute_features(path, data_dir, csv_file, tag, args):
    """Extract backbone features for a dataset and save one ``.npy`` per image.

    Extraction is skipped when ``path`` already exists and contains exactly as
    many entries as the dataset has items.

    Args:
        path: Directory the ``<image_id>.npy`` files are written to; it is
            also the directory inspected to decide whether work can be skipped.
        data_dir: Forwarded to ``GeoDataset`` as its first argument.
        csv_file: Forwarded to ``GeoDataset`` as its second argument.
        tag: Forwarded to ``GeoDataset`` as the ``tag`` keyword.
        args: Namespace providing ``name`` (handed to ``get_backbone``) and
            ``batch_size`` (handed to the ``DataLoader``).
    """
    # NOTE(review): GeoDataset is not among the imports visible in this file;
    # confirm where it is expected to come from.
    data = GeoDataset(data_dir, csv_file, tag=tag)

    # The check previously read an undefined ``test_features_dir`` name, which
    # raised NameError on every call; ``path`` is the directory being filled.
    if os.path.isdir(path) and len(os.listdir(path)) == len(data):
        return

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The second element returned by get_backbone is not used here.
    model, _transform, inference, collate_fn = get_backbone(args.name)
    dataloader = DataLoader(
        data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=8,
        collate_fn=collate_fn,
    )
    model = model.to(device)
    os.makedirs(path, exist_ok=True)

    for batch in tqdm(dataloader):
        image_ids, features = inference(model, batch)
        # ``Tensor.numpy()`` only works on CPU tensors that do not require
        # grad; this is a no-op when ``inference`` already returns such tensors.
        features = features.detach().cpu()
        # Save each row with a leading axis of length 1, one file per image id.
        for image_id, feature in zip(image_ids, features):
            np.save(join(path, f"{image_id}.npy"), feature.unsqueeze(0).numpy())
def _load_gps_lookup(csv_path, id_column):
    """Map each ``id_column`` value in *csv_path* to a ``[latitude, longitude]`` array."""
    frame = pd.read_csv(csv_path, dtype={id_column: str})
    coords = frame[["latitude", "longitude"]].to_numpy()
    # Later rows win on duplicate ids, as with a dict comprehension over rows.
    return dict(zip(frame[id_column], coords))


def _best_neighbor_id(query_vector, indexes):
    """Return the id whose top-1 search score is the highest across *indexes*."""
    best_id, best_score = None, None
    for index, ids in indexes:
        scores, positions = index.search(query_vector, 1)
        score, position = np.squeeze(scores), np.squeeze(positions)
        # ``>=`` lets a later index win ties, matching the previous
        # ``bisect.insort`` followed by ``reversed`` selection.
        # NOTE(review): this keeps the *largest* score, which is the nearest
        # neighbour only for similarity-style indexes -- confirm the metric.
        if best_score is None or score >= best_score:
            best_id, best_score = ids[position], score
    return best_id


def get_results(args, train_test=None):
    """Pair every test feature with the GPS of its best training match.

    The result is cached at ``<features_parent>/.cache/1-nn.pkl`` and loaded
    from there on later calls.

    Args:
        args: Namespace providing ``features_parent``, ``data_parent`` and
            ``annotation_file``.
        train_test: Ignored; the value was always overwritten. Kept (now
            optional) so existing call sites continue to work.

    Returns:
        A list of ``(train_gps, test_gps)`` tuples, each element an array
        built from ``[latitude, longitude]`` with a leading axis of length 1.
    """
    import joblib

    cache_dir = join(args.features_parent, ".cache")
    cache_file = join(cache_dir, "1-nn.pkl")
    if os.path.isfile(cache_file):
        # NOTE: joblib.load unpickles its input; only point this at cache
        # files produced by this script.
        return joblib.load(cache_file)

    # These two names were previously read without ever being defined
    # (NameError). They mirror the locations the ``--test`` entry point uses.
    test_features_dir = join(args.features_parent, "features-test")
    test_path_csv = join(args.data_parent, "test.csv")

    indexes = [
        get_filenames(idx) for idx in tqdm(range(1, 6), desc="Loading indexes...")
    ]
    # Maps between image id and lat/lon for both splits.
    train_gt = _load_gps_lookup(
        join(args.data_parent, args.annotation_file), "image_id"
    )
    test_gt = _load_gps_lookup(test_path_csv, "id")

    os.makedirs(cache_dir, exist_ok=True)
    train_test = []
    for f in tqdm(os.listdir(test_features_dir)):
        query_vector = np.load(join(test_features_dir, f))
        match_id = _best_neighbor_id(query_vector, indexes)
        train_gps = train_gt[match_id.replace(".npy", "")][None, :]
        test_gps = test_gt[f.replace(".npy", "")][None, :]
        train_test.append((train_gps, test_gps))

    joblib.dump(train_test, cache_file)
    return train_test
if __name__ == "__main__":
    # Command-line entry point: extract features for one training shard, or
    # (with --test) extract test features and report retrieval accuracy.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--id", type=int, default=1)  # maybe need to remove/refactor
    parser.add_argument("--batch_size", type=int, default=512)
    parser.add_argument(
        "--annotation_file", type=str, required=False, default="train.csv"
    )
    parser.add_argument("--name", type=str, default="openai/clip-vit-base-patch32")
    parser.add_argument("--features_parent", type=str, default="faiss/")
    parser.add_argument("--data_parent", type=str, default="data/")
    parser.add_argument("--test", action="store_true")
    args = parser.parse_args()

    # Features are stored per backbone: <features_parent>/<name>/...
    args.features_parent = join(args.features_parent, args.name)

    if args.test:
        csv_file = join(args.data_parent, "test.csv")
        data_dir = join(args.data_parent, "test")
        path = join(args.features_parent, "features-test")
        # compute_features loads the backbone itself, so the extra
        # get_backbone call that used to sit here was dropped.
        compute_features(path, data_dir, csv_file, tag="id", args=args)

        # ``train_test`` used to be passed here before it was ever assigned
        # (NameError); get_results does not use the incoming value.
        train_test = get_results(args, None)

        from collections import Counter

        N, pos = Counter(), Counter()
        for train_gps, test_gps in tqdm(train_test, desc="Computing accuracy..."):
            get_match_values(train_gps, test_gps, N, pos)
        for train_gps, test_gps in tqdm(train_test, desc="Computing haversine..."):
            haversine(train_gps, test_gps, N, pos)
        compute_print_accuracy(N, pos)
    else:
        csv_file = join(args.data_parent, args.annotation_file)
        path = join(args.features_parent, f"features-{args.id}")
        data_dir = join(args.data_parent, f"images-{args.id}", "train")
        compute_features(path, data_dir, csv_file, tag="image_id", args=args)
# End of file (a stray "|" left over from the viewer's gutter was removed).