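"""Evaluation script for classifiers on routing problems (TSPTW, PCTSP, PCTSPTW, CVRP).

Two model types are supported: a neural-network classifier ("nn"), evaluated in
padded batches on CPU/GPU, and a solver-based ground-truth classifier
("ground_truth"), evaluated per instance with a multiprocessing pool.
"""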
import os
import argparse
import json
import multiprocessing
import torch
import time
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score
from utils.util_calc import TemporalConfusionMatrix
from models.classifiers.nn_classifiers.nn_classifier import NNClassifier
from models.classifiers.ground_truth.ground_truth import GroundTruth
from models.classifiers.ground_truth.ground_truth_base import FAIL_FLAG
from utils.data_utils.tsptw_dataset import TSPTWDataloader
from utils.data_utils.pctsp_dataset import PCTSPDataloader
from utils.data_utils.pctsptw_dataset import PCTSPTWDataloader
from utils.data_utils.cvrp_dataset import CVRPDataloader
from utils.utils import set_device
from utils.utils import load_dataset
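
# Builds the evaluation data source. For the "nn" model type this returns a
# DataLoader whose collate function post-pads every sequence in a batch to the
# longest one and records the valid (non-padded) positions in "pad_mask";
# for any other model type the raw dataset is returned unchanged.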
def load_eval_dataset(dataset_path, problem, model_type, batch_size, num_workers, parallel, num_cpus):
    if model_type == "nn":
        if problem == "tsptw":
            eval_dataset = TSPTWDataloader(dataset_path, sequential=True, parallel=parallel, num_cpus=num_cpus)
        elif problem == "pctsp":
            eval_dataset = PCTSPDataloader(dataset_path, sequential=True, parallel=parallel, num_cpus=num_cpus)
        elif problem == "pctsptw":
            eval_dataset = PCTSPTWDataloader(dataset_path, sequential=True, parallel=parallel, num_cpus=num_cpus)
        elif problem == "cvrp":
            eval_dataset = CVRPDataloader(dataset_path, sequential=True, parallel=parallel, num_cpus=num_cpus)
        else:
            raise NotImplementedError

        #------------
        # dataloader
        #------------
        def pad_seq_length(batch):
            data = {}
            for key in batch[0].keys():
                padding_value = True if key == "mask" else 0.0
                # post-padding
                data[key] = torch.nn.utils.rnn.pad_sequence([d[key] for d in batch], batch_first=True, padding_value=padding_value)
            pad_mask = torch.nn.utils.rnn.pad_sequence([torch.full((d["mask"].size(0), ), True) for d in batch], batch_first=True, padding_value=False)
            data.update({"pad_mask": pad_mask})
            return data

        eval_dataloader = DataLoader(eval_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     collate_fn=pad_seq_length,
                                     num_workers=num_workers)
        return eval_dataloader
    else:
        eval_dataset = load_dataset(dataset_path)
        return eval_dataset
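
# Evaluates a classifier on a dataset produced by load_eval_dataset. The "nn"
# branch returns (overall macro F1, per-sequence-length F1, per-step F1,
# inference time, temporal confusion matrices, sample counts per sequence
# length); the "ground_truth" branch returns (macro F1, inference time).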
def eval_classifier(problem: str,
                    dataset,
                    model_type: str,
                    model_dir: str = None,
                    gpu: int = -1,
                    num_workers: int = 4,
                    batch_size: int = 128,
                    parallel: bool = True,
                    solver: str = "ortools",
                    num_cpus: int = 1):
    #--------------
    # gpu settings
    #--------------
    use_cuda, device = set_device(gpu)

    #-------
    # model
    #-------
    num_classes = 3 if problem == "pctsptw" else 2
    if model_type == "nn":
        assert model_dir is not None, "please specify model_dir when model_type is nn."
        params = argparse.ArgumentParser()
        # model_dir = os.path.split(args.model_path)[0]
        with open(f"{model_dir}/cmd_args.dat", "r") as f:
            params.__dict__ = json.load(f)
        assert params.problem == problem, "problem of the trained model should match that of the dataset"
        model = NNClassifier(problem=params.problem,
                             node_enc_type=params.node_enc_type,
                             edge_enc_type=params.edge_enc_type,
                             dec_type=params.dec_type,
                             emb_dim=params.emb_dim,
                             num_enc_mlp_layers=params.num_enc_mlp_layers,
                             num_dec_mlp_layers=params.num_dec_mlp_layers,
                             num_classes=num_classes,
                             dropout=params.dropout,
                             pos_encoder=params.pos_encoder)
        # load trained weights (the best epoch); map_location keeps CPU-only runs working
        with open(f"{model_dir}/best_epoch.dat", "r") as f:
            best_epoch = int(f.read())
        print(f"loaded {model_dir}/model_epoch{best_epoch}.pth.")
        model.load_state_dict(torch.load(f"{model_dir}/model_epoch{best_epoch}.pth", map_location=device))
        if use_cuda:
            model.to(device)
        is_sequential = model.is_sequential
    elif model_type == "ground_truth":
        model = GroundTruth(problem=problem, solver_type=solver)
        is_sequential = False
    else:
        assert False, f"Invalid model type: {model_type}"
    #---------
    # Metrics
    #---------
    overall_accuracy = MulticlassF1Score(num_classes=num_classes, average="macro").to(device)
    eval_accuracy_dict = {}        # seq_length -> MulticlassF1Score(num_classes=num_classes, average="macro")
    temp_confmat_dict = {}         # seq_length -> TemporalConfusionMatrix(num_classes=num_classes, seq_length=seq_length, device=device)
    temporal_accuracy_dict = {}    # seq_length -> list of per-step MulticlassF1Score
    num_nodes_dist_dict = {}       # seq_length -> number of samples with that sequence length

    #------------
    # Evaluation
    #------------
    if model_type == "nn":
        model.eval()
        eval_time = 0.0
        print("Evaluating models ...", end="")
        start_time = time.perf_counter()
        for data in dataset:
            if use_cuda:
                data = {key: value.to(device) for key, value in data.items()}
            if not is_sequential:
                shp = data["curr_node_id"].size()
                data = {key: value.flatten(0, 1) for key, value in data.items()}
            probs = model(data)  # [batch_size x num_classes] or [batch_size x max_seq_length x num_classes]
            if not is_sequential:
                probs = probs.view(*shp, -1)  # [batch_size x max_seq_length x num_classes]
                data["labels"] = data["labels"].view(*shp)
                data["pad_mask"] = data["pad_mask"].view(*shp)

            #------------
            # evaluation
            #------------
            start_eval_time = time.perf_counter()
            # accuracy
            seq_length_list = torch.unique(data["pad_mask"].sum(-1))
            for seq_length_tensor in seq_length_list:
                seq_length = seq_length_tensor.item()
                if seq_length not in eval_accuracy_dict.keys():
                    eval_accuracy_dict[seq_length] = MulticlassF1Score(num_classes=num_classes, average="macro").to(device)
                    temp_confmat_dict[seq_length] = TemporalConfusionMatrix(num_classes=num_classes, seq_length=seq_length, device=device)
                    temporal_accuracy_dict[seq_length] = [MulticlassF1Score(num_classes=num_classes, average="macro").to(device) for _ in range(seq_length)]
                    num_nodes_dist_dict[seq_length] = 0
                seq_length_mask = (data["pad_mask"].sum(-1) == seq_length)  # [batch_size]
                extracted_labels = data["labels"][seq_length_mask]
                extracted_probs = probs[seq_length_mask]
                extracted_mask = data["pad_mask"][seq_length_mask].view(-1)  # [batch_size x max_seq_length] -> [(batch_size*max_seq_length)]
                eval_accuracy_dict[seq_length](extracted_probs.argmax(-1).view(-1)[extracted_mask], extracted_labels.view(-1)[extracted_mask])
                mask = data["pad_mask"].view(-1)
                overall_accuracy(probs.argmax(-1).view(-1)[mask], data["labels"].view(-1)[mask])
                # confusion matrix
                temp_confmat_dict[seq_length].update(probs.argmax(-1), data["labels"], data["pad_mask"])
                # temporal accuracy
                for step in range(seq_length):
                    temporal_accuracy_dict[seq_length][step](extracted_probs[:, step, :], extracted_labels[:, step])
                # number of samples whose sequence length is seq_length
                num_nodes_dist_dict[seq_length] += len(extracted_labels)
            eval_time += time.perf_counter() - start_eval_time
        calc_time = time.perf_counter() - start_time - eval_time
        total_eval_accuracy = {key: value.compute().item() for key, value in eval_accuracy_dict.items()}
        overall_accuracy = overall_accuracy.compute()  # .item()
        temporal_confmat = {key: value.compute() for key, value in temp_confmat_dict.items()}
        temporal_accuracy = {key: [value.compute().item() for value in values] for key, values in temporal_accuracy_dict.items()}
        print("done")
        return overall_accuracy, total_eval_accuracy, temporal_accuracy, calc_time, temporal_confmat, num_nodes_dist_dict
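    # ground-truth (solver-based) path: inputs are built and labels inferred
    # per instance with a multiprocessing pool instead of batched inference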
    else:
        eval_accuracy = MulticlassF1Score(num_classes=num_classes, average="macro").to(device)
        print("Loading data ...", end=" ")
        with multiprocessing.Pool(num_cpus) as pool:
            input_list = list(pool.starmap(model.get_inputs, [(instance["tour"], 0, instance) for instance in dataset]))
        print("done")
        print("Inferring labels ...", end="")
        pool = multiprocessing.Pool(num_cpus)
        start_time = time.perf_counter()
        prob_list = list(pool.starmap(model, tqdm([(inputs, False, False) for inputs in input_list])))
        calc_time = time.perf_counter() - start_time
        pool.close()
        print("done")
        print("Evaluating models ...", end="")
        for i, instance in enumerate(dataset):
            labels = instance["labels"]
            for vehicle_id in range(len(labels)):
                for step, label in labels[vehicle_id]:
                    pred_label = prob_list[i][vehicle_id][step - 1]
                    if pred_label == FAIL_FLAG:
                        # the solver failed on this state: force a wrong prediction so it is counted as an error
                        pred_label = label - 1 if label != 0 else label + 1
                    eval_accuracy(torch.LongTensor([pred_label]).view(1, -1), torch.LongTensor([label]).view(1, -1))
        total_eval_accuracy = eval_accuracy.compute()
        print("done")
        return total_eval_accuracy.item(), calc_time
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    #-----------------
    # general settings
    #-----------------
    parser.add_argument("--gpu", default=-1, type=int, help="Used GPU Number: gpu=-1 indicates using cpu")
    parser.add_argument("--num_workers", default=4, type=int, help="Number of workers in dataloader")

    #-------------
    # data setting
    #-------------
    parser.add_argument("--dataset_path", type=str, help="Path to a dataset", required=True)

    #------------------
    # Metrics settings
    #------------------

    #----------------
    # model settings
    #----------------
    parser.add_argument("--model_type", type=str, default="nn", help="Select from [nn, ground_truth]")
    # nn classifier
    parser.add_argument("--model_dir", type=str, default=None)
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--parallel", action="store_true")
    # ground truth
    parser.add_argument("--solver", type=str, default="ortools")
    parser.add_argument("--num_cpus", type=int, default=os.cpu_count())
    args = parser.parse_args()

    # the problem name is taken from the parent directory of the dataset path
    problem = str(os.path.basename(os.path.dirname(args.dataset_path)))
    dataset = load_eval_dataset(args.dataset_path, problem, args.model_type, args.batch_size, args.num_workers, args.parallel, args.num_cpus)
    eval_classifier(problem=problem,
                    dataset=dataset,
                    model_type=args.model_type,
                    model_dir=args.model_dir,
                    gpu=args.gpu,
                    num_workers=args.num_workers,
                    batch_size=args.batch_size,
                    parallel=args.parallel,
                    solver=args.solver,
                    num_cpus=args.num_cpus)
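
# Example invocation (illustrative; assumes this file is saved as eval_classifier.py,
# and the dataset/checkpoint paths below are placeholders for your own files):
#   python eval_classifier.py --dataset_path data/tsptw/eval_dataset.pkl \
#       --model_type nn --model_dir checkpoints/tsptw --gpu 0
# The problem name ("tsptw" here) is inferred from the parent directory of --dataset_path.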