# Subspace_Prompting / parse_test_res.py
"""
Goal
---
1. Read test results from log.txt files
2. Compute mean and std across different folders (seeds)
3. Compute all datasets' accuracy and h-mean
4. Save the results to an Excel file
Usage
---
Assume the output files are saved under output/my_experiment,
which contains results of different seeds, e.g.,
my_experiment/
seed1/
log.txt
seed2/
log.txt
seed3/
log.txt
Run the following command from the root directory:
$ python tools/parse_test_res.py output/my_experiment
Add --ci95 to the argument if you want to get the 95% confidence
interval instead of the standard deviation:
$ python tools/parse_test_res.py output/my_experiment --ci95
If my_experiment/ has the following structure,
my_experiment/
exp-1/
seed1/
log.txt
...
seed2/
log.txt
...
seed3/
log.txt
...
exp-2/
...
exp-3/
...
Run
$ python tools/parse_test_res.py output/my_experiment --multi-exp
"""
import re
import numpy as np
import os.path as osp
import argparse
import pandas as pd
from collections import OrderedDict, defaultdict
from dassl.utils import check_isfile, listdir_nohidden
# Datasets used for the base-to-new generalization protocol (also reused
# for the few-shot protocol in main()).
b2n_dataset = [
    "imagenet",
    "caltech101",
    "fgvc_aircraft",
    "oxford_flowers",
    "dtd",
    "eurosat",
    "food101",
    "oxford_pets",
    "stanford_cars",
    "sun397",
    "ucf101",
]
# Target datasets for the cross-dataset transfer protocol
# (same as b2n_dataset minus imagenet, which is the source dataset).
cross_dataset = [
    "caltech101",
    "fgvc_aircraft",
    "oxford_flowers",
    "dtd",
    "eurosat",
    "food101",
    "oxford_pets",
    "stanford_cars",
    "sun397",
    "ucf101",
]
# ImageNet variants used for the domain-generalization protocol.
dg_dataset = [
    "imagenet",
    "imagenetv2",
    "imagenet_sketch",
    "imagenet_a",
    "imagenet_r",
]
def compute_ci95(res):
    """Return the half-width of the 95% confidence interval of the mean.

    Uses the normal approximation 1.96 * sigma / sqrt(n), where sigma is
    the (population) standard deviation of *res*.
    """
    n = len(res)
    sigma = np.std(res)
    return 1.96 * sigma / np.sqrt(n)
def parse_function(*metrics, directory="", args=None, end_signal=None):
    """Parse the ``log.txt`` of every seed sub-folder of *directory*.

    Each *metric* is a dict with keys ``"name"`` and ``"regex"``; the regex's
    first group must capture a float. A log line is only matched once
    *end_signal* has been seen in that file, so only final test results are
    collected.

    Args:
        metrics: metric descriptors, e.g. ``{"name": "accuracy", "regex": ...}``.
        directory: experiment folder containing one sub-folder per seed.
        args: parsed CLI namespace; only ``args.ci95`` is read here.
        end_signal: sentinel line that marks the start of the result section.

    Returns:
        OrderedDict mapping metric name to its mean across seeds.
        ``{"accuracy": 0.0}`` is returned when the directory is missing or
        no results were found, so callers can always index ``"accuracy"``.
    """
    print(f"Parsing files in {directory}")
    output_results = OrderedDict()
    output_results["accuracy"] = 0.0

    try:
        subdirs = listdir_nohidden(directory, sort=True)
    except Exception:
        # Missing/unreadable directory: report and fall back to the
        # zero-accuracy default (was a bare `except:` before).
        print("no folder")
        return output_results

    outputs = []
    for subdir in subdirs:
        fpath = osp.join(directory, subdir, "log.txt")
        if not check_isfile(fpath):
            # Explicit error instead of `assert` — asserts are stripped
            # under `python -O` and would silently skip the check.
            raise FileNotFoundError(f"log file not found: {fpath}")
        good_to_go = False
        output = OrderedDict()
        with open(fpath, "r") as f:
            for line in f:
                line = line.strip()
                if line == end_signal:
                    good_to_go = True
                for metric in metrics:
                    match = metric["regex"].search(line)
                    if match and good_to_go:
                        if "file" not in output:
                            output["file"] = fpath
                        output[metric["name"]] = float(match.group(1))
        if output:
            outputs.append(output)

    if not outputs:
        print("Nothing found in :")
        print(directory)
        return output_results

    # Group the per-seed values by metric name ("file" entries are only
    # echoed, never aggregated).
    metrics_results = defaultdict(list)
    for output in outputs:
        msg = ""
        for key, value in output.items():
            if isinstance(value, float):
                msg += f"{key}: {value:.2f}%. "
            else:
                msg += f"{key}: {value}. "
            if key != "file":
                metrics_results[key].append(value)
        print(msg)

    print("===")
    print(f"Summary of directory: {directory}")
    for key, values in metrics_results.items():
        avg = np.mean(values)
        std = compute_ci95(values) if args.ci95 else np.std(values)
        print(f"* {key}: {avg:.2f}% +- {std:.2f}%")
        output_results[key] = avg
    print("===")
    return output_results
def _harmonic_mean(a, b):
    """Harmonic mean of two accuracies; returns 0.0 when either value is 0."""
    try:
        return 2 / (1 / a + 1 / b)
    except ZeroDivisionError:
        return 0.0


def main(args, end_signal):
    """Aggregate per-dataset results and export them to an Excel file.

    For ``args.type == "base2new"`` this pairs every ``train_base`` run with
    its ``test_new`` counterpart and reports base/new accuracy plus their
    harmonic mean. For "fewshot", "cross" and "dg" a flat accuracy table is
    produced instead. ``args.directory`` must be the result path of ONE
    dataset; its dataset-name component is templated to visit all datasets.
    """
    metric = {
        "name": args.keyword,
        "regex": re.compile(fr"\* {args.keyword}: ([\.\deE+-]+)%"),
    }

    if args.type == "base2new":
        all_dataset = b2n_dataset
        final_results = defaultdict(list)
        final_results1 = defaultdict(list)
        # Replace the dataset-name component of the given path with "{}"
        # so the path can be instantiated for every dataset.
        pattern = r'\b(' + '|'.join(map(re.escape, all_dataset)) + r')\b'
        path_str = re.sub(pattern, "{}", args.directory)
        all_dic = [path_str.format(dataset) for dataset in all_dataset]

        # Derive the counterpart directories (train_base <-> test_new).
        all_dic1 = []
        if "train_base" in all_dic[0]:
            all_dic1 = [p.replace("train_base", "test_new") for p in all_dic]
        elif "test_new" in all_dic[0]:
            all_dic1 = [p.replace("test_new", "train_base") for p in all_dic]
        # NOTE(review): the unconditional swap means that when a train_base
        # path is passed, "Base Accuracy" is read from the test_new folders
        # (and vice versa). Preserved as-is — confirm the intended input is
        # a test_new path before changing this.
        all_dic, all_dic1 = all_dic1, all_dic

        for directory in all_dic:
            results = parse_function(
                metric, directory=directory, args=args, end_signal=end_signal
            )
            for key, value in results.items():
                final_results[key].append(value)
        for directory in all_dic1:
            results1 = parse_function(
                metric, directory=directory, args=args, end_signal=end_signal
            )
            for key, value in results1.items():
                final_results1[key].append(value)

        output_data = []
        for i, dataset in enumerate(all_dataset):
            base = final_results['accuracy'][i]
            new = final_results1['accuracy'][i]
            h = _harmonic_mean(base, new)
            output_data.append({
                'Dataset': dataset,
                'Base Accuracy': base,
                'New Accuracy': new,
                'H-Mean': h,
            })
            print(f"{dataset:<20}: base: {base:>6.2f} new: {new:>6.2f} h: {h:>6.2f}")

        # Save the per-dataset table to Excel.
        output_df = pd.DataFrame(output_data)
        output_file = "form_results_base2new.xlsx"
        output_df.to_excel(output_file, index=False)

        print("Average performance:")
        # Initialize so the harmonic mean degrades to 0 when no metric was
        # collected (mirrors the old bare `except:` fallback).
        avg_base = avg_new = 0.0
        for key, values in final_results.items():
            avg_base = np.mean(values)
            print('base')
            print(f"* {key}: {avg_base:.2f}%")
        for key, values in final_results1.items():
            avg_new = np.mean(values)
            print('new')
            print(f"* {key}: {avg_new:.2f}%")
        avg_h = _harmonic_mean(avg_base, avg_new)
        print(f'h: {avg_h:.2f}%')
    else:
        if args.type == "fewshot":
            all_dataset = b2n_dataset
        elif args.type == "cross":
            all_dataset = cross_dataset
        elif args.type == "dg":
            all_dataset = dg_dataset
        else:
            # Fail fast with a clear message — the original fell through to
            # a NameError when -type was omitted.
            raise ValueError(f"unknown task type: {args.type!r}")

        final_results = defaultdict(list)
        # Template the dataset-name component of the path (see above).
        pattern = r'\b(' + '|'.join(map(re.escape, all_dataset)) + r')\b'
        path_str = re.sub(pattern, "{}", args.directory)
        all_dic = [path_str.format(dataset) for dataset in all_dataset]

        for directory in all_dic:
            results = parse_function(
                metric, directory=directory, args=args, end_signal=end_signal
            )
            for key, value in results.items():
                final_results[key].append(value)

        output_data = []
        for i, dataset in enumerate(all_dataset):
            acc = final_results['accuracy'][i]
            output_data.append({
                'Dataset': dataset,
                'Accuracy': acc,
            })
            print(f"{dataset:<20}: Accuracy: {acc:>6.2f}")

        # Save the per-dataset table to Excel.
        output_df = pd.DataFrame(output_data)
        output_file = "form_results_" + args.type + ".xlsx"
        output_df.to_excel(output_file, index=False)

        print("Average performance:")
        for key, values in final_results.items():
            avg_base = np.mean(values)
            print(f"* {key}: {avg_base:.2f}%")
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=str, help="path to directory")
    parser.add_argument(
        "-type",
        type=str,
        choices=['base2new', 'fewshot', 'cross', 'dg'],  # validated by argparse
        help="task type:base2new, fewshot, cross, dg",
    )
    parser.add_argument(
        "--ci95", action="store_true", help=r"compute 95\% confidence interval"
    )
    parser.add_argument("--test-log", action="store_true", help="parse test-only logs")
    parser.add_argument(
        "--multi-exp", action="store_true", help="parse multiple experiments"
    )
    parser.add_argument(
        "--keyword", default="accuracy", type=str, help="which keyword to extract"
    )
    args = parser.parse_args()
    # Both regular and test-only logs mark their result section with the
    # same line, so --test-log does not change the end signal (the original
    # `if args.test_log:` branch re-assigned the identical value).
    end_signal = "=> result"
    main(args, end_signal)