"""
Goal
---
1. Read test results from log.txt files
2. Compute mean and std across different folders (seeds)
3. Compute all datasets' accuracy and h-mean
4. Save the results to an Excel file

Usage
---
Assume the output files are saved under output/my_experiment,
which contains results of different seeds, e.g.,

my_experiment/
    seed1/
        log.txt
    seed2/
        log.txt
    seed3/
        log.txt

Run the following command from the root directory:

$ python tools/parse_test_res.py output/my_experiment

Add --ci95 to the arguments if you want the 95% confidence
interval instead of the standard deviation:

$ python tools/parse_test_res.py output/my_experiment --ci95

If my_experiment/ has the following structure,

my_experiment/
    exp-1/
        seed1/
            log.txt
            ...
        seed2/
            log.txt
            ...
        seed3/
            log.txt
            ...
    exp-2/
        ...
    exp-3/
        ...

Run

$ python tools/parse_test_res.py output/my_experiment --multi-exp
"""
import re
import numpy as np
import os.path as osp
import argparse
import pandas as pd
from collections import OrderedDict, defaultdict

from dassl.utils import check_isfile, listdir_nohidden


# Datasets iterated for the "base2new" and "fewshot" task types.
b2n_dataset = [
    "imagenet",
    "caltech101",
    "fgvc_aircraft",
    "oxford_flowers",
    "dtd",
    "eurosat",
    "food101",
    "oxford_pets",
    "stanford_cars",
    "sun397",
    "ucf101",
]

# Datasets iterated for the "cross" task type: the list above without
# "imagenet", in the same order.
cross_dataset = [name for name in b2n_dataset if name != "imagenet"]

# Datasets iterated for the "dg" task type.
dg_dataset = [
    "imagenet",
    "imagenetv2",
    "imagenet_sketch",
    "imagenet_a",
    "imagenet_r",
]
def compute_ci95(res):
    """Return the half-width of the 95% confidence interval of ``res``.

    Computed as ``1.96 * std / sqrt(n)``, where ``std`` is ``np.std`` with its
    default settings and ``n`` is ``len(res)``.
    """
    sample_count = len(res)
    # Scale first, divide second: keeps the floating-point evaluation order.
    scaled_spread = 1.96 * np.std(res)
    return scaled_spread / np.sqrt(sample_count)


def parse_function(*metrics, directory="", args=None, end_signal=None):
    """Parse ``<directory>/<subdir>/log.txt`` files and average the metrics.

    Args:
        *metrics: dicts with a ``"name"`` key (label of the metric) and a
            ``"regex"`` key (compiled pattern whose first group is the value).
        directory: folder whose non-hidden entries each hold a ``log.txt``.
        args: optional namespace; only its ``ci95`` attribute is read. When
            it is ``None`` or has no ``ci95``, the standard deviation is shown.
        end_signal: a line (after stripping) equal to this value switches
            parsing on; metric lines before it are ignored.

    Returns:
        OrderedDict that always contains ``'accuracy'`` (0.0 unless a metric
        with that name was parsed) plus the mean of every metric found.

    Raises:
        FileNotFoundError: if an entry of ``directory`` has no ``log.txt``.
    """
    print(f"Parsing files in {directory}")
    output_results = OrderedDict()
    output_results['accuracy'] = 0.0

    try:
        subdirs = listdir_nohidden(directory, sort=True)
    except OSError:
        # Missing or unreadable directory: report it and return the default.
        # NOTE(review): assumes listdir_nohidden only surfaces os-level errors.
        print("no folder")
        return output_results

    outputs = []

    for subdir in subdirs:
        fpath = osp.join(directory, subdir, "log.txt")
        # Explicit raise instead of assert so the check survives ``python -O``.
        if not check_isfile(fpath):
            raise FileNotFoundError(f"No log file found at {fpath}")
        good_to_go = False
        output = OrderedDict()

        with open(fpath, "r") as f:
            for line in f:
                line = line.strip()

                if line == end_signal:
                    good_to_go = True

                # Nothing is recorded before the end signal, so skip the
                # regex work until it has been seen.
                if not good_to_go:
                    continue

                for metric in metrics:
                    match = metric["regex"].search(line)
                    if match:
                        if "file" not in output:
                            output["file"] = fpath
                        # Later matches of the same metric overwrite earlier ones.
                        output[metric["name"]] = float(match.group(1))

        if output:
            outputs.append(output)

    if not outputs:
        print("Nothing found in :")
        print(directory)
        return output_results

    metrics_results = defaultdict(list)

    for output in outputs:
        msg = ""
        for key, value in output.items():
            if isinstance(value, float):
                msg += f"{key}: {value:.2f}%. "
            else:
                msg += f"{key}: {value}. "
            if key != "file":
                metrics_results[key].append(value)
        print(msg)

    # ``args`` defaults to None, so do not assume the attribute exists.
    use_ci95 = bool(getattr(args, "ci95", False))

    print("===")
    print(f"Summary of directory: {directory}")
    for key, values in metrics_results.items():
        avg = np.mean(values)
        std = compute_ci95(values) if use_ci95 else np.std(values)
        print(f"* {key}: {avg:.2f}% +- {std:.2f}%")
        output_results[key] = avg
    print("===")

    return output_results


def _dataset_directories(template_dir, datasets):
    """Return one path per dataset by swapping the dataset name found in ``template_dir``."""
    pattern = re.compile(r'\b(' + '|'.join(map(re.escape, datasets)) + r')\b')
    # A callable replacement inserts the name literally, so paths containing
    # braces or several occurrences of a dataset name are handled.
    return [
        pattern.sub(lambda _match, name=name: name, template_dir)
        for name in datasets
    ]


def _collect_metric(metric, directories, args, end_signal):
    """Parse every directory and return the mean of ``metric`` for each (0.0 if absent)."""
    values = []
    for directory in directories:
        results = parse_function(
            metric, directory=directory, args=args, end_signal=end_signal
        )
        values.append(results.get(metric["name"], 0.0))
    return values


def _harmonic_mean(base, new):
    """Return the harmonic mean of two scores, or 0.0 where it is undefined."""
    if base == 0 or new == 0:
        return 0.0
    denominator = 1 / base + 1 / new
    return 2 / denominator if denominator != 0 else 0.0


def main(args, end_signal):
    """Aggregate results over all datasets of a task and save them to Excel.

    ``args.directory`` is the result folder of one dataset; the dataset name
    inside it is swapped for every dataset of the selected task. For
    ``base2new`` the path must contain ``train_base`` or ``test_new``; the
    counterpart folder is derived by replacing one with the other and the
    harmonic mean of both scores is reported.

    Args:
        args: namespace providing ``keyword``, ``type`` and ``directory``
            (it is also forwarded to ``parse_function``).
        end_signal: log line after which metric lines are parsed.

    Raises:
        ValueError: if ``args.type`` is not a supported task type, or if a
            ``base2new`` path contains neither ``train_base`` nor ``test_new``.

    Side effects:
        Writes ``form_results_<type>.xlsx`` to the current working directory.
    """
    metric = {
        "name": args.keyword,
        "regex": re.compile(fr"\* {args.keyword}: ([\.\deE+-]+)%"),
    }

    datasets_by_type = {
        "base2new": b2n_dataset,
        "fewshot": b2n_dataset,
        "cross": cross_dataset,
        "dg": dg_dataset,
    }
    if args.type not in datasets_by_type:
        raise ValueError(
            f"unsupported task type {args.type!r}; "
            f"expected one of {sorted(datasets_by_type)}"
        )
    all_dataset = datasets_by_type[args.type]
    all_dic = _dataset_directories(args.directory, all_dataset)

    if args.type == "base2new":
        if "train_base" in all_dic[0]:
            base_dirs = all_dic
            new_dirs = [d.replace("train_base", "test_new") for d in all_dic]
        elif "test_new" in all_dic[0]:
            new_dirs = all_dic
            base_dirs = [d.replace("test_new", "train_base") for d in all_dic]
        else:
            raise ValueError(
                "base2new expects 'train_base' or 'test_new' in the directory "
                f"path, got: {args.directory}"
            )

        # All base folders are parsed before the new-class folders.
        base_values = _collect_metric(metric, base_dirs, args, end_signal)
        new_values = _collect_metric(metric, new_dirs, args, end_signal)

        output_data = []
        for dataset, base, new in zip(all_dataset, base_values, new_values):
            h = _harmonic_mean(base, new)
            output_data.append({
                'Dataset': dataset,
                'Base Accuracy': base,
                'New Accuracy': new,
                'H-Mean': h,
            })
            print(f"{dataset:<20}: base: {base:>6.2f}  new: {new:>6.2f}  h: {h:>6.2f}")

        # Save the per-dataset results to Excel.
        output_file = "form_results_base2new.xlsx"
        pd.DataFrame(output_data).to_excel(output_file, index=False)

        print("Average performance:")

        avg_base = np.mean(base_values)
        print('base')
        print(f"* {args.keyword}: {avg_base:.2f}%")

        avg_new = np.mean(new_values)
        print('new')
        print(f"* {args.keyword}: {avg_new:.2f}%")

        avg_h = _harmonic_mean(avg_base, avg_new)
        print(f'h: {avg_h:.2f}%')
    else:
        values = _collect_metric(metric, all_dic, args, end_signal)

        output_data = []
        for dataset, value in zip(all_dataset, values):
            output_data.append({
                'Dataset': dataset,
                'Accuracy': value,
            })
            print(f"{dataset:<20}: Accuracy: {value:>6.2f}")

        # Save the per-dataset results to Excel.
        output_file = "form_results_" + args.type + ".xlsx"
        pd.DataFrame(output_data).to_excel(output_file, index=False)

        print("Average performance:")
        print(f"* {args.keyword}: {np.mean(values):.2f}%")

    
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=str, help="path to directory")
    # "-type" is kept for existing command lines; "--type" matches the style
    # of the other options. The task type is required because main() cannot
    # pick a dataset list without it.
    parser.add_argument(
        "-type",
        "--type",
        type=str,
        required=True,
        choices=['base2new', 'fewshot', 'cross', 'dg'],  # validate the value
        help="task type:base2new, fewshot, cross, dg",
    )
    # argparse %-formats help strings, so a literal percent sign must be "%%".
    parser.add_argument(
        "--ci95", action="store_true", help="compute 95%% confidence interval"
    )
    # NOTE(review): --test-log is accepted but does not change the end signal,
    # and --multi-exp is not read anywhere in this file; both are kept so
    # existing command lines still parse.
    parser.add_argument("--test-log", action="store_true", help="parse test-only logs")
    parser.add_argument(
        "--multi-exp", action="store_true", help="parse multiple experiments"
    )
    parser.add_argument(
        "--keyword", default="accuracy", type=str, help="which keyword to extract"
    )
    args = parser.parse_args()

    # Metric lines are only parsed after this marker line appears in a log.
    end_signal = "=> result"

    main(args, end_signal)