| |
| |
| |
|
|
| import sys |
| import os |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| import pandas as pd |
| import torch |
| import random, argparse |
| import numpy as np |
| from sklearn.preprocessing import MinMaxScaler |
| from evaluation.metrics import get_metrics |
| from utils.slidingWindows import find_length_rank |
| from model_wrapper import * |
| from HP_list import Optimal_Uni_algo_HP_dict, Optimal_Multi_algo_HP_dict |
| import os |
| |
# Pin the run to the first GPU. NOTE(review): this is set after `import torch`;
# it still takes effect as long as CUDA has not been initialized yet — confirm
# no CUDA call happens during the imports above.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Seed every RNG the pipeline touches (torch CPU/GPU, numpy, stdlib random)
# so repeated runs produce identical scores.
seed = 2024
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
# Disable cuDNN autotuning and force deterministic kernels; trades a little
# speed for reproducibility.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import os  # redundant (already imported above); kept to preserve the file as-is
print("CUDA Available: ", torch.cuda.is_available())
print("cuDNN Version: ", torch.backends.cudnn.version())
import pickle
|
|
|
|
def get_result(filename):
    """Load the saved per-file results pickle and return its anomaly scores.

    Parameters
    ----------
    filename : str
        Path to the original ``.csv`` data file. The companion results
        pickle is expected at the same path with the ``.csv`` suffix
        replaced by ``_results.pkl``.

    Returns
    -------
    numpy.ndarray
        The ``anomaly_score`` column of the pickled results DataFrame.
    """
    pickle_filename = filename.replace('.csv', '_results.pkl')
    # Use a context manager so the file handle is always closed
    # (the previous `pickle.load(open(...))` leaked the handle).
    with open(pickle_filename, 'rb') as fh:
        df = pickle.load(fh)
    return df['anomaly_score'].to_numpy()
|
|
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Running TSB-AD')
    parser.add_argument('--mode', type=str, default='uni', choices=['uni', 'multi'],
                        help='Encoder mode: uni for univariate, multi for multivariate')
    parser.add_argument('--AD_Name', type=str, default='Time_RCD')
    parser.add_argument('--filename', type=str, default='')
    parser.add_argument('--data_direc', type=str, default='')
    # BUG FIX: argparse's `type=bool` treats every non-empty string (including
    # "False") as True. Parse the flag text explicitly; default stays True.
    parser.add_argument('--save', type=lambda s: s.lower() not in ('false', '0', 'no'),
                        default=True)
    # Parse once up front instead of re-parsing inside the loop.
    args = parser.parse_args()
    Multi = args.mode == 'multi'

    all_results = []   # one summary row (metrics + metadata) per processed file
    all_logits = []    # logits-based metrics, populated for Time_RCD only

    # Datasets whose filenames contain any of these substrings are skipped.
    if Multi:
        filter_list = [
            "GHL",
            "Daphnet",
            "Exathlon",
            "Genesis",
            "OPP",
            "SMD",
            "SMAP",
            "MSL",
            "CreditCard",
            "GECCO",
            "MITDB",
            "SVDB",
            "LTDB",
            "CATSv2",
            "TAO",
        ]
        base_dir = 'datasets/TSB-AD-M/'
    else:
        filter_list = [
            "Daphnet",
            "CATSv2",
            "SWaT",
            "LTDB",
            "TAO",
            "Exathlon",
            "MITDB",
            "MSL",
            "SMAP",
            "SMD",
            "SVDB",
            "OPP",
        ]
        base_dir = 'datasets/TSB-AD-U/'
    files = os.listdir(base_dir)

    for file in files:

        if any(filter_item in file for filter_item in filter_list):
            print(f"Skipping file: {file} due to filter criteria.")
            continue

        # Release cached GPU memory between files to reduce fragmentation/OOM.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        args.filename = file
        args.data_direc = base_dir

        if Multi:
            Optimal_Det_HP = Optimal_Multi_algo_HP_dict[args.AD_Name]
        else:
            Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name]

        # Last CSV column is the label; everything before it is the signal.
        df_path = os.path.join(args.data_direc, args.filename)
        df = pd.read_csv(df_path).dropna()
        data = df.iloc[:, 0:-1].values.astype(float)
        label = df['Label'].astype(int).to_numpy()

        slidingWindow = find_length_rank(data, rank=1)
        # TSB-AD filenames encode the train/test split point as the
        # third-to-last '_'-separated token before the extension.
        train_index = args.filename.split('.')[0].split('_')[-3]
        data_train = data[:int(train_index), :]
        test_data = data[int(train_index):, :]
        label_test = label[int(train_index):]

        logits = None

        print(f"Running {args.AD_Name} on {args.filename}...")
        if args.AD_Name in Semisupervise_AD_Pool:
            output = run_Semisupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP)
        elif args.AD_Name in Unsupervise_AD_Pool:
            if args.AD_Name == 'Time_RCD':
                # Time_RCD additionally returns raw logits alongside scores.
                output, logits = run_Unsupervise_AD(args.AD_Name, data_train, test_data, Multi=Multi, **Optimal_Det_HP)
            else:
                output = run_Unsupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP)
        else:
            raise Exception(f"{args.AD_Name} is not defined")

        if isinstance(output, np.ndarray):
            # Some detectors emit fewer scores than labels (window effects);
            # align score/label/logit arrays on the shorter length.
            min_length = min(len(output), len(label_test))
            output_aligned = output[:min_length]
            label_aligned = label_test[:min_length]
            logits_aligned = None
            if logits is not None:
                logits_aligned = logits[:min_length]

            # Binary predictions via the 3-sigma rule over the score distribution.
            evaluation_result = get_metrics(output_aligned, label_aligned, slidingWindow=slidingWindow, pred=output_aligned > (np.mean(output_aligned)+3*np.std(output_aligned)))
            evaluation_result_logits = None
            if logits is not None:
                evaluation_result_logits = get_metrics(logits_aligned, label_aligned, slidingWindow=slidingWindow, pred=logits_aligned > (np.mean(logits_aligned)+3*np.std(logits_aligned)))

            print(evaluation_result)

            result_dict = {
                'filename': args.filename,
                'AD_Name': args.AD_Name,
                'sliding_window': slidingWindow,
                'train_index': train_index,
                'data_shape': f"{data.shape[0]}x{data.shape[1]}",
                'output_length': len(output),
                'label_length': len(label_test),
                'aligned_length': min_length,
                **evaluation_result
            }
            all_results.append(result_dict)

            if logits is not None:
                logit_dict = {
                    'filename': args.filename,
                    'AD_Name': args.AD_Name,
                    'sliding_window': slidingWindow,
                    'train_index': train_index,
                    'data_shape': f"{data.shape[0]}x{data.shape[1]}",
                    'output_length': len(logits),
                    'label_length': len(label_test),
                    'aligned_length': min_length,
                    **evaluation_result_logits
                }
                all_logits.append(logit_dict)

            if args.save:
                # Per-file raw outputs go to <cwd>/<Uni|Multi>_<AD_Name>/<stem>_results.pkl
                output_filename = f'{args.filename.split(".")[0]}_results.pkl'
                output_path = os.path.join(
                    os.getcwd(), (f"{'Multi' if Multi else 'Uni'}_"+args.AD_Name), output_filename)
                # exist_ok=True makes a prior existence check redundant.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                pd.DataFrame({
                    'value': test_data[:min_length].tolist(),
                    'label': label_aligned.tolist(),
                    'anomaly_score': output_aligned.tolist(),
                    'logits': logits_aligned.tolist() if logits is not None else None
                }).to_pickle(output_path)
                print(f'Results saved to {output_path}')
        else:
            # Non-array output is treated as an error message from the wrapper.
            print(f'At {args.filename}: {output}')

            result_dict = {
                'filename': args.filename,
                'AD_Name': args.AD_Name,
                'sliding_window': None,
                'train_index': None,
                'data_shape': None,
                'error_message': output
            }
            all_results.append(result_dict)

    if all_results:
        results_df = pd.DataFrame(all_results)
        output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}.csv'
        results_df.to_csv(output_filename, index=False)
        print(f"\nAll results saved to {output_filename}")
        print(f"Total file processed: {len(all_results)}")
        print(f"Results shape: {results_df.shape}")
        if all_logits:
            logits_df = pd.DataFrame(all_logits)
            # BUG FIX: this previously reused the exact same filename as the
            # score summary above, silently overwriting it.
            logits_output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}_logits.csv'
            logits_df.to_csv(logits_output_filename, index=False)
            print(f"Logits results saved to {logits_output_filename}")
    else:
        print("No results to save.")
|
|