diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..8d4e4c7ff72679146f2ce36c2e6dac641a04d133 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +models/granite_tsfm diff --git a/HP_list.py b/HP_list.py new file mode 100644 index 0000000000000000000000000000000000000000..49a1c7f6da6baf0b95ca28c210696608c8663c1c --- /dev/null +++ b/HP_list.py @@ -0,0 +1,283 @@ +Multi_algo_HP_dict = { + 'IForest': { + 'n_estimators': [25, 50, 100, 150, 200], + 'max_features': [0.2, 0.4, 0.6, 0.8, 1.0] + }, + 'LOF': { + 'n_neighbors': [10, 20, 30, 40, 50], + 'metric': ['minkowski', 'manhattan', 'euclidean'] + }, + 'PCA': { + 'n_components': [0.25, 0.5, 0.75, None] + }, + 'HBOS': { + 'n_bins': [5, 10, 20, 30, 40], + 'tol': [0.1, 0.3, 0.5, 0.7] + }, + 'OCSVM': { + 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], + 'nu': [0.1, 0.3, 0.5, 0.7] + }, + 'MCD': { + 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] + }, + 'KNN': { + 'n_neighbors': [10, 20, 30, 40, 50], + 'method': ['largest', 'mean', 'median'] + }, + 'KMeansAD': { + 'n_clusters': [10, 20, 30, 40], + 'window_size': [10, 20, 30, 40] + }, + 'COPOD': { + 'HP': [None] + }, + 'CBLOF': { + 'n_clusters': [4, 8, 16, 32], + 'alpha': [0.6, 0.7, 0.8, 0.9] + }, + 'EIF': { + 'n_trees': [25, 50, 100, 200] + }, + 'RobustPCA': { + 'max_iter': [500, 1000, 1500] + }, + 'AutoEncoder': { + 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] + }, + 'CNN': { + 'window_size': [50, 100, 150], + 'num_channel': [[32, 32, 40], [16, 32, 64]] + }, + 'LSTMAD': { + 'window_size': [50, 100, 150], + 'lr': [0.0004, 0.0008] + }, + 'TranAD': { + 'win_size': [5, 10, 50], + 'lr': [1e-3, 1e-4] + }, + 'AnomalyTransformer': { + 'win_size': [50, 100, 150], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'OmniAnomaly': { + 'win_size': [5, 50, 
100], + 'lr': [0.002, 0.0002] + }, + 'USAD': { + 'win_size': [5, 50, 100], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'Donut': { + 'win_size': [60, 90, 120], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'TimesNet': { + 'win_size': [32, 96, 192], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'FITS': { + 'win_size': [100, 200], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'OFA': { + 'win_size': [50, 100, 150] + }, + 'Time_RCD': { + 'win_size': 7000 + }, + 'TSPulse': { + 'win_size': [64, 128, 256], + 'batch_size': [32, 64, 128], + 'aggregation_length': [32, 64, 128], + 'aggr_function': ['max', 'mean'], + 'smoothing_length': [4, 8, 16] + } +} + + +Optimal_Multi_algo_HP_dict = { + 'IForest': {'n_estimators': 25, 'max_features': 0.8}, + 'LOF': {'n_neighbors': 50, 'metric': 'euclidean'}, + 'PCA': {'n_components': 0.25}, + 'HBOS': {'n_bins': 30, 'tol': 0.5}, + 'OCSVM': {'kernel': 'rbf', 'nu': 0.1}, + 'MCD': {'support_fraction': 0.8}, + 'KNN': {'n_neighbors': 50, 'method': 'mean'}, + 'KMeansAD': {'n_clusters': 10, 'window_size': 40}, + 'KShapeAD': {'n_clusters': 20, 'window_size': 40}, + 'COPOD': {'n_jobs':1}, + 'CBLOF': {'n_clusters': 4, 'alpha': 0.6}, + 'EIF': {'n_trees': 50}, + 'RobustPCA': {'max_iter': 1000}, + 'AutoEncoder': {'hidden_neurons': [128, 64]}, + 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, + 'LSTMAD': {'window_size': 150, 'lr': 0.0008}, + 'TranAD': {'win_size': 10, 'lr': 0.001}, + 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, + 'OmniAnomaly': {'win_size': 100, 'lr': 0.002}, + 'USAD': {'win_size': 100, 'lr': 0.001}, + 'Donut': {'win_size': 60, 'lr': 0.001}, + 'TimesNet': {'win_size': 96, 'lr': 0.0001}, + 'FITS': {'win_size': 100, 'lr': 0.001}, + 'OFA': {'win_size': 50}, + 'Time_RCD': {'win_size':5000, 'batch_size': 1}, + 'DADA': {'win_size': 100, 'batch_size': 64}, + 'TSPulse': {'win_size': 96 , 'batch_size': 64, 'aggregation_length': 64, 'aggr_function': 'max', 'smoothing_length': 8} +} + + +Uni_algo_HP_dict = { + 'Sub_IForest': { + 'periodicity': [1, 2, 3], + 'n_estimators': [25, 50, 100, 150, 200] + }, + 'IForest': { + 'n_estimators': [25, 50, 100, 150, 200] + }, + 'Sub_LOF': { + 'periodicity': [1, 2, 3], + 'n_neighbors': [10, 20, 30, 40, 50] + }, + 'LOF': { + 'n_neighbors': [10, 20, 30, 40, 50] + }, + 'POLY': { + 'periodicity': [1, 2, 3], + 'power': [1, 2, 3, 4] + }, + 'MatrixProfile': { + 'periodicity': [1, 2, 3] + }, + 'NORMA': { + 'periodicity': [1, 2, 3], + 'clustering': ['hierarchical', 'kshape'] + }, + 'SAND': { + 'periodicity': [1, 2, 3] + }, + 'Series2Graph': { + 'periodicity': [1, 2, 3] + }, + 'Sub_PCA': { + 'periodicity': [1, 2, 3], + 'n_components': [0.25, 0.5, 0.75, None] + }, + 'Sub_HBOS': { + 'periodicity': [1, 2, 3], + 'n_bins': [5, 10, 20, 30, 40] + }, + 'Sub_OCSVM': { + 'periodicity': [1, 2, 3], + 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'] + }, + 'Sub_MCD': { + 'periodicity': [1, 2, 3], + 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] + }, + 'Sub_KNN': { + 'periodicity': [1, 2, 3], + 'n_neighbors': [10, 20, 30, 40, 50], + }, + 'KMeansAD_U': { + 'periodicity': [1, 2, 3], + 'n_clusters': [10, 20, 30, 40], + }, + 'KShapeAD': { + 'periodicity': [1, 2, 3] + }, + 'AutoEncoder': { + 'window_size': [50, 100, 150], + 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] + }, + 'CNN': { + 'window_size': [50, 100, 150], + 'num_channel': [[32, 32, 40], [16, 32, 64]] + }, + 'LSTMAD': { + 'window_size': [50, 100, 150], + 'lr': [0.0004, 0.0008] + }, + 'TranAD': { + 'win_size': [5, 10, 50], + 'lr': [1e-3, 1e-4] + }, + 'AnomalyTransformer': { + 'win_size': [50, 100, 150], + 'lr': [1e-3, 1e-4, 1e-5] + 
}, + 'OmniAnomaly': { + 'win_size': [5, 50, 100], + 'lr': [0.002, 0.0002] + }, + 'USAD': { + 'win_size': [5, 50, 100], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'Donut': { + 'win_size': [60, 90, 120], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'TimesNet': { + 'win_size': [32, 96, 192], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'FITS': { + 'win_size': [100, 200], + 'lr': [1e-3, 1e-4, 1e-5] + }, + 'OFA': { + 'win_size': [50, 100, 150] + }, + # 'Time_RCD': { + # 'win_size': [1000, 2000, 3000, 4000, 5000, 6000, 8000, 10000], + # 'batch_size': [32, 64, 128] + # } +} + +Optimal_Uni_algo_HP_dict = { + 'Sub_IForest': {'periodicity': 1, 'n_estimators': 150}, + 'IForest': {'n_estimators': 200}, + 'Sub_LOF': {'periodicity': 2, 'n_neighbors': 30}, + 'LOF': {'n_neighbors': 50}, + 'POLY': {'periodicity': 1, 'power': 4}, + 'MatrixProfile': {'periodicity': 1}, + 'NORMA': {'periodicity': 1, 'clustering': 'kshape'}, + 'SAND': {'periodicity': 1}, + 'Series2Graph': {'periodicity': 1}, + 'SR': {'periodicity': 1}, + 'Sub_PCA': {'periodicity': 1, 'n_components': None}, + 'Sub_HBOS': {'periodicity': 1, 'n_bins': 10}, + 'Sub_OCSVM': {'periodicity': 2, 'kernel': 'rbf'}, + 'Sub_MCD': {'periodicity': 3, 'support_fraction': None}, + 'Sub_KNN': {'periodicity': 2, 'n_neighbors': 50}, + 'KMeansAD_U': {'periodicity': 2, 'n_clusters': 10}, + 'KShapeAD': {'periodicity': 1}, + 'FFT': {}, + 'Left_STAMPi': {}, + 'AutoEncoder': {'window_size': 100, 'hidden_neurons': [128, 64]}, + 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, + 'LSTMAD': {'window_size': 100, 'lr': 0.0008}, + 'TranAD': {'win_size': 10, 'lr': 0.0001}, + 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, + 'OmniAnomaly': {'win_size': 5, 'lr': 0.002}, + 'USAD': {'win_size': 100, 'lr': 0.001}, + 'Donut': {'win_size': 60, 'lr': 0.0001}, + 'TimesNet': {'win_size': 32, 'lr': 0.0001}, + 'FITS': {'win_size': 100, 'lr': 0.0001}, + 'OFA': {'win_size': 50}, + 'Lag_Llama': {'win_size': 96}, + 'Chronos': {'win_size': 100}, + 'TimesFM': {'win_size': 96}, + 'MOMENT_ZS': {'win_size': 64}, + 'MOMENT_FT': {'win_size': 64}, + 'M2N2': {}, + 'DADA': {'win_size': 100}, + 'Time_MOE': {'win_size':96}, + 'Time_RCD': {'win_size':5000, 'batch_size': 64}, + 'Time_RCD_Reconstruction': {'win_size':5000, 'batch_size': 128}, + 'Time_RCD_Reconstruction_Anomaly_Head': {'win_size':5000, 'batch_size': 128}, + 'Time_RCD_Reconstruction_Random_Mask_Anomaly_Head': {'win_size':5000, 'batch_size': 128}, + 'TSPulse': {'win_size':96, 'batch_size': 64, 'aggregation_length': 64, 'aggr_function': 'max', 'smoothing_length': 8} +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4ab4a4b26bb0f96d9adf4ad51e9b132a2bf46351 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +--- +title: Time RCD +emoji: 🐠 +colorFrom: purple +colorTo: blue +sdk: gradio +sdk_version: 5.49.1 +app_file: app.py +pinned: false +license: mit +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..379b90fb57d37a631037fb6a194602c68bbda052 --- /dev/null +++ b/app.py @@ -0,0 +1,237 @@ +import io +import zipfile +from pathlib import Path +from typing import List, Tuple + +import gradio as gr +import matplotlib + +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from huggingface_hub import HfHubHTTPError, hf_hub_download + +from model_wrapper import run_Time_RCD + +REPO_ID = 
"thu-sail-lab/Time-RCD" + +CHECKPOINT_FILES = [ + "checkpoints/full_mask_anomaly_head_pretrain_checkpoint_best.pth", + "checkpoints/dataset_10_20.pth", + "checkpoints/full_mask_10_20.pth", + "checkpoints/dataset_15_56.pth", + "checkpoints/full_mask_15_56.pth", +] + + +def ensure_checkpoints() -> None: + """Ensure that the required checkpoint files are present locally.""" + missing = [path for path in CHECKPOINT_FILES if not Path(path).exists()] + if not missing: + return + + try: + zip_path = hf_hub_download( + repo_id=REPO_ID, + filename="checkpoints.zip", + repo_type="model", + cache_dir=".cache/hf", + ) + except HfHubHTTPError: + zip_path = hf_hub_download( + repo_id=REPO_ID, + filename="checkpoints.zip", + repo_type="dataset", + cache_dir=".cache/hf", + ) + + with zipfile.ZipFile(zip_path, "r") as zf: + zf.extractall(".") + + +def load_timeseries(file_obj, feature_columns: List[str] | None = None) -> Tuple[pd.DataFrame, np.ndarray]: + """Load the uploaded file into a numeric dataframe and numpy array.""" + path = Path(file_obj.name) + if path.suffix.lower() == ".npy": + data = np.load(path, allow_pickle=False) + if data.ndim == 1: + data = data.reshape(-1, 1) + if not isinstance(data, np.ndarray): + raise ValueError("Loaded data is not a numpy array.") + df = pd.DataFrame(data) + return df, data.astype(np.float32) + + if path.suffix.lower() not in {".csv", ".txt"}: + raise ValueError("Unsupported file type. Please upload a .csv, .txt, or .npy file.") + + df = pd.read_csv(path) + numeric_df = df.select_dtypes(include=np.number) + if numeric_df.empty: + raise ValueError("No numeric columns detected. Ensure your file contains numeric values.") + + if feature_columns: + missing = [col for col in feature_columns if col not in numeric_df.columns] + if missing: + raise ValueError(f"Selected columns not found in the file: {', '.join(missing)}") + numeric_df = numeric_df[feature_columns] + + array = numeric_df.to_numpy(dtype=np.float32) + if array.ndim == 1: + array = array.reshape(-1, 1) + + return numeric_df, array + + +def infer( + file_obj, + is_multivariate: bool, + window_size: int, + batch_size: int, + mask_type: str, + multi_size: str, + feature_columns: List[str], +) -> Tuple[str, pd.DataFrame, plt.Figure]: + """Run Time-RCD inference and produce outputs for the Gradio UI.""" + ensure_checkpoints() + numeric_df, array = load_timeseries(file_obj, feature_columns or None) + + kwargs = { + "Multi": is_multivariate, + "win_size": window_size, + "batch_size": batch_size, + "random_mask": mask_type, + "size": multi_size, + "device": "cpu", + } + + scores, logits = run_Time_RCD(array, **kwargs) + score_vector = np.asarray(scores).reshape(-1) + logit_vector = np.asarray(logits).reshape(-1) + + valid_length = min(len(score_vector), len(numeric_df)) + score_series = pd.Series(score_vector[:valid_length], index=numeric_df.index[:valid_length], name="anomaly_score") + logit_series = pd.Series(logit_vector[:valid_length], index=numeric_df.index[:valid_length], name="anomaly_logit") + + result_df = numeric_df.iloc[:valid_length, :].copy() + result_df["anomaly_score"] = score_series + result_df["anomaly_logit"] = logit_series + + top_indices = score_series.nlargest(5).index.tolist() + highlight_message = ( + "Top anomaly indices (by score): " + ", ".join(str(idx) for idx in top_indices) + if len(top_indices) > 0 + else "No anomalies detected." 
+ ) + + figure = build_plot(result_df) + + return highlight_message, result_df, figure + + +def build_plot(result_df: pd.DataFrame) -> plt.Figure: + """Create a matplotlib plot of the first feature vs. anomaly score.""" + fig, ax_primary = plt.subplots(figsize=(10, 4)) + index = result_df.index + feature_cols = [col for col in result_df.columns if col not in {"anomaly_score", "anomaly_logit"}] + + primary_col = feature_cols[0] + ax_primary.plot(index, result_df[primary_col], label=f"{primary_col}", color="#1f77b4", linewidth=1.0) + ax_primary.set_xlabel("Index") + ax_primary.set_ylabel("Value") + ax_primary.grid(alpha=0.2) + + ax_secondary = ax_primary.twinx() + ax_secondary.plot(index, result_df["anomaly_score"], label="Anomaly Score", color="#d62728", linewidth=1.0) + ax_secondary.set_ylabel("Anomaly Score") + + fig.tight_layout() + return fig + + +def build_interface() -> gr.Blocks: + """Define the Gradio UI.""" + with gr.Blocks(title="Time-RCD Zero-Shot Anomaly Detection") as demo: + gr.Markdown( + "# Time-RCD Zero-Shot Anomaly Detection\n" + "Upload a time series to run zero-shot anomaly detection with the pretrained Time-RCD checkpoints. " + "You can choose univariate or multivariate mode, adjust the window size, and configure mask settings." + ) + + with gr.Row(): + file_input = gr.File(label="Upload time series file (.csv, .txt, .npy)", file_types=[".csv", ".txt", ".npy"]) + column_selector = gr.Textbox( + label="Columns to use (comma-separated, optional)", + placeholder="e.g. value,feature_1,feature_2", + ) + + with gr.Row(): + multivariate = gr.Radio( + choices=["Univariate", "Multivariate"], + value="Univariate", + label="Data type", + ) + window_size_in = gr.Slider( + minimum=128, + maximum=8192, + value=2048, + step=128, + label="Window size", + ) + batch_size_in = gr.Slider( + minimum=1, + maximum=128, + value=16, + step=1, + label="Batch size", + ) + + with gr.Row(): + mask_type_in = gr.Radio( + choices=["random_mask", "full_mask"], + value="random_mask", + label="Mask type (multivariate only)", + ) + multi_size_in = gr.Radio( + choices=["full", "small"], + value="full", + label="Multivariate model size", + ) + + run_button = gr.Button("Run Inference", variant="primary") + + result_message = gr.Textbox(label="Summary", interactive=False) + result_dataframe = gr.DataFrame(label="Anomaly Scores", interactive=False) + plot_output = gr.Plot(label="Series vs. 
Anomaly Score") + + def _submit(file_obj, multivariate_choice, win, batch, mask, size, columns_text): + if file_obj is None: + raise gr.Error("Please upload a time series file.") + + feature_columns = [col.strip() for col in columns_text.split(",") if col.strip()] if columns_text else [] + is_multi = multivariate_choice == "Multivariate" + summary, df, fig = infer( + file_obj=file_obj, + is_multivariate=is_multi, + window_size=int(win), + batch_size=int(batch), + mask_type=mask, + multi_size=size, + feature_columns=feature_columns, + ) + return summary, df, fig + + run_button.click( + fn=_submit, + inputs=[file_input, multivariate, window_size_in, batch_size_in, mask_type_in, multi_size_in, column_selector], + outputs=[result_message, result_dataframe, plot_output], + ) + + return demo + + +demo = build_interface() + +if __name__ == "__main__": + demo.launch() + diff --git a/evaluation/.DS_Store b/evaluation/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..44a4f1d6ee25b74d8f7aa4890b777ad3eb6e0647 Binary files /dev/null and b/evaluation/.DS_Store differ diff --git a/evaluation/__init__.py b/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/evaluation/__init__.py @@ -0,0 +1 @@ + diff --git a/evaluation/affiliation/__init__.py b/evaluation/affiliation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/evaluation/affiliation/__init__.py @@ -0,0 +1 @@ + diff --git a/evaluation/affiliation/__pycache__/__init__.cpython-310.pyc b/evaluation/affiliation/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01de6cc0e5fd1837fac8fba810ef86f5bfa2e820 Binary files /dev/null and b/evaluation/affiliation/__pycache__/__init__.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/__init__.cpython-311.pyc b/evaluation/affiliation/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..099e52f143c08ed42b1ecfcfeb5525fbf3b67721 Binary files /dev/null and b/evaluation/affiliation/__pycache__/__init__.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/__init__.cpython-38.pyc b/evaluation/affiliation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f524f4846155f45f168b518d735f566b9061ad53 Binary files /dev/null and b/evaluation/affiliation/__pycache__/__init__.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/__init__.cpython-39.pyc b/evaluation/affiliation/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cbe9ebb2aafa41420469ab8c79bd8e68b207d22 Binary files /dev/null and b/evaluation/affiliation/__pycache__/__init__.cpython-39.pyc differ diff --git a/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-310.pyc b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..732eacacd831af55c99556b605ade53ccbde6fdf Binary files /dev/null and b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-311.pyc b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..cffac1505b964c1a351d60bb339f4b8d15cb62e4 Binary files /dev/null and b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-38.pyc b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a4f3e169fd726beaae51a5296d61e5acad41bae Binary files /dev/null and b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-39.pyc b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1da7fa16055659a2cb8d5c6103f707dee1efd86b Binary files /dev/null and b/evaluation/affiliation/__pycache__/_affiliation_zone.cpython-39.pyc differ diff --git a/evaluation/affiliation/__pycache__/_integral_interval.cpython-310.pyc b/evaluation/affiliation/__pycache__/_integral_interval.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a099d40c298c032c4e5db72ece54555da46deba8 Binary files /dev/null and b/evaluation/affiliation/__pycache__/_integral_interval.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/_integral_interval.cpython-311.pyc b/evaluation/affiliation/__pycache__/_integral_interval.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db69436055e96ea3abe82f2ef8e9bf6cfe406bbd Binary files /dev/null and b/evaluation/affiliation/__pycache__/_integral_interval.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/_integral_interval.cpython-38.pyc b/evaluation/affiliation/__pycache__/_integral_interval.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b12ee1c9c92ba63c40c98272b4e00a9746d8064f Binary files /dev/null and b/evaluation/affiliation/__pycache__/_integral_interval.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/_integral_interval.cpython-39.pyc b/evaluation/affiliation/__pycache__/_integral_interval.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fbd5ff14f683d9fe0fd8560e0f1d67f9808753b Binary files /dev/null and b/evaluation/affiliation/__pycache__/_integral_interval.cpython-39.pyc differ diff --git a/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-310.pyc b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2afc1ab4abace42066b29a8061b53b7bb35d62cb Binary files /dev/null and b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-311.pyc b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc3ac10baa8c9cdd80de62e540b968d83bb215a4 Binary files /dev/null and b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-38.pyc b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..393a41a2f528bc9ea26408feb31f80e3fe4ddff3 Binary files /dev/null and 
b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-39.pyc b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21724006b17295189e26b3d709c53af6c6a9be09 Binary files /dev/null and b/evaluation/affiliation/__pycache__/_single_ground_truth_event.cpython-39.pyc differ diff --git a/evaluation/affiliation/__pycache__/generics.cpython-310.pyc b/evaluation/affiliation/__pycache__/generics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04a476491e4953eedc8100ce614af0aa697c55d8 Binary files /dev/null and b/evaluation/affiliation/__pycache__/generics.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/generics.cpython-311.pyc b/evaluation/affiliation/__pycache__/generics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43b3337bda73c9ba81e69b76df1e6d9ae33238a3 Binary files /dev/null and b/evaluation/affiliation/__pycache__/generics.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/generics.cpython-38.pyc b/evaluation/affiliation/__pycache__/generics.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..871266843ef6ff55fdf260920556f2b66b4dbcec Binary files /dev/null and b/evaluation/affiliation/__pycache__/generics.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/generics.cpython-39.pyc b/evaluation/affiliation/__pycache__/generics.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2066a32b453e9cd0cc7749b41e39de87ebb42797 Binary files /dev/null and b/evaluation/affiliation/__pycache__/generics.cpython-39.pyc differ diff --git a/evaluation/affiliation/__pycache__/metrics.cpython-310.pyc b/evaluation/affiliation/__pycache__/metrics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..642aec10c080ba2040aab52a8d017d663a53628e Binary files /dev/null and b/evaluation/affiliation/__pycache__/metrics.cpython-310.pyc differ diff --git a/evaluation/affiliation/__pycache__/metrics.cpython-311.pyc b/evaluation/affiliation/__pycache__/metrics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3ccb19afd0b8c49202708b823249bbc1aa0e014 Binary files /dev/null and b/evaluation/affiliation/__pycache__/metrics.cpython-311.pyc differ diff --git a/evaluation/affiliation/__pycache__/metrics.cpython-38.pyc b/evaluation/affiliation/__pycache__/metrics.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92bc2394a31d4b10fb0e8d628dcde9415a63f157 Binary files /dev/null and b/evaluation/affiliation/__pycache__/metrics.cpython-38.pyc differ diff --git a/evaluation/affiliation/__pycache__/metrics.cpython-39.pyc b/evaluation/affiliation/__pycache__/metrics.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ff09a7b466a28d59d89f34757fc07a163a2ab2f Binary files /dev/null and b/evaluation/affiliation/__pycache__/metrics.cpython-39.pyc differ diff --git a/evaluation/affiliation/_affiliation_zone.py b/evaluation/affiliation/_affiliation_zone.py new file mode 100644 index 0000000000000000000000000000000000000000..a291e59da5043f156c818806017cfe4d17837173 --- /dev/null +++ b/evaluation/affiliation/_affiliation_zone.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from ._integral_interval 
import interval_intersection + +def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): + """ + Helper for `E_gt_func` + + :param j: index from 0 to len(Js) (included) on which to get the start + :param Js: ground truth events, as a list of couples + :param Trange: range of the series where Js is included + :return: generalized start such that the middle of t_start and t_stop + always gives the affiliation zone + """ + b = max(Trange) + n = len(Js) + if j == n: + return(2*b - t_stop(n-1, Js, Trange)) + else: + return(Js[j][0]) + +def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): + """ + Helper for `E_gt_func` + + :param j: index from 0 to len(Js) (included) on which to get the stop + :param Js: ground truth events, as a list of couples + :param Trange: range of the series where Js is included + :return: generalized stop such that the middle of t_start and t_stop + always gives the affiliation zone + """ + if j == -1: + a = min(Trange) + return(2*a - t_start(0, Js, Trange)) + else: + return(Js[j][1]) + +def E_gt_func(j, Js, Trange): + """ + Get the affiliation zone of element j of the ground truth + + :param j: index from 0 to len(Js) (excluded) on which to get the zone + :param Js: ground truth events, as a list of couples + :param Trange: range of the series where Js is included, can + be (-math.inf, math.inf) for distance measures + :return: affiliation zone of element j of the ground truth represented + as a couple + """ + range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2 + range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2 + return((range_left, range_right)) + +def get_all_E_gt_func(Js, Trange): + """ + Get the affiliation partition from the ground truth point of view + + :param Js: ground truth events, as a list of couples + :param Trange: range of the series where Js is included, can + be (-math.inf, math.inf) for distance measures + :return: affiliation partition of the events + """ + # E_gt is the limit of affiliation/attraction for each ground truth event + E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))] + return(E_gt) + +def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]): + """ + Cut the events into the affiliation zones + The presentation given here is from the ground truth point of view, + but it is also used in the reversed direction in the main function. + + :param Is: events as a list of couples + :param E_gt: range of the affiliation zones + :return: a list of list of intervals (each interval represented by either + a couple or None for empty interval). The outer list is indexed by each + affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`. 
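+
+    For instance, with the default arguments above, the partition is
+    [[(1, 1.5), (2, 2.5), None, None],
+     [None, (2.5, 4.5), None, None],
+     [None, (4.5, 5), (5, 6), (8, 9)]]:
+    each event is clipped to the zone it overlaps, and None marks an event
+    that does not overlap that zone.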
+ """ + out = [None] * len(E_gt) + for j in range(len(E_gt)): + E_gt_j = E_gt[j] + discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E + discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E + kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)] + Is_j = [x for x, y in zip(Is, kept_index)] + out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j] + return(out) diff --git a/evaluation/affiliation/_integral_interval.py b/evaluation/affiliation/_integral_interval.py new file mode 100644 index 0000000000000000000000000000000000000000..b02bab8ad71f50641f300e46c464199947e9c5be --- /dev/null +++ b/evaluation/affiliation/_integral_interval.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import math +from .generics import _sum_wo_nan +""" +In order to shorten the length of the variables, +the general convention in this file is to let: + - I for a predicted event (start, stop), + - Is for a list of predicted events, + - J for a ground truth event, + - Js for a list of ground truth events. +""" + +def interval_length(J = (1,2)): + """ + Length of an interval + + :param J: couple representating the start and stop of an interval, or None + :return: length of the interval, and 0 for a None interval + """ + if J is None: + return(0) + return(J[1] - J[0]) + +def sum_interval_lengths(Is = [(1,2),(3,4),(5,6)]): + """ + Sum of length of the intervals + + :param Is: list of intervals represented by starts and stops + :return: sum of the interval length + """ + return(sum([interval_length(I) for I in Is])) + +def interval_intersection(I = (1, 3), J = (2, 4)): + """ + Intersection between two intervals I and J + I and J should be either empty or represent a positive interval (no point) + + :param I: an interval represented by start and stop + :param J: a second interval of the same form + :return: an interval representing the start and stop of the intersection (or None if empty) + """ + if I is None: + return(None) + if J is None: + return(None) + + I_inter_J = (max(I[0], J[0]), min(I[1], J[1])) + if I_inter_J[0] >= I_inter_J[1]: + return(None) + else: + return(I_inter_J) + +def interval_subset(I = (1, 3), J = (0, 6)): + """ + Checks whether I is a subset of J + + :param I: an non empty interval represented by start and stop + :param J: a second non empty interval of the same form + :return: True if I is a subset of J + """ + if (I[0] >= J[0]) and (I[1] <= J[1]): + return True + else: + return False + +def cut_into_three_func(I, J): + """ + Cut an interval I into a partition of 3 subsets: + the elements before J, + the elements belonging to J, + and the elements after J + + :param I: an interval represented by start and stop, or None for an empty one + :param J: a non empty interval + :return: a triplet of three intervals, each represented by either (start, stop) or None + """ + if I is None: + return((None, None, None)) + + I_inter_J = interval_intersection(I, J) + if I == I_inter_J: + I_before = None + I_after = None + elif I[1] <= J[0]: + I_before = I + I_after = None + elif I[0] >= J[1]: + I_before = None + I_after = I + elif (I[0] <= J[0]) and (I[1] >= J[1]): + I_before = (I[0], I_inter_J[0]) + I_after = (I_inter_J[1], I[1]) + elif I[0] <= J[0]: + I_before = (I[0], I_inter_J[0]) + I_after = None + elif I[1] >= J[1]: + I_before = None + I_after = (I_inter_J[1], I[1]) + else: + raise ValueError('unexpected unconsidered case') + return(I_before, 
I_inter_J, I_after) + +def get_pivot_j(I, J): + """ + Get the single point of J that is the closest to I, called 'pivot' here, + with the requirement that I should be outside J + + :param I: a non empty interval (start, stop) + :param J: another non empty interval, with empty intersection with I + :return: the element j of J that is the closest to I + """ + if interval_intersection(I, J) is not None: + raise ValueError('I and J should have a void intersection') + + j_pivot = None # j_pivot is a border of J + if max(I) <= min(J): + j_pivot = min(J) + elif min(I) >= max(J): + j_pivot = max(J) + else: + raise ValueError('I should be outside J') + return(j_pivot) + +def integral_mini_interval(I, J): + """ + In the specific case where interval I is located outside J, + integral of distance from x to J over the interval x \in I. + This is the *integral* i.e. the sum. + It's not the mean (not divided by the length of I yet) + + :param I: a interval (start, stop), or None + :param J: a non empty interval, with empty intersection with I + :return: the integral of distances d(x, J) over x \in I + """ + if I is None: + return(0) + + j_pivot = get_pivot_j(I, J) + a = min(I) + b = max(I) + return((b-a)*abs((j_pivot - (a+b)/2))) + +def integral_interval_distance(I, J): + """ + For any non empty intervals I, J, compute the + integral of distance from x to J over the interval x \in I. + This is the *integral* i.e. the sum. + It's not the mean (not divided by the length of I yet) + The interval I can intersect J or not + + :param I: a interval (start, stop), or None + :param J: a non empty interval + :return: the integral of distances d(x, J) over x \in I + """ + # I and J are single intervals (not generic sets) + # I is a predicted interval in the range of affiliation of J + + def f(I_cut): + return(integral_mini_interval(I_cut, J)) + # If I_middle is fully included into J, it is + # the distance to J is always 0 + def f0(I_middle): + return(0) + + cut_into_three = cut_into_three_func(I, J) + # Distance for now, not the mean: + # Distance left: Between cut_into_three[0] and the point min(J) + d_left = f(cut_into_three[0]) + # Distance middle: Between cut_into_three[1] = I inter J, and J + d_middle = f0(cut_into_three[1]) + # Distance right: Between cut_into_three[2] and the point max(J) + d_right = f(cut_into_three[2]) + # It's an integral so summable + return(d_left + d_middle + d_right) + +def integral_mini_interval_P_CDFmethod__min_piece(I, J, E): + """ + Helper of `integral_mini_interval_Pprecision_CDFmethod` + In the specific case where interval I is located outside J, + compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with: + - m the smallest distance from J to E, + - d_min the smallest distance d(x, J) from x \in I to J + - d_max the largest distance d(x, J) from x \in I to J + + :param I: a single predicted interval, a non empty interval (start, stop) + :param J: ground truth interval, a non empty interval, with empty intersection with I + :param E: the affiliation/influence zone for J, represented as a couple (start, stop) + :return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$ + """ + if interval_intersection(I, J) is not None: + raise ValueError('I and J should have a void intersection') + if not interval_subset(J, E): + raise ValueError('J should be included in E') + if not interval_subset(I, E): + raise ValueError('I should be included in E') + + e_min = min(E) + j_min = min(J) + j_max = max(J) + e_max = max(E) + i_min = min(I) + i_max = max(I) + + d_min = max(i_min - j_max, 
j_min - i_max) + d_max = max(i_max - j_max, j_min - i_min) + m = min(j_min - e_min, e_max - j_max) + A = min(d_max, m)**2 - min(d_min, m)**2 + B = max(d_max, m) - max(d_min, m) + C = (1/2)*A + m*B + return(C) + +def integral_mini_interval_Pprecision_CDFmethod(I, J, E): + """ + Integral of the probability of distances over the interval I. + In the specific case where interval I is located outside J, + compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. + This is the *integral* i.e. the sum (not the mean) + + :param I: a single predicted interval, a non empty interval (start, stop) + :param J: ground truth interval, a non empty interval, with empty intersection with I + :param E: the affiliation/influence zone for J, represented as a couple (start, stop) + :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ + """ + integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E) + + e_min = min(E) + j_min = min(J) + j_max = max(J) + e_max = max(E) + i_min = min(I) + i_max = max(I) + d_min = max(i_min - j_max, j_min - i_max) + d_max = max(i_max - j_max, j_min - i_min) + integral_linear_piece = (1/2)*(d_max**2 - d_min**2) + integral_remaining_piece = (j_max - j_min)*(i_max - i_min) + + DeltaI = i_max - i_min + DeltaE = e_max - e_min + + output = DeltaI - (1/DeltaE)*(integral_min_piece + integral_linear_piece + integral_remaining_piece) + return(output) + +def integral_interval_probaCDF_precision(I, J, E): + """ + Integral of the probability of distances over the interval I. + Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. + This is the *integral* i.e. the sum (not the mean) + + :param I: a single (non empty) predicted interval in the zone of affiliation of J + :param J: ground truth interval + :param E: affiliation/influence zone for J + :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ + """ + # I and J are single intervals (not generic sets) + def f(I_cut): + if I_cut is None: + return(0) + else: + return(integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E)) + + # If I_middle is fully included into J, it is + # integral of 1 on the interval I_middle, so it's |I_middle| + def f0(I_middle): + if I_middle is None: + return(0) + else: + return(max(I_middle) - min(I_middle)) + + cut_into_three = cut_into_three_func(I, J) + # Distance for now, not the mean: + # Distance left: Between cut_into_three[0] and the point min(J) + d_left = f(cut_into_three[0]) + # Distance middle: Between cut_into_three[1] = I inter J, and J + d_middle = f0(cut_into_three[1]) + # Distance right: Between cut_into_three[2] and the point max(J) + d_right = f(cut_into_three[2]) + # It's an integral so summable + return(d_left + d_middle + d_right) + +def cut_J_based_on_mean_func(J, e_mean): + """ + Helper function for the recall. + Partition J into two intervals: before and after e_mean + (e_mean represents the center element of E the zone of affiliation) + + :param J: ground truth interval + :param e_mean: a float number (center value of E) + :return: a couple partitionning J into (J_before, J_after) + """ + if J is None: + J_before = None + J_after = None + elif e_mean >= max(J): + J_before = J + J_after = None + elif e_mean <= min(J): + J_before = None + J_after = J + else: # e_mean is across J + J_before = (min(J), e_mean) + J_after = (e_mean, max(J)) + + return((J_before, J_after)) + +def integral_mini_interval_Precall_CDFmethod(I, J, E): + """ + Integral of the probability of distances over the interval J. 
+ In the specific case where interval J is located outside I, + compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. + This is the *integral* i.e. the sum (not the mean) + + :param I: a single (non empty) predicted interval + :param J: ground truth (non empty) interval, with empty intersection with I + :param E: the affiliation/influence zone for J, represented as a couple (start, stop) + :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ + """ + # The interval J should be located outside I + # (so it's either the left piece or the right piece w.r.t I) + i_pivot = get_pivot_j(J, I) + e_min = min(E) + e_max = max(E) + e_mean = (e_min + e_max) / 2 + + # If i_pivot is outside E (it's possible), then + # the distance is worst that any random element within E, + # so we set the recall to 0 + if i_pivot <= min(E): + return(0) + elif i_pivot >= max(E): + return(0) + # Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d + + cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean) + J_before = cut_J_based_on_e_mean[0] + J_after = cut_J_based_on_e_mean[1] + + iemin_mean = (e_min + i_pivot)/2 + cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean) + J_before_closeE = cut_Jbefore_based_on_iemin_mean[0] # before e_mean and closer to e_min than i_pivot ~ J_before_before + J_before_closeI = cut_Jbefore_based_on_iemin_mean[1] # before e_mean and closer to i_pivot than e_min ~ J_before_after + + iemax_mean = (e_max + i_pivot)/2 + cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean) + J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before + J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after + + if J_before_closeE is not None: + j_before_before_min = min(J_before_closeE) # == min(J) + j_before_before_max = max(J_before_closeE) + else: + j_before_before_min = math.nan + j_before_before_max = math.nan + + if J_before_closeI is not None: + j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing + j_before_after_max = max(J_before_closeI) # == max(J_before) + else: + j_before_after_min = math.nan + j_before_after_max = math.nan + + if J_after_closeI is not None: + j_after_before_min = min(J_after_closeI) # == min(J_after) + j_after_before_max = max(J_after_closeI) + else: + j_after_before_min = math.nan + j_after_before_max = math.nan + + if J_after_closeE is not None: + j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing + j_after_after_max = max(J_after_closeE) # == max(J) + else: + j_after_after_min = math.nan + j_after_after_max = math.nan + + # <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE --> + # j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max + # (with `b` for before and `a` for after in the previous variable names) + + # vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt + # Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA) + # Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 
2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) + # Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) + # Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) + # + # Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) + # Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) + # Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) + # Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA) + + if i_pivot >= max(J): + part1_before_closeE = (i_pivot-e_min)*(j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min + part2_before_closeI = 2*i_pivot*(j_before_after_max-j_before_after_min) - (j_before_after_max**2 - j_before_after_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min + part3_after_closeI = 2*i_pivot*(j_after_before_max-j_after_before_min) - (j_after_before_max**2 - j_after_before_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min + part4_after_closeE = (e_max+i_pivot)*(j_after_after_max-j_after_after_min) - (j_after_after_max**2 - j_after_after_min**2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min + out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] + elif i_pivot <= min(J): + part1_before_closeE = (j_before_before_max**2 - j_before_before_min**2) - (e_min+i_pivot)*(j_before_before_max-j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min + part2_before_closeI = (j_before_after_max**2 - j_before_after_min**2) - 2*i_pivot*(j_before_after_max-j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min + part3_after_closeI = (j_after_before_max**2 - j_after_before_min**2) - 2*i_pivot*(j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min + part4_after_closeE = (e_max-i_pivot)*(j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min + out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] + else: + raise ValueError('The i_pivot should be outside J') + + out_integral_min_dm_plus_d = 
_sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts + + # We have for each point t of J: + # \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d) + # Since t is a single-point here, and we are in the case where i_pivot is inside E. + # The integral is then given by: + # C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt + # = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt + # = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt] + # = |J| - (1/|E|) * out_integral_min_dm_plus_d + DeltaJ = max(J) - min(J) + DeltaE = max(E) - min(E) + C = DeltaJ - (1/DeltaE) * out_integral_min_dm_plus_d + + return(C) + +def integral_interval_probaCDF_recall(I, J, E): + """ + Integral of the probability of distances over the interval J. + Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. + This is the *integral* i.e. the sum (not the mean) + + :param I: a single (non empty) predicted interval + :param J: ground truth (non empty) interval + :param E: the affiliation/influence zone for J + :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ + """ + # I and J are single intervals (not generic sets) + # E is the outside affiliation interval of J (even for recall!) + # (in particular J \subset E) + # + # J is the portion of the ground truth affiliated to I + # I is a predicted interval (can be outside E possibly since it's recall) + def f(J_cut): + if J_cut is None: + return(0) + else: + return integral_mini_interval_Precall_CDFmethod(I, J_cut, E) + + # If J_middle is fully included into I, it is + # integral of 1 on the interval J_middle, so it's |J_middle| + def f0(J_middle): + if J_middle is None: + return(0) + else: + return(max(J_middle) - min(J_middle)) + + cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I + # since we integrate over J this time. 
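+    # The overlap piece (J inter I) is handled by f0 and contributes its full
+    # length (probability 1 at every point), while the pieces of J lying
+    # outside I go through the CDF-based integral in f. The caller
+    # (affiliation_recall_proba) later divides the total by |J| to obtain a
+    # probability in [0, 1].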
+ # + # Distance for now, not the mean: + # Distance left: Between cut_into_three[0] and the point min(I) + d_left = f(cut_into_three[0]) + # Distance middle: Between cut_into_three[1] = J inter I, and I + d_middle = f0(cut_into_three[1]) + # Distance right: Between cut_into_three[2] and the point max(I) + d_right = f(cut_into_three[2]) + # It's an integral so summable + return(d_left + d_middle + d_right) diff --git a/evaluation/affiliation/_single_ground_truth_event.py b/evaluation/affiliation/_single_ground_truth_event.py new file mode 100644 index 0000000000000000000000000000000000000000..3822b875645ea30ac17b73b746de2be3e7b63f58 --- /dev/null +++ b/evaluation/affiliation/_single_ground_truth_event.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import math +from ._affiliation_zone import ( + get_all_E_gt_func, + affiliation_partition) +from ._integral_interval import ( + integral_interval_distance, + integral_interval_probaCDF_precision, + integral_interval_probaCDF_recall, + interval_length, + sum_interval_lengths) + +def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): + """ + Compute the individual average distance from Is to a single ground truth J + + :param Is: list of predicted events within the affiliation zone of J + :param J: couple representating the start and stop of a ground truth interval + :return: individual average precision directed distance number + """ + if all([I is None for I in Is]): # no prediction in the current area + return(math.nan) # undefined + return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is)) + +def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): + """ + Compute the individual precision probability from Is to a single ground truth J + + :param Is: list of predicted events within the affiliation zone of J + :param J: couple representating the start and stop of a ground truth interval + :param E: couple representing the start and stop of the zone of affiliation of J + :return: individual precision probability in [0, 1], or math.nan if undefined + """ + if all([I is None for I in Is]): # no prediction in the current area + return(math.nan) # undefined + return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is)) + +def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): + """ + Compute the individual average distance from a single J to the predictions Is + + :param Is: list of predicted events within the affiliation zone of J + :param J: couple representating the start and stop of a ground truth interval + :return: individual average recall directed distance number + """ + Is = [I for I in Is if I is not None] # filter possible None in Is + if len(Is) == 0: # there is no prediction in the current area + return(math.inf) + E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions + Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is + return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J)) + +def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): + """ + Compute the individual recall probability from a single ground truth J to Is + + :param Is: list of predicted events within the affiliation zone of J + :param J: couple representating the start and stop of a ground truth interval + :param E: couple representing the start and stop of the zone 
of affiliation of J + :return: individual recall probability in [0, 1] + """ + Is = [I for I in Is if I is not None] # filter possible None in Is + if len(Is) == 0: # there is no prediction in the current area + return(0) + E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions + Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is + return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J)) diff --git a/evaluation/affiliation/generics.py b/evaluation/affiliation/generics.py new file mode 100644 index 0000000000000000000000000000000000000000..dfe41d8149950472a82825a86ea8579fd4317994 --- /dev/null +++ b/evaluation/affiliation/generics.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from itertools import groupby +from operator import itemgetter +import math +import gzip +import glob +import os + +def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]): + """ + Convert a binary vector (indicating 1 for the anomalous instances) + to a list of events. The events are considered as durations, + i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1). + + :param vector: a list of elements belonging to {0, 1} + :return: a list of couples, each couple representing the start and stop of + each event + """ + positive_indexes = [idx for idx, val in enumerate(vector) if val > 0] + events = [] + for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]): + cur_cut = list(map(itemgetter(1), g)) + events.append((cur_cut[0], cur_cut[-1])) + + # Consistent conversion in case of range anomalies (for indexes): + # A positive index i is considered as the interval [i, i+1), + # so the last index should be moved by 1 + events = [(x, y+1) for (x,y) in events] + + return(events) + +def infer_Trange(events_pred, events_gt): + """ + Given the list of events events_pred and events_gt, get the + smallest possible Trange corresponding to the start and stop indexes + of the whole series. + Trange will not influence the measure of distances, but will impact the + measures of probabilities. + + :param events_pred: a list of couples corresponding to predicted events + :param events_gt: a list of couples corresponding to ground truth events + :return: a couple corresponding to the smallest range containing the events + """ + if len(events_gt) == 0: + raise ValueError('The gt events should contain at least one event') + if len(events_pred) == 0: + # empty prediction, base Trange only on events_gt (which is non empty) + return(infer_Trange(events_gt, events_gt)) + + min_pred = min([x[0] for x in events_pred]) + min_gt = min([x[0] for x in events_gt]) + max_pred = max([x[1] for x in events_pred]) + max_gt = max([x[1] for x in events_gt]) + Trange = (min(min_pred, min_gt), max(max_pred, max_gt)) + return(Trange) + +def has_point_anomalies(events): + """ + Checking whether events contain point anomalies, i.e. + events starting and stopping at the same time. 
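+    Such zero-length events are rejected by `pr_from_events` (point anomalies
+    are not handled by the affiliation computation). Note that
+    `convert_vector_to_events` never produces them, since each positive index
+    i is converted to the interval [i, i+1).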
+ + :param events: a list of couples corresponding to predicted events + :return: True is the events have any point anomalies, False otherwise + """ + if len(events) == 0: + return(False) + return(min([x[1] - x[0] for x in events]) == 0) + +def _sum_wo_nan(vec): + """ + Sum of elements, ignoring math.isnan ones + + :param vec: vector of floating numbers + :return: sum of the elements, ignoring math.isnan ones + """ + vec_wo_nan = [e for e in vec if not math.isnan(e)] + return(sum(vec_wo_nan)) + +def _len_wo_nan(vec): + """ + Count of elements, ignoring math.isnan ones + + :param vec: vector of floating numbers + :return: count of the elements, ignoring math.isnan ones + """ + vec_wo_nan = [e for e in vec if not math.isnan(e)] + return(len(vec_wo_nan)) + +def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'): + """ + Load a file compressed with gz, such that each line of the + file is either 0 (representing a normal instance) or 1 (representing) + an anomalous instance. + :param filename: file path to the gz compressed file + :return: list of integers with either 0 or 1 + """ + with gzip.open(filename, 'rb') as f: + content = f.read().splitlines() + content = [int(x) for x in content] + return(content) + +def read_all_as_events(): + """ + Load the files contained in the folder `data/` and convert + to events. The length of the series is kept. + The convention for the file name is: `dataset_algorithm.gz` + :return: two dictionaries: + - the first containing the list of events for each dataset and algorithm, + - the second containing the range of the series for each dataset + """ + filepaths = glob.glob('data/*.gz') + datasets = dict() + Tranges = dict() + for filepath in filepaths: + vector = read_gz_data(filepath) + events = convert_vector_to_events(vector) + # ad hoc cut for those files + cut_filepath = (os.path.split(filepath)[1]).split('_') + data_name = cut_filepath[0] + algo_name = (cut_filepath[1]).split('.')[0] + if not data_name in datasets: + datasets[data_name] = dict() + Tranges[data_name] = (0, len(vector)) + datasets[data_name][algo_name] = events + return(datasets, Tranges) + +def f1_func(p, r): + """ + Compute the f1 function + :param p: precision numeric value + :param r: recall numeric value + :return: f1 numeric value + """ + return(2*p*r/(p+r)) diff --git a/evaluation/affiliation/metrics.py b/evaluation/affiliation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..a305dfa58dd54db49a4f3bdd908ed6fb380068ac --- /dev/null +++ b/evaluation/affiliation/metrics.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from .generics import ( + infer_Trange, + has_point_anomalies, + _len_wo_nan, + _sum_wo_nan, + read_all_as_events) +from ._affiliation_zone import ( + get_all_E_gt_func, + affiliation_partition) +from ._single_ground_truth_event import ( + affiliation_precision_distance, + affiliation_recall_distance, + affiliation_precision_proba, + affiliation_recall_proba) + +def test_events(events): + """ + Verify the validity of the input events + :param events: list of events, each represented by a couple (start, stop) + :return: None. 
diff --git a/evaluation/affiliation/metrics.py b/evaluation/affiliation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..a305dfa58dd54db49a4f3bdd908ed6fb380068ac --- /dev/null +++ b/evaluation/affiliation/metrics.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from .generics import ( + infer_Trange, + has_point_anomalies, + _len_wo_nan, + _sum_wo_nan, + read_all_as_events) +from ._affiliation_zone import ( + get_all_E_gt_func, + affiliation_partition) +from ._single_ground_truth_event import ( + affiliation_precision_distance, + affiliation_recall_distance, + affiliation_precision_proba, + affiliation_recall_proba) + +def test_events(events): + """ + Verify the validity of the input events + :param events: list of events, each represented by a couple (start, stop) + :return: None. Raise an error for incorrectly formed or non-ordered events + """ + if type(events) is not list: + raise TypeError('Input `events` should be a list of couples') + if not all([type(x) is tuple for x in events]): + raise TypeError('Input `events` should be a list of tuples') + if not all([len(x) == 2 for x in events]): + raise ValueError('Input `events` should be a list of couples (start, stop)') + if not all([x[0] <= x[1] for x in events]): + raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop') + if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]): + raise ValueError('Couples of input `events` should be disjoint and ordered') + +def pr_from_events(events_pred, events_gt, Trange): + """ + Compute the affiliation metrics including the precision/recall in [0,1], + along with the individual precision/recall distances and probabilities + + :param events_pred: list of predicted events, each represented by a couple + indicating the start and the stop of the event + :param events_gt: list of ground truth events, each represented by a couple + indicating the start and the stop of the event + :param Trange: range of the series where events_pred and events_gt are included, + represented as a couple (start, stop) + :return: dictionary with precision, recall, and the individual metrics + """ + # testing the inputs + test_events(events_pred) + test_events(events_gt) + + if Trange is None: + # Trange should be indicated if probabilities are used + raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function)') + + # other tests + minimal_Trange = infer_Trange(events_pred, events_gt) + if not Trange[0] <= minimal_Trange[0]: + raise ValueError('`Trange` should include all the events') + if not minimal_Trange[1] <= Trange[1]: + raise ValueError('`Trange` should include all the events') + + if len(events_gt) == 0: + raise ValueError('Input `events_gt` should have at least one event') + + if has_point_anomalies(events_pred) or has_point_anomalies(events_gt): + raise ValueError('Cannot manage point anomalies currently') + + E_gt = get_all_E_gt_func(events_gt, Trange) + aff_partition = affiliation_partition(events_pred, E_gt) + + # Computing precision distance + d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] + + # Computing recall distance + d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] + + # Computing precision + p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] + + # Computing recall + p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] + + if _len_wo_nan(p_precision) > 0: + p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision) + else: + p_precision_average = p_precision[0] # math.nan + p_recall_average = sum(p_recall) / len(p_recall) + + dict_out = dict({'Affiliation_Precision': p_precision_average, + 'Affiliation_Recall': p_recall_average, + 'individual_precision_probabilities': p_precision, + 'individual_recall_probabilities': p_recall, + 'individual_precision_distances': d_precision, + 'individual_recall_distances': d_recall}) + return(dict_out) + +def produce_all_results(): + """ + Produce the affiliation precision/recall for all files + contained in the `data` folder + :return: a dictionary indexed by data names, each containing a dictionary + indexed by algorithm names,
each containing the results of the affiliation + metrics (precision, recall, individual probabilities and distances) + """ + datasets, Tranges = read_all_as_events() # read all the events in folder `data` + results = dict() + for data_name in datasets.keys(): + results_data = dict() + for algo_name in datasets[data_name].keys(): + if algo_name != 'groundtruth': + results_data[algo_name] = pr_from_events(datasets[data_name][algo_name], + datasets[data_name]['groundtruth'], + Tranges[data_name]) + results[data_name] = results_data + return(results) diff --git a/evaluation/basic_metrics.py b/evaluation/basic_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..45eee7a7b95f68b9d862cb35d6864dba31d58c8d --- /dev/null +++ b/evaluation/basic_metrics.py @@ -0,0 +1,2801 @@ +import torch +from tqdm import tqdm +from sklearn.metrics import precision_recall_fscore_support +from sklearn import metrics +import numpy as np +import math +import copy +import sklearn +from typing import Callable, Dict, Any, Tuple, Optional, List +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from functools import partial +import time +import multiprocessing as mp +def generate_curve(label, score, slidingWindow, version='opt', thre=250): + if version =='opt_mem': + tpr_3d, fpr_3d, prec_3d, window_3d, avg_auc_3d, avg_ap_3d = basic_metricor().RangeAUC_volume_opt_mem(labels_original=label, score=score, windowSize=slidingWindow, thre=thre) + else: + tpr_3d, fpr_3d, prec_3d, window_3d, avg_auc_3d, avg_ap_3d = basic_metricor().RangeAUC_volume_opt(labels_original=label, score=score, windowSize=slidingWindow, thre=thre) + + + X = np.array(tpr_3d).reshape(1,-1).ravel() + X_ap = np.array(tpr_3d)[:,:-1].reshape(1,-1).ravel() + Y = np.array(fpr_3d).reshape(1,-1).ravel() + W = np.array(prec_3d).reshape(1,-1).ravel() + Z = np.repeat(window_3d, len(tpr_3d[0])) + Z_ap = np.repeat(window_3d, len(tpr_3d[0])-1) + + return Y, Z, X, X_ap, W, Z_ap,avg_auc_3d, avg_ap_3d + +def inverse_proportional_cardinality_fn(cardinality: int, gt_length: int) -> float: + r""" + Cardinality function that assigns an inversely proportional weight to predictions within a single ground-truth + window. + + This is the default cardinality function recommended in [Tatbul2018]_. + + .. note:: + This function leads to a metric that is not recall-consistent! Please see [Wagner2023]_ for more details. + + :param cardinality: Number of predicted windows that overlap the ground-truth window in question. + :param gt_length: Length of the ground-truth window (unused). + :return: The cardinality factor :math:`\frac{1}{\text{cardinality}}`. + + .. [Tatbul2018] N. Tatbul, T.J. Lee, S. Zdonik, M. Alam, J. Gottschlich. + Precision and recall for time series. Advances in neural information processing systems. 2018;31. + .. [Wagner2023] D. Wagner, T. Michels, F.C.F. Schulz, A. Nair, M. Rudolph, and M. Kloft. + TimeSeAD: Benchmarking Deep Multivariate Time-Series Anomaly Detection. + Transactions on Machine Learning Research (TMLR), (to appear) 2023. + """ + return 1 / max(1, cardinality) + +def constant_bias_fn(inputs: torch.Tensor) -> float: + r""" + Compute the overlap size for a constant bias function that assigns the same weight to all positions. + + This functions computes + + .. math:: + \omega(\text{inputs}) = \frac{1}{n} \sum_{i = 1}^{n} \text{inputs}_i, + + where :math:`n = \lvert \text{inputs} \rvert`. + + .. 
note:: + To improve the runtime of our algorithm, we calculate the overlap :math:`\omega` directly as part of the bias + function. + + :param inputs: A 1-D :class:`~torch.Tensor` containing the predictions inside a ground-truth window. + :return: The overlap :math:`\omega`. + """ + return torch.sum(inputs).item() / inputs.shape[0] + +def improved_cardinality_fn(cardinality: int, gt_length: int): + r""" + Recall-consistent cardinality function introduced by [Wagner2023]_ that assigns lower weight to ground-truth windows + that overlap with many predicted windows. + + This function computes + + .. math:: + \left(\frac{\text{gt_length} - 1}{\text{gt_length}}\right)^{\text{cardinality} - 1}. + + :param cardinality: Number of predicted windows that overlap the ground-truth window in question. + :param gt_length: Length of the ground-truth window. + :return: The cardinality factor. + """ + return ((gt_length - 1) / gt_length) ** (cardinality - 1) + +class basic_metricor(): + def __init__(self, a = 1, probability = True, bias = 'flat', ): + self.a = a + self.probability = probability + self.bias = bias + self.eps = 1e-15 + + def detect_model(self, model, label, contamination = 0.1, window = 100, is_A = False, is_threshold = True): + if is_threshold: + score = self.scale_threshold(model.decision_scores_, model._mu, model._sigma) + else: + score = self.scale_contamination(model.decision_scores_, contamination = contamination) + if is_A is False: + scoreX = np.zeros(len(score)+window) + scoreX[math.ceil(window/2): len(score)+window - math.floor(window/2)] = score + else: + scoreX = score + + self.score_=scoreX + L = self.metric(label, scoreX) + return L + + def w(self, AnomalyRange, p): + MyValue = 0 + MaxValue = 0 + start = AnomalyRange[0] + AnomalyLength = AnomalyRange[1] - AnomalyRange[0] + 1 + for i in range(start, start +AnomalyLength): + bi = self.b(i, AnomalyLength) + MaxValue += bi + if i in p: + MyValue += bi + return MyValue/MaxValue + + def Cardinality_factor(self, Anomolyrange, Prange): + score = 0 + start = Anomolyrange[0] + end = Anomolyrange[1] + for i in Prange: + if i[0] >= start and i[0] <= end: + score +=1 + elif start >= i[0] and start <= i[1]: + score += 1 + elif end >= i[0] and end <= i[1]: + score += 1 + elif start >= i[0] and end <= i[1]: + score += 1 + if score == 0: + return 0 + else: + return 1/score + + def b(self, i, length): + bias = self.bias + if bias == 'flat': + return 1 + elif bias == 'front-end bias': + return length - i + 1 + elif bias == 'back-end bias': + return i + else: + if i <= length/2: + return i + else: + return length - i + 1 + + def scale_threshold(self, score, score_mu, score_sigma): + return (score >= (score_mu + 3*score_sigma)).astype(int) + + def _adjust_predicts(self, score, label, threshold=None, pred=None, calc_latency=False): + """ + Calculate adjusted predict labels using given `score`, `threshold` (or given `pred`) and `label`. + + Args: + score (np.ndarray): The anomaly score + label (np.ndarray): The ground-truth label + threshold (float): The threshold of anomaly score. + A point is labeled as "anomaly" if its score is higher than the threshold. 
+ pred (np.ndarray or None): if not None, adjust `pred` and ignore `score` and `threshold`, + calc_latency (bool): + + Returns: + np.ndarray: predict labels + """ + if len(score) != len(label): + raise ValueError("score and label must have the same length") + score = np.asarray(score) + label = np.asarray(label) + latency = 0 + if pred is None: + predict = score > threshold + else: + predict = copy.deepcopy(pred) + actual = label > 0.1 + anomaly_state = False + anomaly_count = 0 + for i in range(len(score)): + if actual[i] and predict[i] and not anomaly_state: + anomaly_state = True + anomaly_count += 1 + for j in range(i, 0, -1): + if not actual[j]: + break + else: + if not predict[j]: + predict[j] = True + latency += 1 + elif not actual[i]: + anomaly_state = False + if anomaly_state: + predict[i] = True + if calc_latency: + return predict, latency / (anomaly_count + 1e-4) + else: + return predict + + def adjustment(self, gt, pred): + adjusted_pred = np.array(pred) + anomaly_state = False + for i in range(len(gt)): + if gt[i] == 1 and adjusted_pred[i] == 1 and not anomaly_state: + anomaly_state = True + for j in range(i, 0, -1): + if gt[j] == 0: + break + else: + if adjusted_pred[j] == 0: + adjusted_pred[j] = 1 + for j in range(i, len(gt)): + if gt[j] == 0: + break + else: + if adjusted_pred[j] == 0: + adjusted_pred[j] = 1 + elif gt[i] == 0: + anomaly_state = False + if anomaly_state: + adjusted_pred[i] = 1 + return adjusted_pred + + def metric_new(self, label, score, preds, plot_ROC=False, alpha=0.2): + '''input: + Real labels and anomaly score in prediction + + output: + AUC, + Precision, + Recall, + F-score, + Range-precision, + Range-recall, + Range-Fscore, + Precison@k, + + k is chosen to be # of outliers in real labels + ''' + if np.sum(label) == 0: + print('All labels are 0. 
Label must have groud truth value for calculating AUC score.') + return None + + if np.isnan(score).any() or score is None: + print('Score must not be none.') + return None + + #area under curve + auc = metrics.roc_auc_score(label, score) + # plor ROC curve + if plot_ROC: + fpr, tpr, thresholds = metrics.roc_curve(label, score) + # display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=auc) + # display.plot() + + #precision, recall, F + if preds is None: + preds = score > (np.mean(score)+3*np.std(score)) + Precision, Recall, F, Support = metrics.precision_recall_fscore_support(label, preds, zero_division=0) + precision = Precision[1] + recall = Recall[1] + f = F[1] + + #point-adjust + adjust_preds = self._adjust_predicts(score, label, pred=preds) + PointF1PA = metrics.f1_score(label, adjust_preds) + + #range anomaly + Rrecall, ExistenceReward, OverlapReward = self.range_recall_new(label, preds, alpha) + Rprecision = self.range_recall_new(preds, label, 0)[0] + + if Rprecision + Rrecall==0: + Rf=0 + else: + Rf = 2 * Rrecall * Rprecision / (Rprecision + Rrecall) + + # top-k + k = int(np.sum(label)) + threshold = np.percentile(score, 100 * (1-k/len(label))) + + # precision_at_k = metrics.top_k_accuracy_score(label, score, k) + p_at_k = np.where(preds > threshold)[0] + TP_at_k = sum(label[p_at_k]) + precision_at_k = TP_at_k/k + + L = [auc, precision, recall, f, PointF1PA, Rrecall, ExistenceReward, OverlapReward, Rprecision, Rf, precision_at_k] + if plot_ROC: + return L, fpr, tpr + return L + + def metric_ROC(self, label, score): + return metrics.roc_auc_score(label, score) + + def metric_PR(self, label, score): + return metrics.average_precision_score(label, score) + + def metric_PointF1(self, label, score, preds=None): + if preds is None: + precision, recall, thresholds = metrics.precision_recall_curve(label, score) + f1_scores = 2 * (precision * recall) / (precision + recall + 0.00001) + F1 = np.max(f1_scores) + threshold = thresholds[np.argmax(f1_scores)] + else: + Precision, Recall, F, Support = metrics.precision_recall_fscore_support(label, preds, zero_division=0) + F1 = F[1] + return F1 + + def metric_standard_F1(self, true_labels, anomaly_scores, threshold=None): + """ + Calculate F1, Precision, Recall, and other metrics for anomaly detection. + + Args: + anomaly_scores: np.ndarray, anomaly scores (continuous values) + true_labels: np.ndarray, ground truth binary labels (0=normal, 1=anomaly) + threshold: float, optional. 
If None, will use optimal threshold based on F1 score + + Returns: + dict: Dictionary containing various metrics + """ + # If no threshold provided, find optimal threshold + if threshold is None: + thresholds = np.linspace(0, 1, 1500) + best_f1 = 0 + best_threshold = 0 + + for t in tqdm(thresholds, total=len(thresholds), desc="Finding optimal threshold"): + threshold = np.quantile(anomaly_scores, t) + predictions = (anomaly_scores >= threshold).astype(int) + if len(np.unique(predictions)) > 1: # Avoid division by zero + precision, recall, f1, _ = precision_recall_fscore_support( + true_labels, predictions, average='binary', zero_division=0 + ) + # print(f1, t) + if f1 > best_f1: + best_f1 = f1 + best_threshold = threshold + threshold = best_threshold + # print("aaa", threshold, best_threshold, best_f1) + # Calculate predictions based on threshold + predictions = (anomaly_scores >= threshold).astype(int) + + # Calculate basic metrics + precision, recall, f1, _ = precision_recall_fscore_support( + true_labels, predictions, average='binary', zero_division=0 + ) + # print(threshold, f1) + return { + 'F1': f1, + 'Recall': recall, + 'Precision': precision, } + + + def metric_Affiliation(self, label, score, preds=None): + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Ensure proper data types to avoid float/integer issues + label = np.asarray(label, dtype=int) + score = np.asarray(score, dtype=float) + + # Convert ground truth to events once, outside the loop + events_gt = convert_vector_to_events(label) + + if preds is None: + # print("Calculating afiliation metrics using score thresholds.") + p_values = np.linspace(0, 1, 1500) + # print(f"Using {thresholds} thresholds for affiliation metrics.") + Affiliation_scores = [] + Affiliation_Precision_scores = [] + Affiliation_Recall_scores = [] + # print("Score values", score) + + for p in tqdm(p_values, total=(len(p_values)), desc="Calculating Affiliation Metrics"): + threshold = np.quantile(score, p) + preds_loop = (score > threshold).astype(int) + + events_pred = convert_vector_to_events(preds_loop) + # events_gt is already calculated + Trange = (0, len(preds_loop)) + + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + Affiliation_Precision = affiliation_metrics['Affiliation_Precision'] + Affiliation_Recall = affiliation_metrics['Affiliation_Recall'] + # --- FIX 1: Prevent division by zero --- + denominator = Affiliation_Precision + Affiliation_Recall + if denominator > 0: + Affiliation_F = 2 * Affiliation_Precision * Affiliation_Recall / (denominator + self.eps) + else: + Affiliation_F = 0.0 + # # Use a local variable for the F1 score in the loop + # Affiliation_F = 2 * Affiliation_Precision * Affiliation_Recall / ( + # Affiliation_Precision + Affiliation_Recall + self.eps) + + Affiliation_scores.append(Affiliation_F) + Affiliation_Precision_scores.append(Affiliation_Precision) + Affiliation_Recall_scores.append(Affiliation_Recall) + + # Find the best scores after the loop + # print("Here are the Affiliation scores:", Affiliation_scores) + best_index = np.argmax(Affiliation_scores) + # print(f"Best Affiliation F1 score found at index {best_index} with value {Affiliation_scores[best_index]}") + Best_Affiliation_F1 = Affiliation_scores[best_index] + Best_Affiliation_Precision = Affiliation_Precision_scores[best_index] + Best_Affiliation_Recall = Affiliation_Recall_scores[best_index] + + else: + print("Using provided predictions for affiliation metrics.") + # 
This block runs when 'preds' is provided + events_pred = convert_vector_to_events(preds) + Trange = (0, len(preds)) + + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + # FIX: Assign the calculated values to the 'Best_' variables + # so they exist for the return statement. + Best_Affiliation_Precision = affiliation_metrics['Affiliation_Precision'] + Best_Affiliation_Recall = affiliation_metrics['Affiliation_Recall'] + Best_Affiliation_F1 = 2 * Best_Affiliation_Precision * Best_Affiliation_Recall / ( + Best_Affiliation_Precision + Best_Affiliation_Recall + self.eps) + + # FIX: Corrected the typo from Best_Affiliation_Rec to Best_Affiliation_Recall + return Best_Affiliation_F1, Best_Affiliation_Precision, Best_Affiliation_Recall + + def metric_RF1(self, label, score, preds=None): + + if preds is None: + q_values = np.linspace(0, 1, 1000) + Rf1_scores = [] + thresholds = [] + for q in tqdm(q_values, total=(len(q_values)), desc="Calculating RF1 Metrics"): + # Calculate prediction + threshold = np.quantile(score, q) + preds = (score > threshold).astype(int) + + Rrecall, ExistenceReward, OverlapReward = self.range_recall_new(label, preds, alpha=0.2) + Rprecision = self.range_recall_new(preds, label, 0)[0] + if Rprecision + Rrecall==0: + Rf=0 + else: + Rf = 2 * Rrecall * Rprecision / (Rprecision + Rrecall) + + Rf1_scores.append(Rf) + thresholds.append(threshold) + + RF1_Threshold = thresholds[np.argmax(Rf1_scores)] + RF1 = max(Rf1_scores) + else: + Rrecall, ExistenceReward, OverlapReward = self.range_recall_new(label, preds, alpha=0.2) + Rprecision = self.range_recall_new(preds, label, 0)[0] + if Rprecision + Rrecall==0: + RF1=0 + else: + RF1 = 2 * Rrecall * Rprecision / (Rprecision + Rrecall) + return RF1 + + # def metric_F1_T(self, labels: torch.Tensor, scores: torch.Tensor): + # """ + # Computes the F1 score for time series anomaly detection by finding the best threshold. + # + # Args: + # labels (torch.Tensor): Ground truth labels for the time series data. + # scores (torch.Tensor): Anomaly scores predicted by the model. + # + # Returns: + # Tuple[float, Dict[str, Any]]: The best F1 score and a dictionary with additional metrics. + # """ + # result = {} + # labels = torch.tensor(labels, dtype=torch.int) + # score = torch.tensor(scores, dtype=torch.float) + # f1, details = self.__best_ts_fbeta_score(labels, score, beta=1,) + # result['thre_T'] = details['threshold'] + # result['ACC_T'] = sklearn.metrics.accuracy_score(labels, score > details['threshold']) + # result['P_T'] = details['precision'] + # result['R_T'] = details['recall'] + # result['F1_T'] = f1 + # + # return result + + def metric_F1_T(self, labels: torch.Tensor, scores: torch.Tensor, use_parallel=True, + parallel_method='chunked', chunk_size=10, max_workers=8): + """ + Computes the F1 score with optional parallel processing. 
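(For orientation, a minimal usage sketch of this method; `labels` and `scores` are assumed to be 1-D arrays of equal length and the toy values are illustrative only:)

    import numpy as np

    labels = np.array([0, 0, 1, 1, 0, 0, 1, 0])
    scores = np.array([0.1, 0.2, 0.8, 0.7, 0.3, 0.2, 0.9, 0.1])

    m = basic_metricor()
    res = m.metric_F1_T(labels, scores, use_parallel=True,
                        parallel_method='chunked', chunk_size=10, max_workers=8)
    # res contains 'F1_T', 'P_T', 'R_T', 'thre_T' and 'ACC_T'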
+ + Args: + labels: Ground truth labels + scores: Anomaly scores + use_parallel: Whether to use parallel processing (default: True) + parallel_method: Type of parallel processing ('standard' or 'chunked') + chunk_size: Size of chunks for chunked parallel processing + max_workers: Maximum number of worker threads + """ + result = {} + labels = torch.tensor(labels, dtype=torch.int) + score = torch.tensor(scores, dtype=torch.float) + + # Choose which method to use + if use_parallel: + if parallel_method == 'chunked': + f1, details = self.__best_ts_fbeta_score_parallel_chunked( + labels, score, beta=1, chunk_size=chunk_size, max_workers=max_workers + ) + else: # standard parallel + f1, details = self.__best_ts_fbeta_score_parallel(labels, score, beta=1) + else: + f1, details = self.__best_ts_fbeta_score(labels, score, beta=1) + + result['thre_T'] = details['threshold'] + result['ACC_T'] = sklearn.metrics.accuracy_score(labels, score > details['threshold']) + result['P_T'] = details['precision'] + result['R_T'] = details['recall'] + result['F1_T'] = f1 + + return result + + def __best_ts_fbeta_score_parallel(self, labels: torch.Tensor, scores: torch.Tensor, beta: float, + recall_cardinality_fn: Callable = improved_cardinality_fn, + weighted_precision: bool = True, n_splits: int = 1500) -> Tuple[ + float, Dict[str, Any]]: + """ + Parallel version of best_ts_fbeta_score using ThreadPoolExecutor. + + Uses threading instead of multiprocessing to avoid serialization issues + with PyTorch tensors and instance methods. + """ + + # Use same parameter range as sequential version for consistency + device = scores.device + p_values = torch.linspace(0, 1.0, steps=n_splits, device=device) + thresholds = torch.quantile(scores, p_values) + + label_ranges = self.compute_window_indices(labels) + precision = torch.empty_like(thresholds, dtype=torch.float) + recall = torch.empty_like(thresholds, dtype=torch.float) + + def process_single_threshold(idx_threshold_pair): + """Process a single threshold computation""" + idx, threshold = idx_threshold_pair + + # Create predictions for this threshold + predictions = (scores > threshold).long() + + # Calculate precision and recall using instance method + prec, rec = self.ts_precision_and_recall( + labels, + predictions, + alpha=0, + recall_cardinality_fn=recall_cardinality_fn, + anomaly_ranges=label_ranges, + weighted_precision=weighted_precision, + ) + + # Handle edge case to avoid 0/0 in F-score computation + if prec == 0 and rec == 0: + rec = 1 + + return idx, prec, rec + + # Use ThreadPoolExecutor instead of ProcessPoolExecutor + # This allows us to use instance methods and share PyTorch tensors safely + max_workers = min(16, len(thresholds)) # Don't create more threads than thresholds + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all threshold computations + futures = { + executor.submit(process_single_threshold, (i, t)): i + for i, t in enumerate(thresholds) + } + + # Collect results as they complete + for future in tqdm(as_completed(futures), total=len(futures), + desc="Calculating F-beta score (parallel)"): + idx, prec, rec = future.result() + precision[idx] = prec + recall[idx] = rec + + # Compute F-scores and find the best one + f_score = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + max_score_index = torch.argmax(f_score) + + return ( + f_score[max_score_index].item(), + dict( + threshold=thresholds[max_score_index].item(), + precision=precision[max_score_index].item(), + 
recall=recall[max_score_index].item(), + ), + ) + + def __best_ts_fbeta_score_parallel_chunked(self, labels: torch.Tensor, scores: torch.Tensor, beta: float, + recall_cardinality_fn: Callable = improved_cardinality_fn, + weighted_precision: bool = True, n_splits: int = 1500, + chunk_size: int = 10, max_workers: int = 8) -> Tuple[float, Dict[str, Any]]: + """ + Chunked parallel version of best_ts_fbeta_score using ThreadPoolExecutor. + + This version processes thresholds in chunks to reduce overhead and improve efficiency. + + Args: + labels: Ground truth labels + scores: Anomaly scores + beta: Beta parameter for F-beta score + recall_cardinality_fn: Cardinality function for recall calculation + weighted_precision: Whether to use weighted precision + n_splits: Number of threshold splits + chunk_size: Number of thresholds to process in each chunk + max_workers: Maximum number of worker threads + """ + + # Use same parameter range as sequential version for consistency + device = scores.device + p_values = torch.linspace(0, 1.0, steps=n_splits, device=device) + thresholds = torch.quantile(scores, p_values) + + label_ranges = self.compute_window_indices(labels) + precision = torch.empty_like(thresholds, dtype=torch.float) + recall = torch.empty_like(thresholds, dtype=torch.float) + + def process_threshold_chunk(chunk_data): + """Process a chunk of thresholds""" + chunk_indices, chunk_thresholds = chunk_data + chunk_results = [] + + # Process each threshold in the chunk + for i, (idx, threshold) in enumerate(zip(chunk_indices, chunk_thresholds)): + # Create predictions for this threshold + predictions = (scores > threshold).long() + + # Calculate precision and recall using instance method + prec, rec = self.ts_precision_and_recall( + labels, + predictions, + alpha=0, + recall_cardinality_fn=recall_cardinality_fn, + anomaly_ranges=label_ranges, + weighted_precision=weighted_precision, + ) + + # Handle edge case to avoid 0/0 in F-score computation + if prec == 0 and rec == 0: + rec = 1 + + chunk_results.append((idx, prec, rec)) + + return chunk_results + + # Create chunks of threshold indices and values + chunks = [] + for i in range(0, len(thresholds), chunk_size): + end_idx = min(i + chunk_size, len(thresholds)) + chunk_indices = list(range(i, end_idx)) + chunk_thresholds = thresholds[i:end_idx] + chunks.append((chunk_indices, chunk_thresholds)) + + print(f"Processing {len(thresholds)} thresholds in {len(chunks)} chunks of size ~{chunk_size}") + + # Use ThreadPoolExecutor to process chunks in parallel + actual_workers = min(max_workers, len(chunks)) + + with ThreadPoolExecutor(max_workers=actual_workers) as executor: + # Submit all chunk computations + futures = { + executor.submit(process_threshold_chunk, chunk): i + for i, chunk in enumerate(chunks) + } + + # Collect results as they complete + for future in tqdm(as_completed(futures), total=len(futures), + desc=f"Processing {len(chunks)} chunks (chunked parallel)"): + chunk_results = future.result() + + # Store results in the appropriate positions + for idx, prec, rec in chunk_results: + precision[idx] = prec + recall[idx] = rec + + # Compute F-scores and find the best one + f_score = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + max_score_index = torch.argmax(f_score) + + return ( + f_score[max_score_index].item(), + dict( + threshold=thresholds[max_score_index].item(), + precision=precision[max_score_index].item(), + recall=recall[max_score_index].item(), + ), + ) + + def compute_window_indices(self, 
binary_labels: torch.Tensor) -> List[Tuple[int, int]]: + """ + Compute a list of indices where anomaly windows begin and end. + + :param binary_labels: A 1-D :class:`~torch.Tensor` containing ``1`` for an anomalous time step or ``0`` otherwise. + :return: A list of tuples ``(start, end)`` for each anomaly window in ``binary_labels``, where ``start`` is the + index at which the window starts and ``end`` is the first index after the end of the window. + """ + boundaries = torch.empty_like(binary_labels) + boundaries[0] = 0 + boundaries[1:] = binary_labels[:-1] + boundaries *= -1 + boundaries += binary_labels + # boundaries will be 1 where a window starts and -1 at the end of a window + + indices = torch.nonzero(boundaries, as_tuple=True)[0].tolist() + if len(indices) % 2 != 0: + # Add the last index as the end of a window if appropriate + indices.append(binary_labels.shape[0]) + indices = [(indices[i], indices[i + 1]) for i in range(0, len(indices), 2)] + + return indices + + def _compute_overlap(self, preds: torch.Tensor, pred_indices: List[Tuple[int, int]], + gt_indices: List[Tuple[int, int]], alpha: float, + bias_fn: Callable, cardinality_fn: Callable, + use_window_weight: bool = False) -> float: + n_gt_windows = len(gt_indices) + n_pred_windows = len(pred_indices) + total_score = 0.0 + total_gt_points = 0 + + i = j = 0 + while i < n_gt_windows and j < n_pred_windows: + gt_start, gt_end = gt_indices[i] + window_length = gt_end - gt_start + total_gt_points += window_length + i += 1 + + cardinality = 0 + while j < n_pred_windows and pred_indices[j][1] <= gt_start: + j += 1 + while j < n_pred_windows and pred_indices[j][0] < gt_end: + j += 1 + cardinality += 1 + + if cardinality == 0: + # cardinality == 0 means no overlap at all, hence no contribution + continue + + # The last predicted window that overlaps our current window could also overlap the next window. + # Therefore, we must consider it again in the next loop iteration. + j -= 1 + + cardinality_multiplier = cardinality_fn(cardinality, window_length) + + prediction_inside_ground_truth = preds[gt_start:gt_end] + # We calculate omega directly in the bias function, because this can greatly improve running time + # for the constant bias, for example. + omega = bias_fn(prediction_inside_ground_truth) + + # Either weight evenly across all windows or based on window length + weight = window_length if use_window_weight else 1 + + # Existence reward (if cardinality > 0 then this is certainly 1) + total_score += alpha * weight + # Overlap reward + total_score += (1 - alpha) * cardinality_multiplier * omega * weight + + denom = total_gt_points if use_window_weight else n_gt_windows + + return total_score / denom + + def ts_precision_and_recall(self, anomalies: torch.Tensor, predictions: torch.Tensor, alpha: float = 0, + recall_bias_fn: Callable[[torch.Tensor], float] = constant_bias_fn, + recall_cardinality_fn: Callable[[int], float] = inverse_proportional_cardinality_fn, + precision_bias_fn: Optional[Callable] = None, + precision_cardinality_fn: Optional[Callable] = None, + anomaly_ranges: Optional[List[Tuple[int, int]]] = None, + prediction_ranges: Optional[List[Tuple[int, int]]] = None, + weighted_precision: bool = False) -> Tuple[float, float]: + """ + Computes precision and recall for time series as defined in [Tatbul2018]_. + + .. note:: + The default parameters for this function correspond to the defaults recommended in [Tatbul2018]_. 
However, + those might not be desirable in most cases, please see [Wagner2023]_ for a detailed discussion. + + :param anomalies: Binary 1-D :class:`~torch.Tensor` of shape ``(length,)`` containing the true labels. + :param predictions: Binary 1-D :class:`~torch.Tensor` of shape ``(length,)`` containing the predicted labels. + :param alpha: Weight for existence term in recall. + :param recall_bias_fn: Function that computes the bias term for a given ground-truth window. + :param recall_cardinality_fn: Function that compute the cardinality factor for a given ground-truth window. + :param precision_bias_fn: Function that computes the bias term for a given predicted window. + If ``None``, this will be the same as ``recall_bias_function``. + :param precision_cardinality_fn: Function that computes the cardinality factor for a given predicted window. + If ``None``, this will be the same as ``recall_cardinality_function``. + :param weighted_precision: If True, the precision score of a predicted window will be weighted with the + length of the window in the final score. Otherwise, each window will have the same weight. + :param anomaly_ranges: A list of tuples ``(start, end)`` for each anomaly window in ``anomalies``, where ``start`` + is the index at which the window starts and ``end`` is the first index after the end of the window. This can + be ``None``, in which case the list is computed automatically from ``anomalies``. + :param prediction_ranges: A list of tuples ``(start, end)`` for each anomaly window in ``predictions``, where + ``start`` is the index at which the window starts and ``end`` is the first index after the end of the window. + This can be ``None``, in which case the list is computed automatically from ``predictions``. + :return: A tuple consisting of the time-series precision and recall for the given labels. 
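(A worked toy example of this computation, hand-checked under the default arguments above; the tensors are illustrative only:)

    import torch

    m = basic_metricor()
    anomalies = torch.tensor([0, 1, 1, 1, 0, 0, 0, 1, 1, 0])
    predictions = torch.tensor([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])

    # With alpha=0, the constant-bias and inverse-proportional-cardinality defaults,
    # and unweighted precision, each ground-truth window is credited with the fraction
    # of it that is predicted (recall) and each predicted window with the fraction of
    # it that is truly anomalous (precision): here recall = (1/3 + 1)/2 = 2/3 and
    # precision = (1 + 2/3)/2 = 5/6.
    prec, rec = m.ts_precision_and_recall(anomalies, predictions)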
+ """ + has_anomalies = torch.any(anomalies > 0).item() + has_predictions = torch.any(predictions > 0).item() + + # Catch special cases which would cause a division by zero + if not has_predictions and not has_anomalies: + # In this case, the classifier is perfect, so it makes sense to set precision and recall to 1 + return 1, 1 + elif not has_predictions or not has_anomalies: + return 0, 0 + + # Set precision functions to the same as recall functions if they are not given + if precision_bias_fn is None: + precision_bias_fn = recall_bias_fn + if precision_cardinality_fn is None: + precision_cardinality_fn = recall_cardinality_fn + + if anomaly_ranges is None: + anomaly_ranges = self.compute_window_indices(anomalies) + if prediction_ranges is None: + prediction_ranges = self.compute_window_indices(predictions) + + recall = self._compute_overlap(predictions, prediction_ranges, anomaly_ranges, alpha, recall_bias_fn, + recall_cardinality_fn) + precision = self._compute_overlap(anomalies, anomaly_ranges, prediction_ranges, 0, precision_bias_fn, + precision_cardinality_fn, use_window_weight=weighted_precision) + + return precision, recall + + def __best_ts_fbeta_score(self, labels: torch.Tensor, scores: torch.Tensor, beta: float, + recall_cardinality_fn: Callable = improved_cardinality_fn, + weighted_precision: bool = True, n_splits: int = 1500) -> Tuple[float, Dict[str, Any]]: + # Build thresholds from p-values (quantiles/percentiles) of the score distribution + # p_values in [0, 1]; thresholds = percentile(scores, p_values) + device = scores.device + p_values = torch.linspace(0, 1.0, steps=n_splits, device=device) + thresholds = torch.quantile(scores, p_values) + print("Here is the shape of thresholds",thresholds.shape) + precision = torch.empty_like(thresholds, dtype=torch.float) + recall = torch.empty_like(thresholds, dtype=torch.float) + predictions = torch.empty_like(scores, dtype=torch.long) + + print("Here is the shape of labels",labels.shape) + print("Here is the shape of scores",scores.shape) + print("Here is the shape of predictions",predictions.shape) + print("Here is the shape of precision",precision.shape) + print("Here is the shape of recall",recall.shape) + + label_ranges = self.compute_window_indices(labels) + + for i, t in tqdm(enumerate(thresholds), total=len(thresholds), + desc="Calculating F-beta score for thresholds"): + # predictions are 0/1 longs to be compatible with downstream computations + torch.greater(scores, t, out=predictions) + prec, rec = self.ts_precision_and_recall( + labels, + predictions, + alpha=0, + recall_cardinality_fn=recall_cardinality_fn, + anomaly_ranges=label_ranges, + weighted_precision=weighted_precision, + ) + + # Avoid 0/0 in F-score computation when both prec and rec are 0 + if prec == 0 and rec == 0: + rec = 1 + + precision[i] = prec + recall[i] = rec + + f_score = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + max_score_index = torch.argmax(f_score) + + return ( + f_score[max_score_index].item(), + dict( + threshold=thresholds[max_score_index].item(), + precision=precision[max_score_index].item(), + recall=recall[max_score_index].item(), + ), + ) + + + + def metric_PointF1PA(self, label, score, preds=None): + import sklearn.metrics + + best_f1_adjusted = 0 + best_result = None + q_values = np.arange(0.7, 0.99, 0.001) + for q in tqdm(q_values, total= len(q_values), desc="Calculating PointF1PA"): + thre = np.quantile(score, q) + result = {} + pred = (score > thre).astype(int) + adjusted_pred = self.adjustment(label, 
pred) + accuracy = sklearn.metrics.accuracy_score(label, adjusted_pred) + P, R, F1, _ = sklearn.metrics.precision_recall_fscore_support(label, adjusted_pred, average="binary") + result['thre_PA'] = thre + result['ACC_PA'] = accuracy + result['P_PA'] = P + result['R_PA'] = R + result['F1_PA'] = F1 + # results.append(pd.DataFrame([result])) + if F1 >= best_f1_adjusted: + best_f1_adjusted = F1 + best_result = result + if best_result is not None: + return best_result + else: + assert False, "No best result found, check the input data." + # results_storage['f1_pa'] = pd.concat(results, axis=0).reset_index(drop=True) + + def _get_events(self, y_test, outlier=1, normal=0): + events = dict() + label_prev = normal + event = 0 # corresponds to no event + event_start = 0 + for tim, label in enumerate(y_test): + if label == outlier: + if label_prev == normal: + event += 1 + event_start = tim + else: + if label_prev == outlier: + event_end = tim - 1 + events[event] = (event_start, event_end) + label_prev = label + + if label_prev == outlier: + event_end = tim - 1 + events[event] = (event_start, event_end) + return events + + def metric_EventF1PA(self, label, score, preds=None): + from sklearn.metrics import precision_score + true_events = self._get_events(label) + + if preds is None: + thresholds = np.linspace(score.min(), score.max(), 100) + EventF1PA_scores = [] + + for threshold in tqdm(thresholds, total=len(thresholds), desc="Calculating EventF1PA"): + preds = (score > threshold).astype(int) + + tp = np.sum([preds[start:end + 1].any() for start, end in true_events.values()]) + fn = len(true_events) - tp + rec_e = tp/(tp + fn) + prec_t = precision_score(label, preds) + EventF1PA = 2 * rec_e * prec_t / (rec_e + prec_t + self.eps) + + EventF1PA_scores.append(EventF1PA) + + EventF1PA_Threshold = thresholds[np.argmax(EventF1PA_scores)] + EventF1PA1 = max(EventF1PA_scores) + + else: + + tp = np.sum([preds[start:end + 1].any() for start, end in true_events.values()]) + fn = len(true_events) - tp + rec_e = tp/(tp + fn) + prec_t = precision_score(label, preds) + EventF1PA1 = 2 * rec_e * prec_t / (rec_e + prec_t + self.eps) + + return EventF1PA1 + + def range_recall_new(self, labels, preds, alpha): + p = np.where(preds == 1)[0] # positions of predicted label==1 + range_pred = self.range_convers_new(preds) + range_label = self.range_convers_new(labels) + + Nr = len(range_label) # total # of real anomaly segments + + ExistenceReward = self.existence_reward(range_label, preds) + + + OverlapReward = 0 + for i in range_label: + OverlapReward += self.w(i, p) * self.Cardinality_factor(i, range_pred) + + + score = alpha * ExistenceReward + (1-alpha) * OverlapReward + if Nr != 0: + return score/Nr, ExistenceReward/Nr, OverlapReward/Nr + else: + return 0,0,0 + + def range_convers_new(self, label): + ''' + input: arrays of binary values + output: list of ordered pair [[a0,b0], [a1,b1]... 
] of the inputs + ''' + anomaly_starts = np.where(np.diff(label) == 1)[0] + 1 + anomaly_ends, = np.where(np.diff(label) == -1) + if len(anomaly_ends): + if not len(anomaly_starts) or anomaly_ends[0] < anomaly_starts[0]: + # we started with an anomaly, so the start of the first anomaly is the start of the labels + anomaly_starts = np.concatenate([[0], anomaly_starts]) + if len(anomaly_starts): + if not len(anomaly_ends) or anomaly_ends[-1] < anomaly_starts[-1]: + # we ended on an anomaly, so the end of the last anomaly is the end of the labels + anomaly_ends = np.concatenate([anomaly_ends, [len(label) - 1]]) + return list(zip(anomaly_starts, anomaly_ends)) + + def existence_reward(self, labels, preds): + ''' + labels: list of ordered pair + preds predicted data + ''' + + score = 0 + for i in labels: + if preds[i[0]:i[1]+1].any(): + score += 1 + return score + + def num_nonzero_segments(self, x): + count=0 + if x[0]>0: + count+=1 + for i in range(1, len(x)): + if x[i]>0 and x[i-1]==0: + count+=1 + return count + + def extend_postive_range(self, x, window=5): + label = x.copy().astype(float) + L = self.range_convers_new(label) # index of non-zero segments + length = len(label) + for k in range(len(L)): + s = L[k][0] + e = L[k][1] + + + x1 = np.arange(e,min(e+window//2,length)) + label[x1] += np.sqrt(1 - (x1-e)/(window)) + + x2 = np.arange(max(s-window//2,0),s) + label[x2] += np.sqrt(1 - (s-x2)/(window)) + + label = np.minimum(np.ones(length), label) + return label + + def extend_postive_range_individual(self, x, percentage=0.2): + label = x.copy().astype(float) + L = self.range_convers_new(label) # index of non-zero segments + length = len(label) + for k in range(len(L)): + s = L[k][0] + e = L[k][1] + + l0 = int((e-s+1)*percentage) + + x1 = np.arange(e,min(e+l0,length)) + label[x1] += np.sqrt(1 - (x1-e)/(2*l0)) + + x2 = np.arange(max(s-l0,0),s) + label[x2] += np.sqrt(1 - (s-x2)/(2*l0)) + + label = np.minimum(np.ones(length), label) + return label + + def TPR_FPR_RangeAUC(self, labels, pred, P, L): + indices = np.where(labels == 1)[0] + product = labels * pred + TP = np.sum(product) + newlabels = product.copy() + newlabels[indices] = 1 + + # recall = min(TP/P,1) + P_new = (P + np.sum(newlabels)) / 2 # so TPR is neither large nor small + # P_new = np.sum(labels) + recall = min(TP / P_new, 1) + # recall = TP/np.sum(labels) + # print('recall '+str(recall)) + + existence = 0 + for seg in L: + if np.sum(product[seg[0]:(seg[1] + 1)]) > 0: # if newlabels>0, that segment must contained + existence += 1 + + existence_ratio = existence / len(L) + # print(existence_ratio) + + # TPR_RangeAUC = np.sqrt(recall*existence_ratio) + # print(existence_ratio) + TPR_RangeAUC = recall * existence_ratio + + FP = np.sum(pred) - TP + # TN = np.sum((1-pred) * (1-labels)) + + # FPR_RangeAUC = FP/(FP+TN) + N_new = len(labels) - P_new + FPR_RangeAUC = FP / N_new + + Precision_RangeAUC = TP / np.sum(pred) + + return TPR_RangeAUC, FPR_RangeAUC, Precision_RangeAUC + + def RangeAUC(self, labels, score, window=0, percentage=0, plot_ROC=False, AUC_type='window'): + # AUC_type='window'/'percentage' + score_sorted = -np.sort(-score) + + P = np.sum(labels) + # print(np.sum(labels)) + if AUC_type == 'window': + labels = self.extend_postive_range(labels, window=window) + else: + labels = self.extend_postive_range_individual(labels, percentage=percentage) + + # print(np.sum(labels)) + L = self.range_convers_new(labels) + TPR_list = [0] + FPR_list = [0] + Precision_list = [1] + + for i in np.linspace(0, len(score) - 1, 
250).astype(int): + threshold = score_sorted[i] + # print('thre='+str(threshold)) + pred = score >= threshold + TPR, FPR, Precision = self.TPR_FPR_RangeAUC(labels, pred, P, L) + + TPR_list.append(TPR) + FPR_list.append(FPR) + Precision_list.append(Precision) + + TPR_list.append(1) + FPR_list.append(1) # otherwise, range-AUC will stop earlier than (1,1) + + tpr = np.array(TPR_list) + fpr = np.array(FPR_list) + prec = np.array(Precision_list) + + width = fpr[1:] - fpr[:-1] + height = (tpr[1:] + tpr[:-1]) / 2 + AUC_range = np.sum(width * height) + + width_PR = tpr[1:-1] - tpr[:-2] + height_PR = prec[1:] + AP_range = np.sum(width_PR * height_PR) + + if plot_ROC: + return AUC_range, AP_range, fpr, tpr, prec + + return AUC_range + + def range_convers_new(self, label): + ''' + input: arrays of binary values + output: list of ordered pair [[a0,b0], [a1,b1]... ] of the inputs + ''' + anomaly_starts = np.where(np.diff(label) == 1)[0] + 1 + anomaly_ends, = np.where(np.diff(label) == -1) + if len(anomaly_ends): + if not len(anomaly_starts) or anomaly_ends[0] < anomaly_starts[0]: + # we started with an anomaly, so the start of the first anomaly is the start of the labels + anomaly_starts = np.concatenate([[0], anomaly_starts]) + if len(anomaly_starts): + if not len(anomaly_ends) or anomaly_ends[-1] < anomaly_starts[-1]: + # we ended on an anomaly, so the end of the last anomaly is the end of the labels + anomaly_ends = np.concatenate([anomaly_ends, [len(label) - 1]]) + return list(zip(anomaly_starts, anomaly_ends)) + + def new_sequence(self, label, sequence_original, window): + a = max(sequence_original[0][0] - window // 2, 0) + sequence_new = [] + for i in range(len(sequence_original) - 1): + if sequence_original[i][1] + window // 2 < sequence_original[i + 1][0] - window // 2: + sequence_new.append((a, sequence_original[i][1] + window // 2)) + a = sequence_original[i + 1][0] - window // 2 + sequence_new.append((a, min(sequence_original[len(sequence_original) - 1][1] + window // 2, len(label) - 1))) + return sequence_new + + def sequencing(self, x, L, window=5): + label = x.copy().astype(float) + length = len(label) + + for k in range(len(L)): + s = L[k][0] + e = L[k][1] + + x1 = np.arange(e + 1, min(e + window // 2 + 1, length)) + label[x1] += np.sqrt(1 - (x1 - e) / (window)) + + x2 = np.arange(max(s - window // 2, 0), s) + label[x2] += np.sqrt(1 - (s - x2) / (window)) + + label = np.minimum(np.ones(length), label) + return label + + # TPR_FPR_window + def RangeAUC_volume_opt(self, labels_original, score, windowSize, thre=250): + window_3d = np.arange(0, windowSize + 1, 1) + P = np.sum(labels_original) + seq = self.range_convers_new(labels_original) + l = self.new_sequence(labels_original, seq, windowSize) + + score_sorted = -np.sort(-score) + + tpr_3d = np.zeros((windowSize + 1, thre + 2)) + fpr_3d = np.zeros((windowSize + 1, thre + 2)) + prec_3d = np.zeros((windowSize + 1, thre + 1)) + + auc_3d = np.zeros(windowSize + 1) + ap_3d = np.zeros(windowSize + 1) + + tp = np.zeros(thre) + N_pred = np.zeros(thre) + + for k, i in enumerate(np.linspace(0, len(score) - 1, thre).astype(int)): + threshold = score_sorted[i] + pred = score >= threshold + N_pred[k] = np.sum(pred) + + for window in window_3d: + + labels_extended = self.sequencing(labels_original, seq, window) + L = self.new_sequence(labels_extended, seq, window) + + TF_list = np.zeros((thre + 2, 2)) + Precision_list = np.ones(thre + 1) + j = 0 + + for i in np.linspace(0, len(score) - 1, thre).astype(int): + threshold = score_sorted[i] + pred = score 
>= threshold + labels = labels_extended.copy() + existence = 0 + + for seg in L: + labels[seg[0]:seg[1] + 1] = labels_extended[seg[0]:seg[1] + 1] * pred[seg[0]:seg[1] + 1] + if (pred[seg[0]:(seg[1] + 1)] > 0).any(): + existence += 1 + for seg in seq: + labels[seg[0]:seg[1] + 1] = 1 + + TP = 0 + N_labels = 0 + for seg in l: + TP += np.dot(labels[seg[0]:seg[1] + 1], pred[seg[0]:seg[1] + 1]) + N_labels += np.sum(labels[seg[0]:seg[1] + 1]) + + TP += tp[j] + FP = N_pred[j] - TP + + existence_ratio = existence / len(L) + + P_new = (P + N_labels) / 2 + recall = min(TP / P_new, 1) + + TPR = recall * existence_ratio + N_new = len(labels) - P_new + FPR = FP / N_new + + Precision = TP / N_pred[j] + + j += 1 + TF_list[j] = [TPR, FPR] + Precision_list[j] = Precision + + TF_list[j + 1] = [1, 1] # otherwise, range-AUC will stop earlier than (1,1) + + tpr_3d[window] = TF_list[:, 0] + fpr_3d[window] = TF_list[:, 1] + prec_3d[window] = Precision_list + + width = TF_list[1:, 1] - TF_list[:-1, 1] + height = (TF_list[1:, 0] + TF_list[:-1, 0]) / 2 + AUC_range = np.dot(width, height) + auc_3d[window] = (AUC_range) + + width_PR = TF_list[1:-1, 0] - TF_list[:-2, 0] + height_PR = Precision_list[1:] + + AP_range = np.dot(width_PR, height_PR) + ap_3d[window] = AP_range + + return tpr_3d, fpr_3d, prec_3d, window_3d, sum(auc_3d) / len(window_3d), sum(ap_3d) / len(window_3d) + + def RangeAUC_volume_opt_mem(self, labels_original, score, windowSize, thre=250): + window_3d = np.arange(0, windowSize + 1, 1) + P = np.sum(labels_original) + seq = self.range_convers_new(labels_original) + l = self.new_sequence(labels_original, seq, windowSize) + + score_sorted = -np.sort(-score) + + tpr_3d = np.zeros((windowSize + 1, thre + 2)) + fpr_3d = np.zeros((windowSize + 1, thre + 2)) + prec_3d = np.zeros((windowSize + 1, thre + 1)) + + auc_3d = np.zeros(windowSize + 1) + ap_3d = np.zeros(windowSize + 1) + + tp = np.zeros(thre) + N_pred = np.zeros(thre) + p = np.zeros((thre, len(score))) + + for k, i in enumerate(np.linspace(0, len(score) - 1, thre).astype(int)): + threshold = score_sorted[i] + pred = score >= threshold + p[k] = pred + N_pred[k] = np.sum(pred) + + for window in window_3d: + labels_extended = self.sequencing(labels_original, seq, window) + L = self.new_sequence(labels_extended, seq, window) + + TF_list = np.zeros((thre + 2, 2)) + Precision_list = np.ones(thre + 1) + j = 0 + + for i in np.linspace(0, len(score) - 1, thre).astype(int): + labels = labels_extended.copy() + existence = 0 + + for seg in L: + labels[seg[0]:seg[1] + 1] = labels_extended[seg[0]:seg[1] + 1] * p[j][seg[0]:seg[1] + 1] + if (p[j][seg[0]:(seg[1] + 1)] > 0).any(): + existence += 1 + for seg in seq: + labels[seg[0]:seg[1] + 1] = 1 + + N_labels = 0 + TP = 0 + for seg in l: + TP += np.dot(labels[seg[0]:seg[1] + 1], p[j][seg[0]:seg[1] + 1]) + N_labels += np.sum(labels[seg[0]:seg[1] + 1]) + + TP += tp[j] + FP = N_pred[j] - TP + + existence_ratio = existence / len(L) + + P_new = (P + N_labels) / 2 + recall = min(TP / P_new, 1) + + TPR = recall * existence_ratio + + N_new = len(labels) - P_new + FPR = FP / N_new + Precision = TP / N_pred[j] + j += 1 + + TF_list[j] = [TPR, FPR] + Precision_list[j] = Precision + + TF_list[j + 1] = [1, 1] + tpr_3d[window] = TF_list[:, 0] + fpr_3d[window] = TF_list[:, 1] + prec_3d[window] = Precision_list + + width = TF_list[1:, 1] - TF_list[:-1, 1] + height = (TF_list[1:, 0] + TF_list[:-1, 0]) / 2 + AUC_range = np.dot(width, height) + auc_3d[window] = (AUC_range) + + width_PR = TF_list[1:-1, 0] - TF_list[:-2, 0] + height_PR = 
Precision_list[1:] + AP_range = np.dot(width_PR, height_PR) + ap_3d[window] = (AP_range) + return tpr_3d, fpr_3d, prec_3d, window_3d, sum(auc_3d) / len(window_3d), sum(ap_3d) / len(window_3d) + + + def metric_VUS_pred(self, labels, preds, windowSize): + window_3d = np.arange(0, windowSize + 1, 1) + P = np.sum(labels) + seq = self.range_convers_new(labels) + l = self.new_sequence(labels, seq, windowSize) + + recall_3d = np.zeros((windowSize + 1)) + prec_3d = np.zeros((windowSize + 1)) + f_3d = np.zeros((windowSize + 1)) + + N_pred = np.sum(preds) + + for window in window_3d: + + labels_extended = self.sequencing(labels, seq, window) + L = self.new_sequence(labels_extended, seq, window) + + labels = labels_extended.copy() + existence = 0 + + for seg in L: + labels[seg[0]:seg[1] + 1] = labels_extended[seg[0]:seg[1] + 1] * preds[seg[0]:seg[1] + 1] + if (preds[seg[0]:(seg[1] + 1)] > 0).any(): + existence += 1 + for seg in seq: + labels[seg[0]:seg[1] + 1] = 1 + + TP = 0 + N_labels = 0 + for seg in l: + TP += np.dot(labels[seg[0]:seg[1] + 1], preds[seg[0]:seg[1] + 1]) + N_labels += np.sum(labels[seg[0]:seg[1] + 1]) + + P_new = (P + N_labels) / 2 + recall = min(TP / P_new, 1) + Precision = TP / N_pred + + recall_3d[window] = recall + prec_3d[window] = Precision + f_3d[window] = 2 * Precision * recall / (Precision + recall) if (Precision + recall) > 0 else 0 + return sum(recall_3d) / len(window_3d), sum(prec_3d) / len(window_3d), sum(f_3d) / len(window_3d) + + # def metric_F1_T_gpu_corrected(self, labels, scores, device='cuda', batch_size=50): + # """ + # GPU-accelerated F1_T that maintains exact compatibility with CPU version + # Only the threshold generation and prediction computation is done on GPU + # The actual metric calculation uses your original CPU functions + # """ + # if not torch.cuda.is_available(): + # print("CUDA not available, falling back to CPU implementation") + # return self.metric_F1_T(labels, scores) + # + # print(f"Computing F1_T on {device} (corrected version)") + # start_time = time.time() + # + # # Keep original data types for compatibility + # labels_np = np.array(labels) + # scores_np = np.array(scores) + # + # # Use GPU only for threshold generation + # scores_gpu = torch.tensor(scores_np, dtype=torch.float32, device=device) + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits, device=device) + # thresholds_gpu = torch.quantile(scores_gpu, p_values) + # thresholds = thresholds_gpu.cpu().numpy() + # + # # Convert to torch tensors for CPU computation (matching original) + # labels_torch = torch.tensor(labels_np, dtype=torch.int) + # scores_torch = torch.tensor(scores_np, dtype=torch.float) + # + # # Compute label ranges once + # label_ranges = self.compute_window_indices(labels_torch) + # + # # Process thresholds in batches but use original metric calculation + # precision_list = [] + # recall_list = [] + # + # if batch_size is None: + # batch_size = 50 # Default batch size + # + # beta = 1 + # predictions = torch.empty_like(scores_torch, dtype=torch.long) + # + # for i in tqdm(range(0, n_splits, batch_size), + # desc="Computing metrics (corrected)"): + # end_idx = min(i + batch_size, n_splits) + # + # batch_precisions = [] + # batch_recalls = [] + # + # for j in range(i, end_idx): + # threshold = thresholds[j] + # + # # Compute predictions + # torch.greater(scores_torch, threshold, out=predictions) + # + # # Use your original ts_precision_and_recall function + # prec, rec = self.ts_precision_and_recall( + # labels_torch, + # predictions, + # 
alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # # Handle edge case + # if prec == 0 and rec == 0: + # rec = 1 + # + # batch_precisions.append(prec) + # batch_recalls.append(rec) + # + # precision_list.extend(batch_precisions) + # recall_list.extend(batch_recalls) + # + # # Convert to tensors for final computation + # precision = torch.tensor(precision_list, dtype=torch.float) + # recall = torch.tensor(recall_list, dtype=torch.float) + # + # # Compute F-scores + # f_scores = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + # + # # Find best threshold + # best_idx = torch.argmax(f_scores) + # best_threshold = thresholds[best_idx] + # + # # Compute accuracy + # best_predictions = scores_np > best_threshold + # accuracy = np.mean(best_predictions == labels_np) + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': f_scores[best_idx].item(), + # 'P_T': precision[best_idx].item(), + # 'R_T': recall[best_idx].item(), + # 'thre_T': best_threshold, + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_parallel_cpu(self, labels, scores, num_workers=8): + # """ + # CPU-parallel version that matches the original exactly + # Uses multiprocessing to speed up threshold evaluation + # """ + # from concurrent.futures import ProcessPoolExecutor + # import multiprocessing as mp + # + # print(f"Computing F1_T with {num_workers} CPU workers") + # start_time = time.time() + # + # # Convert to torch tensors + # labels = torch.tensor(labels, dtype=torch.int) + # scores = torch.tensor(scores, dtype=torch.float) + # + # # Generate thresholds + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores, p_values) + # + # # Compute label ranges once + # label_ranges = self.compute_window_indices(labels) + # + # # Split thresholds for parallel processing + # threshold_chunks = torch.chunk(thresholds, num_workers) + # + # # Process in parallel + # beta = 1 + # with ProcessPoolExecutor(max_workers=num_workers) as executor: + # futures = [] + # for chunk in threshold_chunks: + # future = executor.submit( + # self._compute_f1t_chunk, + # chunk, labels, scores, beta, label_ranges, True + # ) + # futures.append(future) + # + # # Collect results + # all_results = [] + # for future in tqdm(as_completed(futures), + # total=len(futures), + # desc="Processing chunks"): + # chunk_results = future.result() + # all_results.extend(chunk_results) + # + # # Find best result + # best_result = max(all_results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores > best_result['threshold'] + # accuracy = torch.mean((best_predictions == labels).float()).item() + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_hybrid(self, labels, scores, device='cuda'): + # """ + # Hybrid approach: GPU for threshold generation and prediction, + # CPU parallel for metric calculation + # """ + # if not torch.cuda.is_available(): + # return self.metric_F1_T_parallel_cpu(labels, scores) + # + # print(f"Computing F1_T with hybrid GPU/CPU approach") + # start_time = time.time() + # + # # Generate thresholds on GPU (fast) + # labels_gpu = 
torch.tensor(labels, dtype=torch.int32, device=device) + # scores_gpu = torch.tensor(scores, dtype=torch.float32, device=device) + # + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits, device=device) + # thresholds_gpu = torch.quantile(scores_gpu, p_values) + # + # # Generate all predictions on GPU at once (if memory allows) + # try: + # # This creates a matrix of shape (n_thresholds, n_samples) + # all_predictions_gpu = scores_gpu.unsqueeze(0) > thresholds_gpu.unsqueeze(1) + # all_predictions = all_predictions_gpu.cpu().long() + # thresholds = thresholds_gpu.cpu() + # print(" Generated all predictions on GPU") + # except RuntimeError as e: + # if "out of memory" in str(e): + # print(" Not enough GPU memory, falling back to batched approach") + # return self.metric_F1_T_gpu_corrected(labels, scores, batch_size=50) + # else: + # raise e + # + # # Move back to CPU for metric calculation + # labels_cpu = torch.tensor(labels, dtype=torch.int) + # scores_cpu = torch.tensor(scores, dtype=torch.float) + # + # # Compute label ranges + # label_ranges = self.compute_window_indices(labels_cpu) + # + # # Parallel CPU computation of metrics + # beta = 1 + # from concurrent.futures import ThreadPoolExecutor + # + # def compute_single_threshold(idx): + # predictions = all_predictions[idx] + # + # prec, rec = self.ts_precision_and_recall( + # labels_cpu, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # f_score = (1 + beta ** 2) * prec * rec / (beta ** 2 * prec + rec) + # + # return { + # 'idx': idx, + # 'f_score': f_score, + # 'precision': prec, + # 'recall': rec, + # 'threshold': thresholds[idx].item() + # } + # + # # Process with thread pool + # with ThreadPoolExecutor(max_workers=8) as executor: + # futures = [executor.submit(compute_single_threshold, i) + # for i in range(n_splits)] + # + # results = [] + # for future in tqdm(as_completed(futures), + # total=n_splits, + # desc="Computing metrics"): + # results.append(future.result()) + # + # # Find best result + # best_result = max(results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores_cpu > best_result['threshold'] + # accuracy = torch.mean((best_predictions == labels_cpu).float()).item() + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_optimized(self, labels, scores, num_workers=None): + # """ + # Optimized version using the best strategies from our tests + # """ + # if num_workers is None: + # num_workers = min(mp.cpu_count(), 8) + # + # print(f"Computing F1_T (optimized) with {num_workers} workers") + # start_time = time.time() + # + # # Convert to torch tensors + # labels = torch.tensor(labels, dtype=torch.int) + # scores = torch.tensor(scores, dtype=torch.float) + # + # # Generate thresholds + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores, p_values) + # + # # Pre-compute label ranges once + # label_ranges = self.compute_window_indices(labels) + # + # # Pre-generate all predictions at once (memory efficient) + # print("Pre-computing predictions...") + # predictions_list = [] + # for i in range(0, n_splits, 100): # 
Process in chunks to save memory + # end_idx = min(i + 100, n_splits) + # batch_thresholds = thresholds[i:end_idx] + # # Create boolean predictions then convert to long + # batch_preds = (scores.unsqueeze(0) > batch_thresholds.unsqueeze(1)).long() # FIX: Convert to long + # predictions_list.append(batch_preds) + # + # all_predictions = torch.cat(predictions_list, dim=0) + # print(f"Predictions ready, computing metrics...") + # + # # Define worker function + # def compute_metrics_batch(indices): + # results = [] + # for idx in indices: + # predictions = all_predictions[idx] + # + # prec, rec = self.ts_precision_and_recall( + # labels, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # f_score = 2 * prec * rec / (prec + rec) + # + # results.append({ + # 'idx': idx, + # 'f_score': f_score, + # 'precision': prec, + # 'recall': rec, + # 'threshold': thresholds[idx].item() + # }) + # + # return results + # + # # Split indices for workers + # indices = list(range(n_splits)) + # chunk_size = len(indices) // num_workers + # if chunk_size == 0: + # chunk_size = 1 + # index_chunks = [indices[i:i + chunk_size] for i in range(0, len(indices), chunk_size)] + # + # # Process with thread pool (better for this workload than process pool) + # all_results = [] + # with ThreadPoolExecutor(max_workers=num_workers) as executor: + # futures = [executor.submit(compute_metrics_batch, chunk) for chunk in index_chunks] + # + # completed = 0 + # for future in as_completed(futures): + # all_results.extend(future.result()) + # completed += 1 + # print(f"Progress: {completed}/{len(futures)} chunks completed", end='\r') + # + # print() # New line after progress + # + # # Find best result + # best_result = max(all_results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores > best_result['threshold'] + # accuracy = torch.mean((best_predictions == labels).float()).item() + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_sampling(self, labels, scores, sample_rate=0.2): + # """ + # Fast approximation by sampling thresholds + # Good for quick estimates or hyperparameter tuning + # """ + # print(f"Computing F1_T with threshold sampling (rate={sample_rate})") + # start_time = time.time() + # + # # Convert to torch tensors + # labels = torch.tensor(labels, dtype=torch.int) + # scores = torch.tensor(scores, dtype=torch.float) + # + # # Generate fewer thresholds + # n_splits = int(1000 * sample_rate) + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores, p_values) + # + # # Rest is same as original + # precision = torch.empty_like(thresholds, dtype=torch.float) + # recall = torch.empty_like(thresholds, dtype=torch.float) + # predictions = torch.empty_like(scores, dtype=torch.long) + # + # label_ranges = self.compute_window_indices(labels) + # beta = 1 + # + # for i, t in enumerate(thresholds): + # torch.greater(scores, t, out=predictions) + # prec, rec = self.ts_precision_and_recall( + # labels, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # if prec 
== 0 and rec == 0: + # rec = 1 + # + # precision[i] = prec + # recall[i] = rec + # + # f_score = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + # max_score_index = torch.argmax(f_score) + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s (approximate)") + # + # return { + # 'F1_T': f_score[max_score_index].item(), + # 'P_T': precision[max_score_index].item(), + # 'R_T': recall[max_score_index].item(), + # 'thre_T': thresholds[max_score_index].item(), + # 'ACC_T': sklearn.metrics.accuracy_score(labels, scores > thresholds[max_score_index]) + # } + # + # def metric_F1_T_chunked(self, labels, scores, chunk_size=50, num_workers=4): + # """ + # Simple chunked parallel processing without pre-computing all predictions + # More memory efficient and often faster + # """ + # from concurrent.futures import ProcessPoolExecutor + # import multiprocessing as mp + # + # print(f"Computing F1_T (chunked) with {num_workers} workers, chunk_size={chunk_size}") + # start_time = time.time() + # + # # Convert to torch tensors + # labels_t = torch.tensor(labels, dtype=torch.int) + # scores_t = torch.tensor(scores, dtype=torch.float) + # + # # Generate thresholds + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores_t, p_values).numpy() + # + # # Convert back to numpy for pickling + # labels_np = labels_t.numpy() + # scores_np = scores_t.numpy() + # + # # Helper function for parallel processing + # def process_chunk(args): + # chunk_thresholds, labels_local, scores_local = args + # results = [] + # + # # Convert back to torch tensors in worker + # labels_tensor = torch.tensor(labels_local, dtype=torch.int) + # scores_tensor = torch.tensor(scores_local, dtype=torch.float) + # predictions = torch.empty_like(scores_tensor, dtype=torch.long) + # + # # Compute label ranges in worker + # label_ranges_local = self.compute_window_indices(labels_tensor) + # + # for threshold in chunk_thresholds: + # torch.greater(scores_tensor, threshold, out=predictions) + # + # prec, rec = self.ts_precision_and_recall( + # labels_tensor, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges_local, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # f_score = 2 * prec * rec / (prec + rec) + # + # results.append({ + # 'f_score': f_score, + # 'precision': prec, + # 'recall': rec, + # 'threshold': threshold + # }) + # + # return results + # + # # Create chunks of thresholds + # threshold_chunks = [thresholds[i:i + chunk_size] + # for i in range(0, len(thresholds), chunk_size)] + # + # # Prepare arguments for workers + # chunk_args = [(chunk, labels_np, scores_np) for chunk in threshold_chunks] + # + # # Process in parallel + # all_results = [] + # with ProcessPoolExecutor(max_workers=num_workers) as executor: + # for i, result_chunk in enumerate(executor.map(process_chunk, chunk_args)): + # all_results.extend(result_chunk) + # print(f"Progress: {(i + 1) * chunk_size}/{n_splits} thresholds processed", end='\r') + # + # print() # New line + # + # # Find best result + # best_result = max(all_results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores_np > best_result['threshold'] + # accuracy = np.mean(best_predictions == labels_np) + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': 
best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + + # def metric_F1_T_optimized(self, labels, scores, num_workers=None): + # """ + # Optimized version using the best strategies from our tests + # """ + # if num_workers is None: + # num_workers = min(mp.cpu_count(), 8) + # + # print(f"Computing F1_T (optimized) with {num_workers} workers") + # start_time = time.time() + # + # # Convert to torch tensors + # labels = torch.tensor(labels, dtype=torch.int) + # scores = torch.tensor(scores, dtype=torch.float) + # + # # Generate thresholds + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores, p_values) + # + # # Pre-compute label ranges once + # label_ranges = self.compute_window_indices(labels) + # + # # Pre-generate all predictions at once (memory efficient) + # print("Pre-computing predictions...") + # predictions_list = [] + # for i in range(0, n_splits, 100): # Process in chunks to save memory + # end_idx = min(i + 100, n_splits) + # batch_thresholds = thresholds[i:end_idx] + # # Create boolean predictions then convert to long + # batch_preds = (scores.unsqueeze(0) > batch_thresholds.unsqueeze(1)).long() # FIX: Convert to long + # predictions_list.append(batch_preds) + # + # all_predictions = torch.cat(predictions_list, dim=0) + # print(f"Predictions ready, computing metrics...") + # + # # Define worker function + # def compute_metrics_batch(indices): + # results = [] + # for idx in indices: + # predictions = all_predictions[idx] + # + # prec, rec = self.ts_precision_and_recall( + # labels, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # f_score = 2 * prec * rec / (prec + rec) + # + # results.append({ + # 'idx': idx, + # 'f_score': f_score, + # 'precision': prec, + # 'recall': rec, + # 'threshold': thresholds[idx].item() + # }) + # + # return results + # + # # Split indices for workers + # indices = list(range(n_splits)) + # chunk_size = len(indices) // num_workers + # if chunk_size == 0: + # chunk_size = 1 + # index_chunks = [indices[i:i + chunk_size] for i in range(0, len(indices), chunk_size)] + # + # # Process with thread pool (better for this workload than process pool) + # all_results = [] + # with ThreadPoolExecutor(max_workers=num_workers) as executor: + # futures = [executor.submit(compute_metrics_batch, chunk) for chunk in index_chunks] + # + # completed = 0 + # for future in as_completed(futures): + # all_results.extend(future.result()) + # completed += 1 + # print(f"Progress: {completed}/{len(futures)} chunks completed", end='\r') + # + # print() # New line after progress + # + # # Find best result + # best_result = max(all_results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores > best_result['threshold'] + # accuracy = torch.mean((best_predictions == labels).float()).item() + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_sampling(self, labels, scores, sample_rate=0.2): + # """ + # Fast approximation by sampling thresholds + # Good for quick estimates or hyperparameter tuning + # """ + # 
print(f"Computing F1_T with threshold sampling (rate={sample_rate})") + # start_time = time.time() + # + # # Convert to torch tensors + # labels = torch.tensor(labels, dtype=torch.int) + # scores = torch.tensor(scores, dtype=torch.float) + # + # # Generate fewer thresholds + # n_splits = int(1000 * sample_rate) + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores, p_values) + # + # # Rest is same as original + # precision = torch.empty_like(thresholds, dtype=torch.float) + # recall = torch.empty_like(thresholds, dtype=torch.float) + # predictions = torch.empty_like(scores, dtype=torch.long) # FIX: Ensure long type + # + # label_ranges = self.compute_window_indices(labels) + # beta = 1 + # + # for i, t in enumerate(thresholds): + # torch.greater(scores, t, out=predictions) + # prec, rec = self.ts_precision_and_recall( + # labels, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # precision[i] = prec + # recall[i] = rec + # + # f_score = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall) + # max_score_index = torch.argmax(f_score) + # + # # Calculate accuracy + # best_predictions = (scores > thresholds[max_score_index]).long() + # accuracy = torch.mean((best_predictions == labels).float()).item() + # + # elapsed = time.time() - start_time + # print(f"F1_T computed in {elapsed:.2f}s (approximate)") + # + # return { + # 'F1_T': f_score[max_score_index].item(), + # 'P_T': precision[max_score_index].item(), + # 'R_T': recall[max_score_index].item(), + # 'thre_T': thresholds[max_score_index].item(), + # 'ACC_T': accuracy + # } + # + # def metric_F1_T_chunked(self, labels, scores, chunk_size=50, num_workers=4): + # """ + # Simple chunked parallel processing with detailed progress bar + # """ + # from concurrent.futures import ProcessPoolExecutor, as_completed + # from tqdm import tqdm + # import multiprocessing as mp + # + # print(f"Computing F1_T (chunked) with {num_workers} workers, chunk_size={chunk_size}") + # start_time = time.time() + # + # # Convert to torch tensors + # labels_t = torch.tensor(labels, dtype=torch.int) + # scores_t = torch.tensor(scores, dtype=torch.float) + # + # # Generate thresholds + # n_splits = 1000 + # p_values = torch.linspace(0.0, 1.0, steps=n_splits) + # thresholds = torch.quantile(scores_t, p_values).numpy() + # + # # Convert back to numpy for pickling + # labels_np = labels_t.numpy() + # scores_np = scores_t.numpy() + # + # # Create chunks of thresholds + # threshold_chunks = [thresholds[i:i + chunk_size] + # for i in range(0, len(thresholds), chunk_size)] + # + # total_chunks = len(threshold_chunks) + # print(f"Split {n_splits} thresholds into {total_chunks} chunks") + # + # # Process in parallel with progress bar + # all_results = [] + # + # # Method 1: Using executor.map with tqdm + # with ProcessPoolExecutor(max_workers=num_workers) as executor: + # with tqdm(total=n_splits, desc="Processing F1_T thresholds", unit="threshold", colour="blue") as pbar: + # # Prepare arguments + # chunk_args = [(chunk, labels_np, scores_np) for chunk in threshold_chunks] + # + # # Process and update progress bar + # for i, result_chunk in enumerate(executor.map(self._process_f1t_chunk, chunk_args)): + # all_results.extend(result_chunk) + # pbar.update(len(threshold_chunks[i])) # Update by number of thresholds in chunk + # pbar.set_postfix({ + # 'chunk': f"{i + 
1}/{total_chunks}", + # 'results': len(all_results) + # }) + # + # # Find best result + # best_result = max(all_results, key=lambda x: x['f_score']) + # + # # Compute accuracy + # best_predictions = scores_np > best_result['threshold'] + # accuracy = np.mean(best_predictions == labels_np) + # + # elapsed = time.time() - start_time + # print(f"✓ F1_T computed in {elapsed:.2f}s") + # print(f" Best F1: {best_result['f_score']:.4f} at threshold {best_result['threshold']:.4f}") + # + # return { + # 'F1_T': best_result['f_score'], + # 'P_T': best_result['precision'], + # 'R_T': best_result['recall'], + # 'thre_T': best_result['threshold'], + # 'ACC_T': accuracy + # } + # + # @staticmethod + # def _process_f1t_chunk(args): + # """ + # Static method to process a chunk of thresholds for F1_T metrics. + # This can be pickled for multiprocessing. + # """ + # chunk_thresholds, labels_local, scores_local = args + # results = [] + # + # # Convert back to torch tensors in worker + # labels_tensor = torch.tensor(labels_local, dtype=torch.int) + # scores_tensor = torch.tensor(scores_local, dtype=torch.float) + # predictions = torch.empty_like(scores_tensor, dtype=torch.long) + # + # # Compute label ranges in worker + # # We need to create a basic_metricor instance to access methods + # grader = basic_metricor() + # label_ranges_local = grader.compute_window_indices(labels_tensor) + # + # for threshold in chunk_thresholds: + # torch.greater(scores_tensor, threshold, out=predictions) + # + # prec, rec = grader.ts_precision_and_recall( + # labels_tensor, + # predictions, + # alpha=0, + # recall_cardinality_fn=improved_cardinality_fn, + # anomaly_ranges=label_ranges_local, + # weighted_precision=True, + # ) + # + # if prec == 0 and rec == 0: + # rec = 1 + # + # f_score = 2 * prec * rec / (prec + rec) + # + # results.append({ + # 'f_score': f_score, + # 'precision': prec, + # 'recall': rec, + # 'threshold': threshold + # }) + # + # return results + + def metric_Affiliation_optimized(self, label, score, num_workers=None): + """ + Optimized version with ThreadPool and better chunking + """ + if num_workers is None: + num_workers = min(mp.cpu_count(), 8) + + print(f"Computing Affiliation (optimized) with {num_workers} workers") + start_time = time.time() + + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Pre-compute ground truth events once + events_gt = convert_vector_to_events(label) + Trange = (0, len(label)) + + # Generate p-values and thresholds + p_values = np.linspace(0.8, 1, 300) + + # Pre-compute all thresholds + thresholds = np.quantile(score, p_values) + + # Pre-compute all predictions + print("Pre-computing predictions...") + all_predictions = [] + for threshold in thresholds: + preds = (score > threshold).astype(int) + all_predictions.append(preds) + + print("Computing affiliation metrics...") + + # Function to process a batch of indices + def compute_metrics_batch(indices): + results = [] + for idx in indices: + preds = all_predictions[idx] + + events_pred = convert_vector_to_events(preds) + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + prec = affiliation_metrics['Affiliation_Precision'] + rec = affiliation_metrics['Affiliation_Recall'] + + if prec + rec > 0: + f1 = 2 * prec * rec / (prec + rec + self.eps) + else: + f1 = 0.0 + + results.append({ + 'f1': f1, + 'precision': prec, + 'recall': rec, + 'p_value': p_values[idx], + 'threshold': thresholds[idx] + }) + + return results + + # Split indices for workers 
+ indices = list(range(len(p_values))) + chunk_size = len(indices) // num_workers + if chunk_size == 0: + chunk_size = 1 + index_chunks = [indices[i:i + chunk_size] for i in range(0, len(indices), chunk_size)] + + # Process with thread pool + all_results = [] + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(compute_metrics_batch, chunk) for chunk in index_chunks] + + completed = 0 + for future in as_completed(futures): + all_results.extend(future.result()) + completed += 1 + print(f"Progress: {completed}/{len(futures)} chunks completed", end='\r') + + print() # New line + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation computed in {elapsed:.2f}s") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + def metric_Affiliation_chunked(self, label, score, chunk_size=30, num_workers=4): + """ + Simple chunked parallel processing + """ + print(f"Computing Affiliation (chunked) with {num_workers} workers, chunk_size={chunk_size}") + start_time = time.time() + + # Generate p-values + p_values = np.linspace(0.8, 1, 300) + + # Create chunks of p-values + p_value_chunks = [p_values[i:i + chunk_size] + for i in range(0, len(p_values), chunk_size)] + + # Prepare arguments for workers + chunk_args = [(chunk, label, score) for chunk in p_value_chunks] + + # Process in parallel + all_results = [] + with ProcessPoolExecutor(max_workers=num_workers) as executor: + for i, result_chunk in enumerate(executor.map(self._process_affiliation_chunk, chunk_args)): + all_results.extend(result_chunk) + print(f"Progress: {(i + 1) * chunk_size}/{len(p_values)} thresholds processed", end='\r') + + print() # New line + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation computed in {elapsed:.2f}s") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + def _compute_affiliation_chunk(self, p_values_chunk, score, label, eps=1e-7): + """ + Process a chunk of p-values for affiliation metrics + """ + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Ensure proper data types to avoid float/integer issues + label = np.asarray(label, dtype=int) + score = np.asarray(score, dtype=float) + + # Convert ground truth to events once for this chunk + events_gt = convert_vector_to_events(label) + Trange = (0, len(label)) + + chunk_results = [] + for p in p_values_chunk: + threshold = np.quantile(score, p) + preds_loop = (score > threshold).astype(int) + + events_pred = convert_vector_to_events(preds_loop) + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + Affiliation_Precision = affiliation_metrics['Affiliation_Precision'] + Affiliation_Recall = affiliation_metrics['Affiliation_Recall'] + + denominator = Affiliation_Precision + Affiliation_Recall + if denominator > 0: + Affiliation_F = 2 * Affiliation_Precision * Affiliation_Recall / (denominator + eps) + else: + Affiliation_F = 0.0 + + chunk_results.append({ + 'f1': Affiliation_F, + 'precision': Affiliation_Precision, + 'recall': Affiliation_Recall, + 'p_value': p, + 'threshold': threshold + }) + + return chunk_results + + def _compute_affiliation_parallel(self, label, score, num_workers=8): + """ + Parallel computation with progress bar + """ + print(f"Computing Affiliation (parallel) with {num_workers} workers") + start_time = 
time.time() + + # Generate p-values + p_values = np.linspace(0.8, 1, 300) + total_thresholds = len(p_values) + + # Split p-values into chunks for parallel processing + p_value_chunks = np.array_split(p_values, num_workers) + + # Process chunks in parallel with progress bar + with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Submit all tasks and track chunk sizes + futures = {} + for i, chunk in enumerate(p_value_chunks): + future = executor.submit(self._compute_affiliation_chunk, chunk, score, label) + futures[future] = len(chunk) + + # Collect results with progress bar + all_results = [] + with tqdm( + total=total_thresholds, + desc="Computing affiliation metrics", + unit="threshold", + colour="green" + ) as pbar: + for future in as_completed(futures): + chunk_results = future.result() + all_results.extend(chunk_results) + # Update by the number of thresholds processed in this chunk + pbar.update(futures[future]) + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation computed in {elapsed:.2f}s") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + def metric_Affiliation_optimized(self, label, score, num_workers=None): + """ + Optimized version with ThreadPool and better chunking + """ + if num_workers is None: + num_workers = min(mp.cpu_count(), 8) + + print(f"Computing Affiliation (optimized) with {num_workers} workers") + start_time = time.time() + + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Pre-compute ground truth events once + events_gt = convert_vector_to_events(label) + Trange = (0, len(label)) + + # Generate p-values and thresholds + p_values = np.linspace(0.8, 1, 300) + + # Pre-compute all thresholds + thresholds = np.quantile(score, p_values) + + # Pre-compute all predictions + print("Pre-computing predictions...") + all_predictions = [] + for threshold in thresholds: + preds = (score > threshold).astype(int) + all_predictions.append(preds) + + print("Computing affiliation metrics...") + + # Function to process a batch of indices + def compute_metrics_batch(indices): + results = [] + for idx in indices: + preds = all_predictions[idx] + + events_pred = convert_vector_to_events(preds) + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + prec = affiliation_metrics['Affiliation_Precision'] + rec = affiliation_metrics['Affiliation_Recall'] + + if prec + rec > 0: + f1 = 2 * prec * rec / (prec + rec + self.eps) + else: + f1 = 0.0 + + results.append({ + 'f1': f1, + 'precision': prec, + 'recall': rec, + 'p_value': p_values[idx], + 'threshold': thresholds[idx] + }) + + return results + + # Split indices for workers + indices = list(range(len(p_values))) + chunk_size = len(indices) // num_workers + if chunk_size == 0: + chunk_size = 1 + index_chunks = [indices[i:i + chunk_size] for i in range(0, len(indices), chunk_size)] + + # Process with thread pool + all_results = [] + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(compute_metrics_batch, chunk) for chunk in index_chunks] + + completed = 0 + for future in as_completed(futures): + all_results.extend(future.result()) + completed += 1 + print(f"Progress: {completed}/{len(futures)} chunks completed", end='\r') + + print() # New line + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation 
computed in {elapsed:.2f}s") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + def metric_Affiliation_chunked(self, label, score, chunk_size=30, num_workers=4): + """ + Simple chunked parallel processing + """ + print(f"Computing Affiliation (chunked) with {num_workers} workers, chunk_size={chunk_size}") + start_time = time.time() + + # Generate p-values + p_values = np.linspace(0.8, 1, 300) + + # Create chunks of p-values + p_value_chunks = [p_values[i:i + chunk_size] + for i in range(0, len(p_values), chunk_size)] + + # Prepare arguments for workers + chunk_args = [(chunk, label, score) for chunk in p_value_chunks] + + # Process in parallel + all_results = [] + with ProcessPoolExecutor(max_workers=num_workers) as executor: + for i, result_chunk in enumerate(executor.map(self._process_affiliation_chunk, chunk_args)): + all_results.extend(result_chunk) + print(f"Progress: {(i + 1) * chunk_size}/{len(p_values)} thresholds processed", end='\r') + + print() # New line + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation computed in {elapsed:.2f}s") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + @staticmethod + def _process_affiliation_chunk(args): + """ + Static method to process a chunk of p-values for affiliation metrics. + This can be pickled for multiprocessing. + """ + chunk_p_values, label_local, score_local = args + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Convert ground truth to events once for this chunk + events_gt = convert_vector_to_events(label_local) + Trange = (0, len(label_local)) + + results = [] + for p in chunk_p_values: + threshold = np.quantile(score_local, p) + preds = (score_local > threshold).astype(int) + + events_pred = convert_vector_to_events(preds) + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + prec = affiliation_metrics['Affiliation_Precision'] + rec = affiliation_metrics['Affiliation_Recall'] + + if prec + rec > 0: + f1 = 2 * prec * rec / (prec + rec + 1e-7) + else: + f1 = 0.0 + + results.append({ + 'f1': f1, + 'precision': prec, + 'recall': rec, + 'p_value': p, + 'threshold': threshold + }) + + return results + + def metric_Affiliation_sampling(self, label, score, sample_rate=0.2): + """ + Fast approximation by sampling thresholds + """ + print(f"Computing Affiliation with threshold sampling (rate={sample_rate})") + start_time = time.time() + + from .affiliation.generics import convert_vector_to_events + from .affiliation.metrics import pr_from_events + + # Convert ground truth to events once + events_gt = convert_vector_to_events(label) + Trange = (0, len(label)) + + # Generate fewer p-values + n_samples = int(300 * sample_rate) + p_values = np.linspace(0.8, 1, n_samples) + + results = [] + for p in tqdm(p_values, desc="Sampling affiliation", unit="threshold"): + threshold = np.quantile(score, p) + preds = (score > threshold).astype(int) + + events_pred = convert_vector_to_events(preds) + affiliation_metrics = pr_from_events(events_pred, events_gt, Trange) + + prec = affiliation_metrics['Affiliation_Precision'] + rec = affiliation_metrics['Affiliation_Recall'] + + if prec + rec > 0: + f1 = 2 * prec * rec / (prec + rec + self.eps) + else: + f1 = 0.0 + + results.append({ + 'f1': f1, + 'precision': prec, + 'recall': rec, + 'p_value': p, + 'threshold': threshold + }) + + # Find best result + best_result = 
max(results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"Affiliation computed in {elapsed:.2f}s (approximate)") + + return best_result['f1'], best_result['precision'], best_result['recall'] + + def metric_standard_F1_chunked(self, true_labels, anomaly_scores, threshold=None, chunk_size=50, num_workers=4): + """ + Optimized chunked parallel version of metric_standard_F1. + + Calculate F1, Precision, Recall using parallel threshold processing. + + Args: + true_labels: np.ndarray, ground truth binary labels (0=normal, 1=anomaly) + anomaly_scores: np.ndarray, anomaly scores (continuous values) + threshold: float, optional. If None, will use optimal threshold based on F1 score + chunk_size: int, number of thresholds to process in each chunk + num_workers: int, number of parallel workers + + Returns: + dict: Dictionary containing various metrics + """ + # If threshold is provided, use original method + if threshold is not None: + return self.metric_standard_F1(true_labels, anomaly_scores, threshold) + + print(f"Computing standard F1 (chunked) with {num_workers} workers, chunk_size={chunk_size}") + start_time = time.time() + + # Generate thresholds + thresholds = np.linspace(0.5, 1, 500) + total_thresholds = len(thresholds) + + # Create chunks of thresholds + threshold_chunks = [thresholds[i:i + chunk_size] + for i in range(0, len(thresholds), chunk_size)] + + print(f"Split {total_thresholds} thresholds into {len(threshold_chunks)} chunks") + + # Process in parallel + all_results = [] + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + with tqdm(total=total_thresholds, desc="Processing standard F1 thresholds", unit="threshold", colour="blue") as pbar: + # Prepare arguments + chunk_args = [(chunk, true_labels, anomaly_scores) for chunk in threshold_chunks] + + # Process and update progress bar + for i, result_chunk in enumerate(executor.map(self._process_standard_f1_chunk, chunk_args)): + all_results.extend(result_chunk) + pbar.update(len(threshold_chunks[i])) + pbar.set_postfix({ + 'chunk': f"{i + 1}/{len(threshold_chunks)}", + 'results': len(all_results) + }) + + # Find best result + best_result = max(all_results, key=lambda x: x['f1']) + + elapsed = time.time() - start_time + print(f"✓ Standard F1 computed in {elapsed:.2f}s") + print(f" Best F1: {best_result['f1']:.4f} at threshold {best_result['threshold']:.4f}") + + return { + 'F1': best_result['f1'], + 'Recall': best_result['recall'], + 'Precision': best_result['precision'] + } + + @staticmethod + def _process_standard_f1_chunk(args): + """ + Static method to process a chunk of thresholds for standard F1 metrics. + This can be pickled for multiprocessing. + """ + chunk_thresholds, true_labels, anomaly_scores = args + results = [] + + for t in chunk_thresholds: + threshold = np.quantile(anomaly_scores, t) + predictions = (anomaly_scores >= threshold).astype(int) + + if len(np.unique(predictions)) > 1: # Avoid division by zero + precision, recall, f1, _ = precision_recall_fscore_support( + true_labels, predictions, average='binary', zero_division=0 + ) + else: + precision, recall, f1 = 0.0, 0.0, 0.0 + + results.append({ + 'f1': f1, + 'precision': precision, + 'recall': recall, + 'threshold': threshold, + 'quantile': t + }) + + return results + + def metric_PointF1PA_chunked(self, label, score, preds=None, chunk_size=50, num_workers=4): + """ + Optimized chunked parallel version of metric_PointF1PA. + + Calculate Point F1 with Point Adjustment using parallel threshold processing. 
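+
+        Example (illustrative sketch; assumes 1-D numpy arrays and an existing
+        basic_metricor instance named grader):
+
+            labels = np.zeros(2000, dtype=int); labels[800:850] = 1
+            scores = np.random.rand(2000)
+            res = grader.metric_PointF1PA_chunked(labels, scores, chunk_size=30, num_workers=4)
+            # res is a dict with keys 'F1_PA', 'P_PA', 'R_PA', 'ACC_PA', 'thre_PA', 'quantile'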
+ + Args: + label: np.ndarray, ground truth binary labels + score: np.ndarray, anomaly scores + preds: np.ndarray, optional. If provided, use these predictions directly + chunk_size: int, number of thresholds to process in each chunk + num_workers: int, number of parallel workers + + Returns: + dict: Dictionary containing various metrics (same format as original method) + """ + # If predictions are provided, use original method + if preds is not None: + return self.metric_PointF1PA(label, score, preds) + + print(f"Computing PointF1PA (chunked) with {num_workers} workers, chunk_size={chunk_size}") + start_time = time.time() + + # Generate q_values (quantiles) + q_values = np.arange(0.7, 0.99, 0.001) + total_thresholds = len(q_values) + + # Create chunks of q_values + q_value_chunks = [q_values[i:i + chunk_size] + for i in range(0, len(q_values), chunk_size)] + + print(f"Split {total_thresholds} thresholds into {len(q_value_chunks)} chunks") + + # Process in parallel + all_results = [] + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + with tqdm(total=total_thresholds, desc="Processing PointF1PA thresholds", unit="threshold", colour="green") as pbar: + # Prepare arguments + chunk_args = [(chunk, label, score) for chunk in q_value_chunks] + + # Process and update progress bar + for i, result_chunk in enumerate(executor.map(self._process_pointf1pa_chunk, chunk_args)): + all_results.extend(result_chunk) + pbar.update(len(q_value_chunks[i])) + pbar.set_postfix({ + 'chunk': f"{i + 1}/{len(q_value_chunks)}", + 'results': len(all_results) + }) + + # Find best result + best_result = max(all_results, key=lambda x: x['F1_PA']) + + elapsed = time.time() - start_time + print(f"✓ PointF1PA computed in {elapsed:.2f}s") + print(f" Best F1_PA: {best_result['F1_PA']:.4f} at threshold {best_result['thre_PA']:.4f}") + + return best_result + + @staticmethod + def _process_pointf1pa_chunk(args): + """ + Static method to process a chunk of q_values for PointF1PA metrics. + This can be pickled for multiprocessing. 
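+
+        args is expected to be the tuple packed by metric_PointF1PA_chunked:
+        (chunk_q_values, label, score), i.e. a slice of the quantile grid plus the
+        full ground-truth labels and anomaly scores. Returns a list of result
+        dicts, one per quantile, each holding the point-adjusted metrics.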
+ """ + import sklearn.metrics + + chunk_q_values, label, score = args + results = [] + + # Create a basic_metricor instance to access adjustment method + grader = basic_metricor() + + for q in chunk_q_values: + thre = np.quantile(score, q) + pred = (score > thre).astype(int) + adjusted_pred = grader.adjustment(label, pred) + + accuracy = sklearn.metrics.accuracy_score(label, adjusted_pred) + P, R, F1, _ = sklearn.metrics.precision_recall_fscore_support(label, adjusted_pred, average="binary") + + result = { + 'thre_PA': thre, + 'ACC_PA': accuracy, + 'P_PA': P, + 'R_PA': R, + 'F1_PA': F1, + 'quantile': q + } + + results.append(result) + + return results \ No newline at end of file diff --git a/evaluation/metrics.py b/evaluation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..2ed77e5062cf7c1673d02c56ff8cf952386db6ea --- /dev/null +++ b/evaluation/metrics.py @@ -0,0 +1,379 @@ +import sys +import time +from .basic_metrics import basic_metricor, generate_curve +from statsmodels.tsa.stattools import acf +from scipy.signal import argrelextrema +import numpy as np +import multiprocessing + +import multiprocessing +import numpy as np +import torch +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from functools import partial +from tqdm import tqdm +import time + +# ============== Parallelized Affiliation ============== + +def _compute_auc_roc(labels, score): + grader = basic_metricor() + try: + return grader.metric_ROC(labels, score) + except Exception: + return 0.0 + +def _compute_auc_pr(labels, score): + grader = basic_metricor() + try: + return grader.metric_PR(labels, score) + except Exception: + return 0.0 + +def _compute_vus(labels, score, slidingWindow, version): + try: + _, _, _, _, _, _, VUS_ROC, VUS_PR = generate_curve(labels.astype(int), score, slidingWindow, version) + return VUS_ROC, VUS_PR + except Exception: + return 0.0, 0.0 + +def _compute_pointf1(labels, score): + # print("Evaluating F1 standard...") + grader = basic_metricor() + try: + # print("Using chunked parallel F1 computation...") + return grader.metric_standard_F1_chunked( + true_labels=labels, + anomaly_scores=score, + chunk_size=25, # Process 25 thresholds per chunk + num_workers=4 # Use 4 parallel workers + ) + except Exception: + # print("F1 standard computation failed, returning zeros.") + return {'F1': 0.0, 'Precision': 0.0, 'Recall': 0.0} + +def _compute_pointf1pa(labels, score): + grader = basic_metricor() + try: + return grader.metric_PointF1PA_chunked( + label=labels, + score=score, + chunk_size=30, # Process 30 quantiles per chunk + num_workers=6 # Use 6 parallel workers + ) + except Exception: + return {'F1_PA': 0.0, 'P_PA': 0.0, 'R_PA': 0.0} + +def _compute_affiliation(labels, score): + grader = basic_metricor() + try: + return grader.metric_Affiliation(labels, score) + except Exception: + return 0.0, 0.0, 0.0 + +def _compute_t_score(labels, score): + grader = basic_metricor() + try: + return grader.metric_F1_T(labels, score) + except Exception: + return {'F1_T': 0.0, 'P_T': 0.0, 'R_T': 0.0} + +def _compute_f1_t(labels, score): + grader = basic_metricor() + try: + # Use non-parallel path here to avoid pickling issues inside thread workers + # metric_F1_T(use_parallel=False) runs in-process and returns a dict + return grader.metric_F1_T(labels, score, use_parallel=True) + except Exception: + # Always return a dict to keep downstream code consistent + return {'F1_T': 0.0, 'P_T': 0.0, 'R_T': 0.0} + +def _run_task(func, args): + return 
func(*args) + + +def get_metrics_optimized(score, labels, slidingWindow=100, pred=None, version='opt', thre=250): + """ + Fully optimized metrics computation with proper parallelization + """ + metrics = {} + start_total = time.time() + + # Ensure proper data types to avoid float/integer issues + labels = np.asarray(labels, dtype=int) + score = np.asarray(score, dtype=float) + + # Determine optimal number of workers based on CPU count and workload + n_cores = multiprocessing.cpu_count() + + # For threshold-iterating functions (affiliation and F1_T) + # Use more workers since they have heavy loops + heavy_workers = min(n_cores - 2, 8) # Leave some cores for system + + # For simple metrics + light_workers = min(n_cores // 2, 8) + + print(f"Using {heavy_workers} workers for heavy metrics, {light_workers} for light metrics") + + # Start the heavy computations first (they take longest) + print("Starting heavy computations (Affiliation and F1_T)...") + heavy_start = time.time() + grader = basic_metricor() + with ProcessPoolExecutor(max_workers=2) as main_executor: + # Launch the two heaviest computations with their own internal parallelization + affiliation_future = main_executor.submit( + grader._compute_affiliation_parallel, + labels, + score, + num_workers=heavy_workers + ) + + # t_score_future = main_executor.submit( + # grader.metric_F1_T_fast, + # labels, + # score, + # num_workers=heavy_workers*2 + # ) + # + # While heavy computations are running, compute light metrics + print("Computing light metrics in parallel...") + light_start = time.time() + + with ThreadPoolExecutor(max_workers=light_workers) as light_executor: + light_futures = { + 'auc_roc': light_executor.submit(_compute_auc_roc, labels, score), + 'auc_pr': light_executor.submit(_compute_auc_pr, labels, score), + 'vus': light_executor.submit(_compute_vus, labels, score, slidingWindow, version), + 'pointf1': light_executor.submit(_compute_pointf1, labels, score), + 'pointf1pa': light_executor.submit(_compute_pointf1pa, labels, score), + 'f1_t': light_executor.submit(_compute_f1_t, labels, score) + } + + # Collect light metric results as they complete + light_results = {} + for name, future in light_futures.items(): + try: + light_results[name] = future.result() + print(f" ✓ {name} completed") + except Exception as e: + print(f" ✗ {name} failed: {e}") + light_results[name] = None + + print(f"Light metrics completed in {time.time() - light_start:.2f}s") + + # Wait for heavy computations to complete + print("Waiting for heavy computations...") + + try: + Affiliation_F, Affiliation_P, Affiliation_R = affiliation_future.result() + print(f" ✓ Affiliation completed") + except Exception as e: + print(f" ✗ Affiliation failed: {e}") + Affiliation_F, Affiliation_P, Affiliation_R = 0.0, 0.0, 0.0 + + # try: + # T_score = t_score_future.result() + # print(f" ✓ F1_T completed") + # except Exception as e: + # print(f" ✗ F1_T failed: {e}") + # T_score = {'F1_T': 0.0, 'P_T': 0.0, 'R_T': 0.0} + + print(f"Heavy metrics completed in {time.time() - heavy_start:.2f}s") + + # Unpack light results + AUC_ROC = light_results.get('auc_roc', 0.0) + AUC_PR = light_results.get('auc_pr', 0.0) + VUS_result = light_results.get('vus', (0.0, 0.0)) + if isinstance(VUS_result, tuple): + VUS_ROC, VUS_PR = VUS_result + else: + VUS_ROC, VUS_PR = 0.0, 0.0 + # print("HERE IS POINTF1: ") + # print(light_results.get('pointf1',)) + # sys.exit() + PointF1 = light_results.get('pointf1', {'F1': 0.0, 'Precision': 0.0, 'Recall': 0.0}) + PointF1PA = light_results.get('pointf1pa', 
{'F1_PA': 0.0, 'P_PA': 0.0, 'R_PA': 0.0}) + T_score = light_results.get('f1_t', {'F1_T': 0.0, 'P_T': 0.0, 'R_T': 0.0}) + # Safeguard: if upstream returned a tuple (e.g., from an older fallback), coerce to dict + if isinstance(T_score, tuple): + try: + T_score = {'F1_T': T_score[0], 'P_T': T_score[1], 'R_T': T_score[2]} + except Exception: + T_score = {'F1_T': 0.0, 'P_T': 0.0, 'R_T': 0.0} + + # Build final metrics dictionary + metrics['AUC-PR'] = AUC_PR + metrics['AUC-ROC'] = AUC_ROC + metrics['VUS-PR'] = VUS_PR + metrics['VUS-ROC'] = VUS_ROC + + metrics['Standard-F1'] = PointF1.get('F1', 0.0) + metrics['Standard-Precision'] = PointF1.get('Precision', 0.0) + metrics['Standard-Recall'] = PointF1.get('Recall', 0.0) + + metrics['PA-F1'] = PointF1PA.get('F1_PA', 0.0) + metrics['PA-Precision'] = PointF1PA.get('P_PA', 0.0) + metrics['PA-Recall'] = PointF1PA.get('R_PA', 0.0) + + metrics['Affiliation-F'] = Affiliation_F + metrics['Affiliation-P'] = Affiliation_P + metrics['Affiliation-R'] = Affiliation_R + + metrics['F1_T'] = T_score.get('F1_T', 0.0) + metrics['Precision_T'] = T_score.get('P_T', 0.0) + metrics['Recall_T'] = T_score.get('R_T', 0.0) + + print(f"\nTotal computation time: {time.time() - start_total:.2f}s") + + return metrics + + +def get_metrics(score, labels, slidingWindow=100, pred=None, version='opt', thre=250): + metrics = {} + + # Ensure proper data types to avoid float/integer issues + labels = np.asarray(labels, dtype=int) + score = np.asarray(score, dtype=float) + + ''' + Threshold Independent + ''' + grader = basic_metricor() + # AUC_ROC, Precision, Recall, PointF1, PointF1PA, Rrecall, ExistenceReward, OverlapReward, Rprecision, RF, Precision_at_k = grader.metric_new(labels, score, pred, plot_ROC=False) + try: + AUC_ROC = grader.metric_ROC(labels, score) + except Exception: + AUC_ROC = 0.0 + try: + AUC_PR = grader.metric_PR(labels, score) + except Exception: + AUC_PR = 0.0 + + # R_AUC_ROC, R_AUC_PR, _, _, _ = grader.RangeAUC(labels=labels, score=score, window=slidingWindow, plot_ROC=True) + try: + _, _, _, _, _, _,VUS_ROC, VUS_PR = generate_curve(labels.astype(int), score, slidingWindow, version, ) + except Exception: + VUS_ROC, VUS_PR = 0.0, 0.0 + + ''' + Threshold Dependent + if pred is None --> use the oracle threshold + ''' + + PointF1 = grader.metric_standard_F1(labels, score,) + PointF1PA = grader.metric_PointF1PA(labels, score,) + # EventF1PA = grader.metric_EventF1PA(labels, score,) + # RF1 = grader.metric_RF1(labels, score,) + try: + Affiliation_F, Affiliation_P, Affiliation_R = grader.metric_Affiliation(labels, score) + except Exception: + Affiliation_F, Affiliation_P, Affiliation_R = 0.0, 0.0, 0.0 + T_score = grader.metric_F1_T(labels, score) + + metrics['AUC-PR'] = AUC_PR + metrics['AUC-ROC'] = AUC_ROC + metrics['VUS-PR'] = VUS_PR + metrics['VUS-ROC'] = VUS_ROC + + metrics['Standard-F1'] = PointF1['F1'] + metrics['Standard-Precision'] = PointF1['Precision'] + metrics['Standard-Recall'] = PointF1['Recall'] + metrics['PA-F1'] = PointF1PA['F1_PA'] + metrics['PA-Precision'] = PointF1PA['P_PA'] + metrics['PA-Recall'] = PointF1PA['R_PA'] + # metrics['Event-based-F1'] = EventF1PA + # metrics['R-based-F1'] = RF1 + metrics['Affiliation-F'] = Affiliation_F + metrics['Affiliation-P'] = Affiliation_P + metrics['Affiliation-R'] = Affiliation_R + + metrics['F1_T'] = T_score['F1_T'] + metrics['Precision_T'] = T_score['P_T'] + metrics['Recall_T'] = T_score['R_T'] + + return metrics + + +def get_metrics_pred(score, labels, pred, slidingWindow=100): + metrics = {} + + # Ensure 
proper data types to avoid float/integer issues + labels = np.asarray(labels, dtype=int) + score = np.asarray(score, dtype=float) + pred = np.asarray(pred, dtype=int) + + grader = basic_metricor() + + PointF1 = grader.standard_F1(labels, score, preds=pred) + PointF1PA = grader.metric_PointF1PA(labels, score, preds=pred) + EventF1PA = grader.metric_EventF1PA(labels, score, preds=pred) + RF1 = grader.metric_RF1(labels, score, preds=pred) + Affiliation_F, Affiliation_P, Affiliation_R = grader.metric_Affiliation(labels, score, preds=pred) + VUS_R, VUS_P, VUS_F = grader.metric_VUS_pred(labels, preds=pred, windowSize=slidingWindow) + + metrics['Standard-F1'] = PointF1['F1'] + metrics['Standard-Precision'] = PointF1['Precision'] + metrics['Standard-Recall'] = PointF1['Recall'] + metrics['PA-F1'] = PointF1PA + metrics['Event-based-F1'] = EventF1PA + metrics['R-based-F1'] = RF1 + metrics['Affiliation-F'] = Affiliation_F + metrics['Affiliation-P'] = Affiliation_P + metrics['Affiliation-R'] = Affiliation_R + + metrics['VUS-Recall'] = VUS_R + metrics['VUS-Precision'] = VUS_P + metrics['VUS-F'] = VUS_F + + return metrics + +def find_length_rank(data, rank=1): + data = data.squeeze() + if len(data.shape) > 1: + return 0 + if rank == 0: + return 1 + data = data[: min(20000, len(data))] + + base = 3 + auto_corr = acf(data, nlags=400, fft=True)[base:] + + # plot_acf(data, lags=400, fft=True) + # plt.xlabel('Lags') + # plt.ylabel('Autocorrelation') + # plt.title('Autocorrelation Function (ACF)') + # plt.savefig('/data/liuqinghua/code/ts/TSAD-AutoML/AutoAD_Solution/candidate_pool/cd_diagram/ts_acf.png') + + local_max = argrelextrema(auto_corr, np.greater)[0] + + # print('auto_corr: ', auto_corr) + # print('local_max: ', local_max) + + try: + # max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) + sorted_local_max = np.argsort([auto_corr[lcm] for lcm in local_max])[::-1] # Ascending order + max_local_max = sorted_local_max[0] # Default + if rank == 1: + max_local_max = sorted_local_max[0] + if rank == 2: + for i in sorted_local_max[1:]: + if i > sorted_local_max[0]: + max_local_max = i + break + if rank == 3: + id_tmp = 1 + for i in sorted_local_max[1:]: + if i > sorted_local_max[0]: + id_tmp = i + break + for i in sorted_local_max[id_tmp:]: + if i > sorted_local_max[id_tmp]: + max_local_max = i + break + # print('sorted_local_max: ', sorted_local_max) + # print('max_local_max: ', max_local_max) + if local_max[max_local_max] < 3 or local_max[max_local_max] > 300: + return 125 + return local_max[max_local_max] + base + except Exception: + return 125 \ No newline at end of file diff --git a/evaluation/visualize.py b/evaluation/visualize.py new file mode 100644 index 0000000000000000000000000000000000000000..bfcbec6effdfac9447d53632914345da7c01df42 --- /dev/null +++ b/evaluation/visualize.py @@ -0,0 +1,99 @@ +from basic_metrics import metricor +import matplotlib.pyplot as plt +import numpy as np +import matplotlib.patches as mpatches + +def plotFig(data, label, score, slidingWindow, fileName, modelName, plotRange=None): + grader = metricor() + + R_AUC, R_AP, R_fpr, R_tpr, R_prec = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=True) # + + L, fpr, tpr= grader.metric_new(label, score, plot_ROC=True) + precision, recall, AP = grader.metric_PR(label, score) + + range_anomaly = grader.range_convers_new(label) + # print(range_anomaly) + + # max_length = min(len(score),len(data), 20000) + max_length = len(score) + + if plotRange==None: + plotRange = [0,max_length] + + fig3 = 
plt.figure(figsize=(12, 10), constrained_layout=True) + gs = fig3.add_gridspec(3, 4) + + + f3_ax1 = fig3.add_subplot(gs[0, :-1]) + plt.tick_params(labelbottom=False) + + plt.plot(data[:max_length],'k') + for r in range_anomaly: + if r[0]==r[1]: + plt.plot(r[0],data[r[0]],'r.') + else: + plt.plot(range(r[0],r[1]+1),data[range(r[0],r[1]+1)],'r') + # plt.xlim([0,max_length]) + plt.xlim(plotRange) + + + # L = [auc, precision, recall, f, Rrecall, ExistenceReward, + # OverlapReward, Rprecision, Rf, precision_at_k] + f3_ax2 = fig3.add_subplot(gs[1, :-1]) + # plt.tick_params(labelbottom=False) + L1 = [ '%.2f' % elem for elem in L] + plt.plot(score[:max_length]) + plt.hlines(np.mean(score)+3*np.std(score),0,max_length,linestyles='--',color='red') + plt.ylabel('score') + # plt.xlim([0,max_length]) + plt.xlim(plotRange) + + + #plot the data + f3_ax3 = fig3.add_subplot(gs[2, :-1]) + index = ( label + 2*(score > (np.mean(score)+3*np.std(score)))) + cf = lambda x: 'k' if x==0 else ('r' if x == 1 else ('g' if x == 2 else 'b') ) + cf = np.vectorize(cf) + + color = cf(index[:max_length]) + black_patch = mpatches.Patch(color = 'black', label = 'TN') + red_patch = mpatches.Patch(color = 'red', label = 'FN') + green_patch = mpatches.Patch(color = 'green', label = 'FP') + blue_patch = mpatches.Patch(color = 'blue', label = 'TP') + plt.scatter(np.arange(max_length), data[:max_length], c=color, marker='.') + plt.legend(handles = [black_patch, red_patch, green_patch, blue_patch], loc= 'best') + # plt.xlim([0,max_length]) + plt.xlim(plotRange) + + + f3_ax4 = fig3.add_subplot(gs[0, -1]) + plt.plot(fpr, tpr) + # plt.plot(R_fpr,R_tpr) + # plt.title('R_AUC='+str(round(R_AUC,3))) + plt.xlabel('FPR') + plt.ylabel('TPR') + # plt.legend(['ROC','Range-ROC']) + + # f3_ax5 = fig3.add_subplot(gs[1, -1]) + # plt.plot(recall, precision) + # plt.plot(R_tpr[:-1],R_prec) # I add (1,1) to (TPR, FPR) at the end !!! 
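+    # (If re-enabled, this panel would overlay the standard PR curve from
+    #  metric_PR with the range-based PR curve returned by RangeAUC, sharing the
+    #  Recall/Precision axes and the 'PR'/'Range-PR' legend set up below.)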
+ + # plt.xlabel('Recall') + # plt.ylabel('Precision') + # plt.legend(['PR','Range-PR']) + + # print('AUC=', L1[0]) + # print('F=', L1[3]) + + plt.suptitle(fileName + ' window='+str(slidingWindow) +' '+ modelName + +'\nAUC='+L1[0]+' R_AUC='+str(round(R_AUC,2))+' Precision='+L1[1]+ ' Recall='+L1[2]+' F='+L1[3] + + ' ExistenceReward='+L1[5]+' OverlapReward='+L1[6] + +'\nAP='+str(round(AP,2))+' R_AP='+str(round(R_AP,2))+' Precision@k='+L1[9]+' Rprecision='+L1[7] + ' Rrecall='+L1[4] +' Rf='+L1[8] + ) + +def printResult(data, label, score, slidingWindow, fileName, modelName): + grader = metricor() + R_AUC = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=False) # + L= grader.metric_new(label, score, plot_ROC=False) + L.append(R_AUC) + return L + \ No newline at end of file diff --git a/model_wrapper.py b/model_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..5279cfd2e3738f641ad42b823d288ad3ab8c7a70 --- /dev/null +++ b/model_wrapper.py @@ -0,0 +1,532 @@ +import numpy as np +import math +from utils.slidingWindows import find_length_rank + +Unsupervise_AD_Pool = ['FFT', 'SR', 'NORMA', 'Series2Graph', 'Sub_IForest', 'IForest', 'LOF', 'Sub_LOF', 'POLY', 'MatrixProfile', 'Sub_PCA', 'PCA', 'HBOS', + 'Sub_HBOS', 'KNN', 'Sub_KNN','KMeansAD', 'KMeansAD_U', 'KShapeAD', 'COPOD', 'CBLOF', 'COF', 'EIF', 'RobustPCA', 'Lag_Llama', + 'TimesFM', 'Chronos', 'MOMENT_ZS', 'DADA', 'Time_MOE', 'Time_RCD', 'TSPulse'] +Semisupervise_AD_Pool = ['Left_STAMPi', 'SAND', 'MCD', 'Sub_MCD', 'OCSVM', 'Sub_OCSVM', 'AutoEncoder', 'CNN', 'LSTMAD', 'TranAD', 'USAD', 'OmniAnomaly', + 'AnomalyTransformer', 'TimesNet', 'FITS', 'Donut', 'OFA', 'MOMENT_FT', 'M2N2', ] + +def run_Unsupervise_AD(model_name, training_data, testing_data, **kwargs): + # Extract data_index if present, but don't pass it to all functions + data_index = kwargs.pop('data_index', None) + + function_name = f'run_{model_name}' + function_to_call = globals()[function_name] + + + # Only pass data_index to functions that need it + if 'Reconstruction' in model_name: + results = function_to_call(testing_data, data_index, **kwargs) + else: + results = function_to_call(testing_data, **kwargs) + + return results + +def run_Semisupervise_AD(model_name, data_train, data_test, **kwargs): + try: + function_name = f'run_{model_name}' + function_to_call = globals()[function_name] + results = function_to_call(data_train, data_test, **kwargs) + return results + except KeyError: + error_message = f"Model function '{function_name}' is not defined."
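+        # A KeyError above means globals() has no run_<model_name> function, i.e. the
+        # requested model is not implemented in this module even if it appears in the
+        # pool lists; the message below is printed and returned instead of a score array.
+        # Illustrative call (train_array/test_array are hypothetical 2-D arrays):
+        #   run_Semisupervise_AD('LSTMAD', train_array, test_array, window_size=100)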
+ print(error_message) + return error_message + except Exception as e: + error_message = f"An error occurred while running the model '{function_name}': {str(e)}" + print(error_message) + return error_message + +def run_FFT(data, ifft_parameters=5, local_neighbor_window=21, local_outlier_threshold=0.6, max_region_size=50, max_sign_change_distance=10): + from models.FFT import FFT + clf = FFT(ifft_parameters=ifft_parameters, local_neighbor_window=local_neighbor_window, local_outlier_threshold=local_outlier_threshold, max_region_size=max_region_size, max_sign_change_distance=max_sign_change_distance) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_Sub_IForest(data, periodicity=1, n_estimators=100, max_features=1, n_jobs=1): + from models.IForest import IForest + slidingWindow = find_length_rank(data, rank=periodicity) + clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_IForest(train_data, test_data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1): + from models.IForest import IForest + clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs) + clf.fit(train_data) + score = clf.decision_function(test_data) + # score = clf.decision_scores_ + return score.ravel() + +def run_Sub_LOF(data, periodicity=1, n_neighbors=30, metric='minkowski', n_jobs=1): + from models.LOF import LOF + slidingWindow = find_length_rank(data, rank=periodicity) + clf = LOF(slidingWindow=slidingWindow, n_neighbors=n_neighbors, metric=metric, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_LOF(train_data, test_data, slidingWindow=1, n_neighbors=30, metric='minkowski', n_jobs=1): + from models.LOF import LOF + clf = LOF(slidingWindow=slidingWindow, n_neighbors=n_neighbors, metric=metric, n_jobs=n_jobs) + clf.fit(train_data) + score = clf.decision_function(test_data) + return score.ravel() + +def run_POLY(data, periodicity=1, power=3, n_jobs=1): + from models.POLY import POLY + slidingWindow = find_length_rank(data, rank=periodicity) + clf = POLY(power=power, window = slidingWindow) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_MatrixProfile(data, periodicity=1, n_jobs=1): + from models.MatrixProfile import MatrixProfile + slidingWindow = find_length_rank(data, rank=periodicity) + clf = MatrixProfile(window=slidingWindow) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_Left_STAMPi(data_train, data): + from models.Left_STAMPi import Left_STAMPi + clf = Left_STAMPi(n_init_train=len(data_train), window_size=100) + clf.fit(data) + score = clf.decision_function(data) + return score.ravel() + +def run_SAND(data_train, data_test, periodicity=1): + from models.SAND import SAND + slidingWindow = find_length_rank(data_test, rank=periodicity) + clf = SAND(pattern_length=slidingWindow, subsequence_length=4*(slidingWindow)) + clf.fit(data_test.squeeze(), online=True, overlaping_rate=int(1.5*slidingWindow), init_length=len(data_train), alpha=0.5, batch_size=max(5*(slidingWindow), int(0.1*len(data_test)))) + score = clf.decision_scores_ + return score.ravel() + +def run_KShapeAD(data, periodicity=1): + from models.SAND import SAND + slidingWindow = find_length_rank(data, rank=periodicity) + clf = SAND(pattern_length=slidingWindow, subsequence_length=4*(slidingWindow)) + 
clf.fit(data.squeeze(), overlaping_rate=int(1.5*slidingWindow)) + score = clf.decision_scores_ + return score.ravel() + +def run_Series2Graph(data, periodicity=1): + from models.Series2Graph import Series2Graph + slidingWindow = find_length_rank(data, rank=periodicity) + + data = data.squeeze() + s2g = Series2Graph(pattern_length=slidingWindow) + s2g.fit(data) + query_length = 2*slidingWindow + s2g.score(query_length=query_length,dataset=data) + + score = s2g.decision_scores_ + score = np.array([score[0]]*math.ceil(query_length//2) + list(score) + [score[-1]]*(query_length//2)) + return score.ravel() + +def run_Sub_PCA(train_data, test_data, periodicity=1, n_components=None, n_jobs=1): + from models.PCA import PCA + slidingWindow = find_length_rank(train_data, rank=periodicity) + clf = PCA(slidingWindow = slidingWindow, n_components=n_components) + clf.fit(train_data) + score = clf.decision_function(test_data) + return score.ravel() + +def run_PCA(train_data, test_data, slidingWindow=100, n_components=None, n_jobs=1): + from models.PCA import PCA + clf = PCA(slidingWindow = slidingWindow, n_components=n_components) + clf.fit(train_data) + score = clf.decision_function(test_data) + return score.ravel() + +def run_NORMA(data, periodicity=1, clustering='hierarchical', n_jobs=1): + from models.NormA import NORMA + slidingWindow = find_length_rank(data, rank=periodicity) + clf = NORMA(pattern_length=slidingWindow, nm_size=3*slidingWindow, clustering=clustering) + clf.fit(data) + score = clf.decision_scores_ + score = np.array([score[0]]*math.ceil((slidingWindow-1)/2) + list(score) + [score[-1]]*((slidingWindow-1)//2)) + if len(score) > len(data): + start = len(score) - len(data) + score = score[start:] + return score.ravel() + +def run_Sub_HBOS(data, periodicity=1, n_bins=10, tol=0.5, n_jobs=1): + from models.HBOS import HBOS + slidingWindow = find_length_rank(data, rank=periodicity) + clf = HBOS(slidingWindow=slidingWindow, n_bins=n_bins, tol=tol) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_HBOS(data, slidingWindow=1, n_bins=10, tol=0.5, n_jobs=1): + from models.HBOS import HBOS + clf = HBOS(slidingWindow=slidingWindow, n_bins=n_bins, tol=tol) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_Sub_OCSVM(data_train, data_test, kernel='rbf', nu=0.5, periodicity=1, n_jobs=1): + from models.OCSVM import OCSVM + slidingWindow = find_length_rank(data_test, rank=periodicity) + clf = OCSVM(slidingWindow=slidingWindow, kernel=kernel, nu=nu) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_OCSVM(data_train, data_test, kernel='rbf', nu=0.5, slidingWindow=1, n_jobs=1): + from models.OCSVM import OCSVM + clf = OCSVM(slidingWindow=slidingWindow, kernel=kernel, nu=nu) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_Sub_MCD(data_train, data_test, support_fraction=None, periodicity=1, n_jobs=1): + from models.MCD import MCD + slidingWindow = find_length_rank(data_test, rank=periodicity) + clf = MCD(slidingWindow=slidingWindow, support_fraction=support_fraction) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_MCD(data_train, data_test, support_fraction=None, slidingWindow=1, n_jobs=1): + from models.MCD import MCD + clf = MCD(slidingWindow=slidingWindow, support_fraction=support_fraction) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def 
run_Sub_KNN(data, n_neighbors=10, method='largest', periodicity=1, n_jobs=1): + from models.KNN import KNN + slidingWindow = find_length_rank(data, rank=periodicity) + clf = KNN(slidingWindow=slidingWindow, n_neighbors=n_neighbors,method=method, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_KNN(data, slidingWindow=1, n_neighbors=10, method='largest', n_jobs=1): + from models.KNN import KNN + clf = KNN(slidingWindow=slidingWindow, n_neighbors=n_neighbors, method=method, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_KMeansAD(data, n_clusters=20, window_size=20, n_jobs=1): + from models.KMeansAD import KMeansAD + clf = KMeansAD(k=n_clusters, window_size=window_size, stride=1, n_jobs=n_jobs) + score = clf.fit_predict(data) + return score.ravel() + +def run_KMeansAD_U(data, n_clusters=20, periodicity=1,n_jobs=1): + from models.KMeansAD import KMeansAD + slidingWindow = find_length_rank(data, rank=periodicity) + clf = KMeansAD(k=n_clusters, window_size=slidingWindow, stride=1, n_jobs=n_jobs) + score = clf.fit_predict(data) + return score.ravel() + +def run_COPOD(data, n_jobs=1): + from models.COPOD import COPOD + clf = COPOD(n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_CBLOF(data, n_clusters=8, alpha=0.9, n_jobs=1): + from models.CBLOF import CBLOF + clf = CBLOF(n_clusters=n_clusters, alpha=alpha, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_COF(data, n_neighbors=30): + from models.COF import COF + clf = COF(n_neighbors=n_neighbors) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_EIF(data, n_trees=100): + from models.EIF import EIF + clf = EIF(n_trees=n_trees) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_RobustPCA(data, max_iter=1000): + from models.RobustPCA import RobustPCA + clf = RobustPCA(max_iter=max_iter) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_SR(data, periodicity=1): + from models.SR import SR + slidingWindow = find_length_rank(data, rank=periodicity) + return SR(data, window_size=slidingWindow) + +def run_AutoEncoder(data_train, data_test, window_size=100, hidden_neurons=[64, 32], n_jobs=1): + from models.AE import AutoEncoder + clf = AutoEncoder(slidingWindow=window_size, hidden_neurons=hidden_neurons, batch_size=128, epochs=50) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1): + from models.CNN import CNN + clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_LSTMAD(data_train, data_test, window_size=100, lr=0.0008): + from models.LSTMAD import LSTMAD + clf = LSTMAD(window_size=window_size, pred_len=1, lr=lr, feats=data_test.shape[1], batch_size=128) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_TranAD(data_train, data_test, win_size=10, lr=1e-3): + from models.TranAD import TranAD + clf = TranAD(win_size=win_size, feats=data_test.shape[1], lr=lr) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_AnomalyTransformer(data_train, data_test, win_size=100, lr=1e-4, batch_size=128): + from 
models.AnomalyTransformer import AnomalyTransformer + clf = AnomalyTransformer(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=batch_size) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_OmniAnomaly(data_train, data_test, win_size=100, lr=0.002): + from models.OmniAnomaly import OmniAnomaly + clf = OmniAnomaly(win_size=win_size, feats=data_test.shape[1], lr=lr) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_USAD(data_train, data_test, win_size=5, lr=1e-4): + from models.USAD import USAD + clf = USAD(win_size=win_size, feats=data_test.shape[1], lr=lr) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_Donut(data_train, data_test, win_size=120, lr=1e-4, batch_size=128): + from models.Donut import Donut + clf = Donut(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=batch_size) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_TimesNet(data_train, data_test, win_size=96, lr=1e-4): + from models.TimesNet import TimesNet + clf = TimesNet(win_size=win_size, enc_in=data_test.shape[1], lr=lr, epochs=50) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_FITS(data_train, data_test, win_size=100, lr=1e-3): + from models.FITS import FITS + clf = FITS(win_size=win_size, input_c=data_test.shape[1], lr=lr, batch_size=128) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_OFA(data_train, data_test, win_size=100, batch_size = 64): + from models.OFA import OFA + clf = OFA(win_size=win_size, enc_in=data_test.shape[1], epochs=10, batch_size=batch_size) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_Lag_Llama(data, win_size=96, batch_size=64): + from models.Lag_Llama import Lag_Llama + clf = Lag_Llama(win_size=win_size, input_c=data.shape[1], batch_size=batch_size) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_Chronos(data, win_size=50, batch_size=64): + from models.Chronos import Chronos + clf = Chronos(win_size=win_size, prediction_length=1, input_c=1, model_size='base', batch_size=batch_size) + data =data.reshape(-1,1) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +def run_TimesFM(data, win_size=96): + from models.TimesFM import TimesFM + clf = TimesFM(win_size=win_size) + data_normalized = (data - np.mean(data, axis=0)) / np.std(data, axis=0) + data_normalized = data_normalized.reshape(-1,1) + clf.fit(data_normalized) + #normalizd data: + score = clf.decision_scores_ + return score.ravel() + +def run_MOMENT_ZS(data, win_size=256): + from models.MOMENT import MOMENT + clf = MOMENT(win_size=win_size, input_c=1) + data = data.reshape(-1,1) + # Zero shot + clf.zero_shot(data) + score = clf.decision_scores_ + return score.ravel() + +def run_MOMENT_FT(data_train, data_test, win_size=256): + from models.MOMENT import MOMENT + clf = MOMENT(win_size=win_size, input_c=data_test.shape[1]) + + # Finetune + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_M2N2( + data_train, data_test, win_size=12, stride=12, + batch_size=64, epochs=100, latent_dim=16, + lr=1e-3, ttlr=1e-3, normalization='Detrend', + gamma=0.99, th=0.9, valid_size=0.2, infer_mode='online' + ): + from models.M2N2 import M2N2 + clf = M2N2( + win_size=win_size, 
stride=stride, + num_channels=data_test.shape[1], + batch_size=batch_size, epochs=epochs, + latent_dim=latent_dim, + lr=lr, ttlr=ttlr, + normalization=normalization, + gamma=gamma, th=th, valid_size=valid_size, + infer_mode=infer_mode + ) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +def run_DADA(data_test, device=0, win_size=100, batch_size=32): + from models.DADA import DADA + clf = DADA(device=device, win_size=win_size, batch_size=batch_size) + score = clf.zero_shot(data_test) + return score.ravel() + +def run_Time_MOE(data, device=0, win_size=64, batch_size=32): + from models.time_moe import Time_MOE + clf = Time_MOE(device=device, win_size=win_size, batch_size=batch_size) + score = clf.zero_shot(data) + return score.ravel() + +def run_Time_RCD(data, **kwargs): + Multi = kwargs.get('Multi', False) + win_size = kwargs.get('win_size', 5000) + batch_size = kwargs.get('batch_size', 64) + random_mask = kwargs.get('random_mask', 'random_mask') + size = kwargs.get('size', 'full') + device = kwargs.get('device', '2') # Extract device parameter + """ + Wrapper function for Time_RCD model + """ + from models.TimeRCD import TimeRCDPretrainTester + from models.time_rcd.time_rcd_config import TimeRCDConfig, default_config + + config = default_config + if Multi: + if size == 'small': + if random_mask == 'random_mask': + checkpoint_path = 'checkpoints/dataset_10_20.pth' + else: + checkpoint_path = 'checkpoints/full_mask_10_20.pth' + config.ts_config.patch_size = 16 + else: + if random_mask == 'random_mask': + checkpoint_path = 'checkpoints/dataset_15_56.pth' + else: + checkpoint_path = 'checkpoints/full_mask_15_56.pth' + config.ts_config.patch_size = 32 + else: + checkpoint_path = 'checkpoints/full_mask_anomaly_head_pretrain_checkpoint_best.pth' + config.ts_config.patch_size = 16 + + config.cuda_devices = device # Use the device parameter properly + print("Using CUDA device:", config.cuda_devices) + config.win_size = win_size + config.batch_size = batch_size + config.ts_config.num_features = data.shape[1] + print(f"Checkpoint path: {checkpoint_path}") + cls = TimeRCDPretrainTester(checkpoint_path, config) + score_list, logit_list = cls.zero_shot(data) + + # Concatenate across batches robustly to avoid inhomogeneous shape errors + score = np.concatenate([np.asarray(s).reshape(-1) for s in score_list], axis=0) + logit = np.concatenate([np.asarray(l).reshape(-1) for l in logit_list], axis=0) + + return score, logit + + +def run_TSPulse(data, win_size=256, batch_size=64, prediction_mode=None, aggregation_length=64, + aggr_function="max", smoothing_length=8, least_significant_scale=0.01, + least_significant_score=0.1, device=None): + """ + Wrapper function for TSPulse anomaly detection model + + Parameters + ---------- + data : numpy.ndarray + Time series data of shape (n_samples, n_features) + win_size : int, default=256 + Window size (for compatibility, not directly used by TSPulse) + batch_size : int, default=64 + Batch size for processing + prediction_mode : list, optional + List of prediction modes. 
If None, uses default time and frequency reconstruction + aggregation_length : int, default=64 + Length for aggregation of scores + aggr_function : str, default="max" + Aggregation function ("max", "mean", "median") + smoothing_length : int, default=8 + Length for smoothing the anomaly scores + least_significant_scale : float, default=0.01 + Minimum scale for significance + least_significant_score : float, default=0.1 + Minimum score for significance + device : str, optional + Device to use ("cuda" or "cpu"). Auto-detected if None. + + Returns + ------- + numpy.ndarray + Anomaly scores of shape (n_samples,) + """ + from models.TSPulse import run_TSPulse as tspulse_runner + + # Prepare kwargs for TSPulse + kwargs = { + 'batch_size': batch_size, + 'aggregation_length': aggregation_length, + 'aggr_function': aggr_function, + 'smoothing_length': smoothing_length, + 'least_significant_scale': least_significant_scale, + 'least_significant_score': least_significant_score, + } + + if prediction_mode is not None: + kwargs['prediction_mode'] = prediction_mode + if device is not None: + kwargs['device'] = device + + try: + # Run TSPulse anomaly detection + score = tspulse_runner(data, **kwargs) + return score.ravel() + except Exception as e: + print(f"Warning: TSPulse failed with error: {str(e)}") + print("Falling back to random scores") + # Return random scores as fallback + return np.random.random(len(data)) * 0.1 \ No newline at end of file diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..506bad542993ec92b3b07819b435e406c9975e65 Binary files /dev/null and b/models/.DS_Store differ diff --git a/models/AE.py b/models/AE.py new file mode 100644 index 0000000000000000000000000000000000000000..077db8be8b6464de0881b07537d4a366541ca559 --- /dev/null +++ b/models/AE.py @@ -0,0 +1,407 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import torch, math +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from torch import nn +from sklearn.preprocessing import MinMaxScaler + +from .feature import Window +from .base import BaseDetector +from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.dataset import TSDataset +from ..utils.utility import get_activation_by_name + +class InnerAutoencoder(nn.Module): + def __init__(self, + n_features, + hidden_neurons=(128, 64), + dropout_rate=0.2, + batch_norm=True, + hidden_activation='relu'): + + # initialize the super class + super(InnerAutoencoder, self).__init__() + + # save the default values + self.n_features = n_features + self.dropout_rate = dropout_rate + self.batch_norm = batch_norm + self.hidden_activation = hidden_activation + + # create the dimensions for the input and hidden layers + self.layers_neurons_encoder_ = [self.n_features, *hidden_neurons] + self.layers_neurons_decoder_ = self.layers_neurons_encoder_[::-1] + + # get the object for the activations functions + self.activation = get_activation_by_name(hidden_activation) + + # initialize encoder and decoder as a sequential + self.encoder = nn.Sequential() + self.decoder = nn.Sequential() + + # fill the encoder sequential with hidden layers + for idx, layer in enumerate(self.layers_neurons_encoder_[:-1]): + + # create a linear layer of neurons + self.encoder.add_module( + "linear" + str(idx), + 
torch.nn.Linear(layer,self.layers_neurons_encoder_[idx + 1])) + + # add a batch norm per layer if wanted (leave out first layer) + if batch_norm: + self.encoder.add_module("batch_norm" + str(idx), + nn.BatchNorm1d(self.layers_neurons_encoder_[idx + 1])) + + # create the activation + self.encoder.add_module(self.hidden_activation + str(idx), + self.activation) + + # create a dropout layer + self.encoder.add_module("dropout" + str(idx), + torch.nn.Dropout(dropout_rate)) + + # fill the decoder layer + for idx, layer in enumerate(self.layers_neurons_decoder_[:-1]): + + # create a linear layer of neurons + self.decoder.add_module( + "linear" + str(idx), + torch.nn.Linear(layer,self.layers_neurons_decoder_[idx + 1])) + + # create a batch norm per layer if wanted (only if it is not the + # last layer) + if batch_norm and idx < len(self.layers_neurons_decoder_[:-1]) - 1: + self.decoder.add_module("batch_norm" + str(idx), + nn.BatchNorm1d(self.layers_neurons_decoder_[idx + 1])) + + # create the activation + self.decoder.add_module(self.hidden_activation + str(idx), + self.activation) + + # create a dropout layer (only if it is not the last layer) + if idx < len(self.layers_neurons_decoder_[:-1]) - 1: + self.decoder.add_module("dropout" + str(idx), + torch.nn.Dropout(dropout_rate)) + + def forward(self, x): + # we could return the latent representation here after the encoder + # as the latent representation + x = self.encoder(x) + x = self.decoder(x) + return x + +class AutoEncoder(BaseDetector): + """Auto Encoder (AE) is a type of neural networks for learning useful data + representations in an unsupervised manner. Similar to PCA, AE could be used + to detect outlying objects in the data by calculating the reconstruction + errors. See :cite:`aggarwal2015outlier` Chapter 3 for details. + + Notes + ----- + This is the PyTorch version of AutoEncoder. + The documentation is not finished! + + Parameters + ---------- + hidden_neurons : list, optional (default=[64, 32]) + The number of neurons per hidden layers. So the network has the + structure as [n_features, 64, 32, 32, 64, n_features] + + hidden_activation : str, optional (default='relu') + Activation function to use for hidden layers. + All hidden layers are forced to use the same type of activation. + See https://pytorch.org/docs/stable/nn.html for details. + + batch_norm : boolean, optional (default=True) + Whether to apply Batch Normalization, + See https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html + + learning_rate : float, optional (default=1e-3) + Learning rate for the optimizer. This learning_rate is given to + an Adam optimizer (torch.optim.Adam). + See https://pytorch.org/docs/stable/generated/torch.optim.Adam.html + + epochs : int, optional (default=100) + Number of epochs to train the model. + + batch_size : int, optional (default=32) + Number of samples per gradient update. + + dropout_rate : float in (0., 1), optional (default=0.2) + The dropout to be used across all layers. + + weight_decay : float, optional (default=1e-5) + The weight decay for Adam optimizer. + See https://pytorch.org/docs/stable/generated/torch.optim.Adam.html + + preprocessing : bool, optional (default=True) + If True, apply standardization on the data. + + loss_fn : obj, optional (default=torch.nn.MSELoss) + Optimizer instance which implements torch.nn._Loss. + One of https://pytorch.org/docs/stable/nn.html#loss-functions + or a custom loss. Custom losses are currently unstable. + + verbose : int, optional (default=1) + Verbosity mode. 
+ + - 0 = silent + - 1 = progress bar + - 2 = one line per epoch. + + For verbose >= 1, model summary may be printed. + !CURRENTLY NOT SUPPORTED.! + + random_state : random_state: int, RandomState instance or None, optional + (default=None) + If int, random_state is the seed used by the random + number generator; If RandomState instance, random_state is the random + number generator; If None, the random number generator is the + RandomState instance used by `np.random`. + !CURRENTLY NOT SUPPORTED.! + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. + the proportion of outliers in the data set. When fitting this is used + to define the threshold on the decision function. + + Attributes + ---------- + encoding_dim_ : int + The number of neurons in the encoding layer. + + compression_rate_ : float + The ratio between the original feature and + the number of neurons in the encoding layer. + + model_ : Keras Object + The underlying AutoEncoder in Keras. + + history_: Keras Object + The AutoEncoder training history. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, + slidingWindow=100, + hidden_neurons=None, + hidden_activation='relu', + batch_norm=True, + learning_rate=1e-3, + epochs=100, + batch_size=32, + dropout_rate=0.2, + weight_decay=1e-5, + # validation_size=0.1, + preprocessing=True, + loss_fn=None, + verbose=False, + # random_state=None, + contamination=0.1, + device=None): + super(AutoEncoder, self).__init__(contamination=contamination) + + # save the initialization values + self.slidingWindow = slidingWindow + self.hidden_neurons = hidden_neurons + self.hidden_activation = hidden_activation + self.batch_norm = batch_norm + self.learning_rate = learning_rate + self.epochs = epochs + self.batch_size = batch_size + self.dropout_rate = dropout_rate + self.weight_decay = weight_decay + self.preprocessing = preprocessing + self.loss_fn = loss_fn + self.verbose = verbose + self.device = device + + # create default loss functions + if self.loss_fn is None: + self.loss_fn = torch.nn.MSELoss() + + # create default calculation device (support GPU if available) + if self.device is None: + self.device = torch.device( + "cuda:0" if torch.cuda.is_available() else "cpu") + + # default values for the amount of hidden neurons + if self.hidden_neurons is None: + self.hidden_neurons = [64, 32] + + # noinspection PyUnresolvedReferences + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. 
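+
+        Notes
+        -----
+        Univariate input of shape (n_samples, 1) is first converted into an
+        overlapping sliding-window matrix of width ``slidingWindow`` before the
+        network is trained on the (min-max scaled) windows.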
+ """ + n_samples, n_features = X.shape + + if n_features == 1: + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + n_samples, n_features = X.shape[0], X.shape[1] + X = MinMaxScaler(feature_range=(0,1)).fit_transform(X.T).T + + # conduct standardization if needed + if self.preprocessing: + self.mean, self.std = np.mean(X, axis=0), np.std(X, axis=0) + self.std = np.where(self.std == 0, 1e-8, self.std) + train_set = TSDataset(X=X, mean=self.mean, std=self.std) + else: + train_set = TSDataset(X=X) + + train_loader = torch.utils.data.DataLoader(train_set, batch_size=self.batch_size, shuffle=True, drop_last=True) + + # initialize the model + self.model = InnerAutoencoder( + n_features=n_features, + hidden_neurons=self.hidden_neurons, + dropout_rate=self.dropout_rate, + batch_norm=self.batch_norm, + hidden_activation=self.hidden_activation) + + # move to device and print model information + self.model = self.model.to(self.device) + if self.verbose: + print(self.model) + + # train the autoencoder to find the best one + self._train_autoencoder(train_loader) + + self.model.load_state_dict(self.best_model_dict) + self.decision_scores_ = self.decision_function(X) + + self._process_decision_scores() + return self + + def _train_autoencoder(self, train_loader): + """Internal function to train the autoencoder + + Parameters + ---------- + train_loader : torch dataloader + Train data. + """ + optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.learning_rate, + weight_decay=self.weight_decay) + + self.best_loss = float('inf') + self.best_model_dict = None + + for epoch in range(self.epochs): + overall_loss = [] + for data, data_idx in train_loader: + data = data.to(self.device).float() + loss = self.loss_fn(data, self.model(data)) + + self.model.zero_grad() + loss.backward() + optimizer.step() + overall_loss.append(loss.item()) + if self.verbose: + print('epoch {epoch}: training loss {train_loss} '.format( + epoch=epoch, train_loss=np.mean(overall_loss))) + + # track the best model so far + if np.mean(overall_loss) <= self.best_loss: + # print("epoch {ep} is the current best; loss={loss}".format(ep=epoch, loss=np.mean(overall_loss))) + self.best_loss = np.mean(overall_loss) + self.best_model_dict = self.model.state_dict() + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. 
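+
+        Notes
+        -----
+        When the input was windowed, the per-window reconstruction errors are
+        edge-padded back to the original series length, so one score is returned
+        per time point.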
+ """ + check_is_fitted(self, ['model', 'best_model_dict']) + + n_samples, n_features = X.shape + + if n_features == 1: + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + X = check_array(X) + X = MinMaxScaler(feature_range=(0,1)).fit_transform(X.T).T + + # note the shuffle may be true but should be False + if self.preprocessing: + dataset = TSDataset(X=X, mean=self.mean, std=self.std) + else: + dataset = TSDataset(X=X) + + dataloader = torch.utils.data.DataLoader(dataset, + batch_size=self.batch_size, + shuffle=False) + # enable the evaluation mode + self.model.eval() + + # construct the vector for holding the reconstruction error + outlier_scores = np.zeros([X.shape[0], ]) + with torch.no_grad(): + for data, data_idx in dataloader: + data_cuda = data.to(self.device).float() + # this is the outlier score + outlier_scores[data_idx] = pairwise_distances_no_broadcast( + data, self.model(data_cuda).cpu().numpy()) + + if outlier_scores.shape[0] < n_samples: + outlier_scores = np.array([outlier_scores[0]]*math.ceil((self.slidingWindow-1)/2) + + list(outlier_scores) + [outlier_scores[-1]]*((self.slidingWindow-1)//2)) + + return outlier_scores \ No newline at end of file diff --git a/models/CBLOF.py b/models/CBLOF.py new file mode 100644 index 0000000000000000000000000000000000000000..c3eea71037817b3237de04c6613453ac2019def9 --- /dev/null +++ b/models/CBLOF.py @@ -0,0 +1,332 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function +import warnings + +import numpy as np +from scipy.spatial.distance import cdist +from sklearn.cluster import KMeans +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.estimator_checks import check_estimator + +from ..utils.stat_models import pairwise_distances_no_broadcast +from ..utils.utility import check_parameter +from .base import BaseDetector +from ..utils.utility import zscore + + +class CBLOF(BaseDetector): + r"""The CBLOF operator calculates the outlier score based on cluster-based + local outlier factor. + + CBLOF takes as an input the data set and the cluster model that was + generated by a clustering algorithm. It classifies the clusters into small + clusters and large clusters using the parameters alpha and beta. + The anomaly score is then calculated based on the size of the cluster the + point belongs to as well as the distance to the nearest large cluster. + + Use weighting for outlier factor based on the sizes of the clusters as + proposed in the original publication. Since this might lead to unexpected + behavior (outliers close to small clusters are not found), it is disabled + by default.Outliers scores are solely computed based on their distance to + the closest large cluster center. + + By default, kMeans is used for clustering algorithm instead of + Squeezer algorithm mentioned in the original paper for multiple reasons. + + See :cite:`he2003discovering` for details. + + Parameters + ---------- + n_clusters : int, optional (default=8) + The number of clusters to form as well as the number of + centroids to generate. + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, + i.e. the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. 
+ + clustering_estimator : Estimator, optional (default=None) + The base clustering algorithm for performing data clustering. + A valid clustering algorithm should be passed in. The estimator should + have standard sklearn APIs, fit() and predict(). The estimator should + have attributes ``labels_`` and ``cluster_centers_``. + If ``cluster_centers_`` is not in the attributes once the model is fit, + it is calculated as the mean of the samples in a cluster. + + If not set, CBLOF uses KMeans for scalability. See + https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html + + alpha : float in (0.5, 1), optional (default=0.9) + Coefficient for deciding small and large clusters. The ratio + of the number of samples in large clusters to the number of samples in + small clusters. + + beta : int or float in (1,), optional (default=5). + Coefficient for deciding small and large clusters. For a list + sorted clusters by size `|C1|, \|C2|, ..., |Cn|, beta = |Ck|/|Ck-1|` + + use_weights : bool, optional (default=False) + If set to True, the size of clusters are used as weights in + outlier score calculation. + + check_estimator : bool, optional (default=False) + If set to True, check whether the base estimator is consistent with + sklearn standard. + + .. warning:: + check_estimator may throw errors with scikit-learn 0.20 above. + + random_state : int, RandomState or None, optional (default=None) + If int, random_state is the seed used by the random + number generator; If RandomState instance, random_state is the random + number generator; If None, the random number generator is the + RandomState instance used by `np.random`. + + + Attributes + ---------- + clustering_estimator_ : Estimator, sklearn instance + Base estimator for clustering. + + cluster_labels_ : list of shape (n_samples,) + Cluster assignment for the training samples. + + n_clusters_ : int + Actual number of clusters (possibly different from n_clusters). + + cluster_sizes_ : list of shape (n_clusters_,) + The size of each cluster once fitted with the training data. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher scores. + This value is available once the detector is fitted. + + cluster_centers_ : numpy array of shape (n_clusters_, n_features) + The center of each cluster. + + small_cluster_labels_ : list of clusters numbers + The cluster assignments belonging to small clusters. + + large_cluster_labels_ : list of clusters numbers + The cluster assignments belonging to large clusters. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. 
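+
+    Examples
+    --------
+    A minimal usage sketch (illustrative only; it mirrors how ``run_CBLOF`` in
+    ``model_wrapper.py`` drives this detector, with hyperparameter values that
+    also appear in ``HP_list.py``)::
+
+        import numpy as np
+        from models.CBLOF import CBLOF
+
+        X = np.random.randn(500, 8)            # toy data, shape (n_samples, n_features)
+        clf = CBLOF(n_clusters=8, alpha=0.8, beta=5)
+        clf.fit(X)
+        scores = clf.decision_scores_          # higher means more anomalous
+        preds = clf.labels_                    # 0 = inlier, 1 = outlier (contamination-based)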
+ """ + + def __init__(self, n_clusters=8, contamination=0.1, + clustering_estimator=None, alpha=0.9, beta=5, + use_weights=False, check_estimator=False, random_state=0, + n_jobs=1, normalize=True): + super(CBLOF, self).__init__(contamination=contamination) + self.n_clusters = n_clusters + self.clustering_estimator = clustering_estimator + self.alpha = alpha + self.beta = beta + self.use_weights = use_weights + self.check_estimator = check_estimator + self.random_state = random_state + self.normalize = normalize + + # noinspection PyIncorrectDocstring + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + n_samples, n_features = X.shape + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # check parameters + # number of clusters are default to 8 + self._validate_estimator(default=KMeans( + n_clusters=self.n_clusters, + random_state=self.random_state)) + + self.clustering_estimator_.fit(X=X, y=y) + # Get the labels of the clustering results + # labels_ is consistent across sklearn clustering algorithms + self.cluster_labels_ = self.clustering_estimator_.labels_ + self.cluster_sizes_ = np.bincount(self.cluster_labels_) + + # Get the actual number of clusters + self.n_clusters_ = self.cluster_sizes_.shape[0] + + if self.n_clusters_ != self.n_clusters: + warnings.warn("The chosen clustering for CBLOF forms {0} clusters" + "which is inconsistent with n_clusters ({1}).". + format(self.n_clusters_, self.n_clusters)) + + self._set_cluster_centers(X, n_features) + self._set_small_large_clusters(n_samples) + + self.decision_scores_ = self._decision_function(X, + self.cluster_labels_) + + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + X = check_array(X) + labels = self.clustering_estimator_.predict(X) + return self._decision_function(X, labels) + + def _validate_estimator(self, default=None): + """Check the value of alpha and beta and clustering algorithm. 
+ """ + check_parameter(self.alpha, low=0, high=1, param_name='alpha', + include_left=False, include_right=False) + + check_parameter(self.beta, low=1, param_name='beta', + include_left=False) + + if self.clustering_estimator is not None: + self.clustering_estimator_ = self.clustering_estimator + else: + self.clustering_estimator_ = default + + # make sure the base clustering algorithm is valid + if self.clustering_estimator_ is None: + raise ValueError("clustering algorithm cannot be None") + + if self.check_estimator: + check_estimator(self.clustering_estimator_) + + def _set_cluster_centers(self, X, n_features): + # Noted not all clustering algorithms have cluster_centers_ + if hasattr(self.clustering_estimator_, 'cluster_centers_'): + self.cluster_centers_ = self.clustering_estimator_.cluster_centers_ + else: + # Set the cluster center as the mean of all the samples within + # the cluster + warnings.warn("The chosen clustering for CBLOF does not have" + "the center of clusters. Calculate the center" + "as the mean of the clusters.") + self.cluster_centers_ = np.zeros([self.n_clusters_, n_features]) + for i in range(self.n_clusters_): + self.cluster_centers_[i, :] = np.mean( + X[np.where(self.cluster_labels_ == i)], axis=0) + + def _set_small_large_clusters(self, n_samples): + # Sort the index of clusters by the number of samples belonging to it + size_clusters = np.bincount(self.cluster_labels_) + + # Sort the order from the largest to the smallest + sorted_cluster_indices = np.argsort(size_clusters * -1) + + # Initialize the lists of index that fulfill the requirements by + # either alpha or beta + alpha_list = [] + beta_list = [] + + for i in range(1, self.n_clusters_): + temp_sum = np.sum(size_clusters[sorted_cluster_indices[:i]]) + if temp_sum >= n_samples * self.alpha: + alpha_list.append(i) + + if size_clusters[sorted_cluster_indices[i - 1]] / size_clusters[ + sorted_cluster_indices[i]] >= self.beta: + beta_list.append(i) + + # Find the separation index fulfills both alpha and beta + intersection = np.intersect1d(alpha_list, beta_list) + + if len(intersection) > 0: + self._clustering_threshold = intersection[0] + elif len(alpha_list) > 0: + self._clustering_threshold = alpha_list[0] + elif len(beta_list) > 0: + self._clustering_threshold = beta_list[0] + else: + raise ValueError("Could not form valid cluster separation. 
Please "
+                             "change n_clusters or change clustering method")
+
+        self.small_cluster_labels_ = sorted_cluster_indices[
+                                     self._clustering_threshold:]
+        self.large_cluster_labels_ = sorted_cluster_indices[
+                                     0:self._clustering_threshold]
+
+        # No need to calculate small cluster center
+        # self.small_cluster_centers_ = self.cluster_centers_[
+        #     self.small_cluster_labels_]
+
+        self._large_cluster_centers = self.cluster_centers_[
+            self.large_cluster_labels_]
+
+    def _decision_function(self, X, labels):
+        # Initialize the score array
+        scores = np.zeros([X.shape[0], ])
+
+        small_indices = np.where(
+            np.isin(labels, self.small_cluster_labels_))[0]
+        large_indices = np.where(
+            np.isin(labels, self.large_cluster_labels_))[0]
+
+        if small_indices.shape[0] != 0:
+            # Calculate the outlier factor for the samples in small clusters
+            dist_to_large_center = cdist(X[small_indices, :],
+                                         self._large_cluster_centers)
+
+            scores[small_indices] = np.min(dist_to_large_center, axis=1)
+
+        if large_indices.shape[0] != 0:
+            # Calculate the outlier factor for the samples in large clusters
+            large_centers = self.cluster_centers_[labels[large_indices]]
+
+            scores[large_indices] = pairwise_distances_no_broadcast(
+                X[large_indices, :], large_centers)
+
+        if self.use_weights:
+            # Weights are calculated as the number of elements in the cluster
+            scores = scores * self.cluster_sizes_[labels]
+
+        return scores.ravel()
\ No newline at end of file
diff --git a/models/CNN.py b/models/CNN.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6347cade8e68d14049a397eadb15afabb6b71bf
--- /dev/null
+++ b/models/CNN.py
@@ -0,0 +1,273 @@
+from typing import Dict
+import torchinfo
+import tqdm, math
+import numpy as np
+import torch
+from torch import nn, optim
+from torch.utils.data import DataLoader
+
+from ..utils.utility import get_activation_by_name
+from ..utils.torch_utility import EarlyStoppingTorch, get_gpu
+from ..utils.dataset import ForecastDataset
+
+class AdaptiveConcatPool1d(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # concatenate adaptive average pooling with adaptive max pooling
+        self.ap = torch.nn.AdaptiveAvgPool1d(1)
+        self.mp = torch.nn.AdaptiveMaxPool1d(1)
+
+    def forward(self, x):
+        return torch.cat([self.ap(x), self.mp(x)], 1)
+
+class CNNModel(nn.Module):
+    def __init__(self,
+                 n_features,
+                 num_channel=[32, 32, 40],
+                 kernel_size=3,
+                 stride=1,
+                 predict_time_steps=1,
+                 dropout_rate=0.25,
+                 hidden_activation='relu',
+                 device='cpu'):
+
+        # initialize the super class
+        super(CNNModel, self).__init__()
+
+        # save the default values
+        self.n_features = n_features
+        self.dropout_rate = dropout_rate
+        self.hidden_activation = hidden_activation
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.predict_time_steps = predict_time_steps
+        self.num_channel = num_channel
+        self.device = device
+
+        # get the object for the activation function
+        self.activation = get_activation_by_name(hidden_activation)
+
+        # initialize the convolutional layers as a sequential
+        self.conv_layers = nn.Sequential()
+        prev_channels = self.n_features
+
+        for idx, out_channels in enumerate(self.num_channel[:-1]):
+            self.conv_layers.add_module(
+                "conv" + str(idx),
+                torch.nn.Conv1d(prev_channels, self.num_channel[idx + 1],
+                                self.kernel_size, self.stride))
+            self.conv_layers.add_module(self.hidden_activation + str(idx),
+                                        self.activation)
+            self.conv_layers.add_module("pool" + str(idx), nn.MaxPool1d(kernel_size=2))
+            # track the channel count actually produced by this conv block
+            prev_channels = self.num_channel[idx + 1]
+
+        self.fc = nn.Sequential(
+            AdaptiveConcatPool1d(),
+            torch.nn.Flatten(),
+            torch.nn.Linear(2*self.num_channel[-1],
self.num_channel[-1]), + torch.nn.ReLU(), + torch.nn.Dropout(dropout_rate), + torch.nn.Linear(self.num_channel[-1], self.n_features) + ) + + def forward(self, x): + b, l, c = x.shape + x = x.view(b, c, l) + x = self.conv_layers(x) # [128, feature, 23] + + outputs = torch.zeros(self.predict_time_steps, b, self.n_features).to(self.device) + for t in range(self.predict_time_steps): + decoder_input = self.fc(x) + outputs[t] = torch.squeeze(decoder_input, dim=-2) + + return outputs + +class CNN(): + def __init__(self, + window_size=100, + pred_len=1, + batch_size=128, + epochs=50, + lr=0.0008, + feats=1, + num_channel=[32, 32, 40], + validation_size=0.2): + super().__init__() + self.__anomaly_score = None + + cuda = True + self.y_hats = None + + self.cuda = cuda + self.device = get_gpu(self.cuda) + + self.window_size = window_size + self.pred_len = pred_len + self.batch_size = batch_size + self.epochs = epochs + + self.feats = feats + self.num_channel = num_channel + self.lr = lr + self.validation_size = validation_size + + self.model = CNNModel(n_features=feats, num_channel=num_channel, predict_time_steps=self.pred_len, device=self.device).to(self.device) + + self.optimizer = optim.Adam(self.model.parameters(), lr=lr) + self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) + self.loss = nn.MSELoss() + self.save_path = None + self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) + + self.mu = None + self.sigma = None + self.eps = 1e-10 + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + ForecastDataset(tsTrain, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=True) + + valid_loader = DataLoader( + ForecastDataset(tsValid, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=False) + + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + avg_loss = 0 + loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + + # print('x: ', x.shape) # (bs, win, feat) + # print('target: ', target.shape) # # (bs, pred_len, feat) + # print('len(tsTrain): ', len(tsTrain)) + # print('len(train_loader): ', len(train_loader)) + + self.optimizer.zero_grad() + + output = self.model(x) + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + loss = self.loss(output, target) + loss.backward() + + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + + self.model.eval() + scores = [] + avg_loss = 0 + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + + output = self.model(x) + + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + loss = self.loss(output, target) + avg_loss += loss.cpu().item() + loop.set_description(f'Validation Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + mse = torch.sub(output, target).pow(2) + scores.append(mse.cpu()) + + + valid_loss = avg_loss/max(len(valid_loader), 1) + 
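+            # One scheduler step per epoch; the early stopping handler below
+            # tracks the average validation loss computed above.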
self.scheduler.step() + + self.early_stopping(valid_loss, self.model) + if self.early_stopping.early_stop or epoch == self.epochs - 1: + # fitting Gaussian Distribution + if len(scores) > 0: + scores = torch.cat(scores, dim=0) + self.mu = torch.mean(scores) + self.sigma = torch.var(scores) + print(self.mu.size(), self.sigma.size()) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + ForecastDataset(data, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + y_hats = [] + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + output = self.model(x) + + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + mse = torch.sub(output, target).pow(2) + + y_hats.append(output.cpu()) + scores.append(mse.cpu()) + loop.set_description(f'Testing: ') + + scores = torch.cat(scores, dim=0) + # scores = 0.5 * (torch.log(self.sigma + self.eps) + (scores - self.mu)**2 / (self.sigma+self.eps)) + + scores = scores.numpy() + scores = np.mean(scores, axis=1) + + y_hats = torch.cat(y_hats, dim=0) + y_hats = y_hats.numpy() + + l, w = y_hats.shape + + # new_scores = np.zeros((l - self.pred_len, w)) + # for i in range(w): + # new_scores[:, i] = scores[self.pred_len - i:l-i, i] + # scores = np.mean(new_scores, axis=1) + # scores = np.pad(scores, (0, self.pred_len - 1), 'constant', constant_values=(0,0)) + + # new_y_hats = np.zeros((l - self.pred_len, w)) + # for i in range(w): + # new_y_hats[:, i] = y_hats[self.pred_len - i:l-i, i] + # y_hats = np.mean(new_y_hats, axis=1) + # y_hats = np.pad(y_hats, (0, self.pred_len - 1), 'constant',constant_values=(0,0)) + + assert scores.ndim == 1 + # self.y_hats = y_hats + + print('scores: ', scores.shape) + if scores.shape[0] < len(data): + padded_decision_scores_ = np.zeros(len(data)) + padded_decision_scores_[: self.window_size+self.pred_len-1] = scores[0] + padded_decision_scores_[self.window_size+self.pred_len-1 : ] = scores + + self.__anomaly_score = padded_decision_scores_ + return padded_decision_scores_ + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def get_y_hat(self) -> np.ndarray: + return self.y_hats + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, (self.batch_size, self.window_size), verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) diff --git a/models/COF.py b/models/COF.py new file mode 100644 index 0000000000000000000000000000000000000000..ad17d2e6e2dd51f4af6d2549e6d8fa1eb1238e1f --- /dev/null +++ b/models/COF.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import warnings +from operator import itemgetter + +import numpy as np +from scipy.spatial import distance_matrix +from scipy.spatial import minkowski_distance +from sklearn.utils import check_array + +from .base import BaseDetector +from ..utils.utility import check_parameter + + +class COF(BaseDetector): + """Connectivity-Based Outlier Factor (COF) COF uses the ratio of average + chaining distance of data point and the average of average chaining + distance of k nearest 
neighbor of the data point, as the outlier score + for observations. + + See :cite:`tang2002enhancing` for details. + + Two version of COF are supported: + + - Fast COF: computes the entire pairwise distance matrix at the cost of a + O(n^2) memory requirement. + - Memory efficient COF: calculates pairwise distances incrementally. + Use this implementation when it is not feasible to fit the n-by-n + distance in memory. This leads to a linear overhead because many + distances will have to be recalculated. + + Parameters + ---------- + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. + the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + n_neighbors : int, optional (default=20) + Number of neighbors to use by default for k neighbors queries. + Note that n_neighbors should be less than the number of samples. + If n_neighbors is larger than the number of samples provided, + all samples will be used. + + method : string, optional (default='fast') + Valid values for method are: + + - 'fast' Fast COF, computes the full pairwise distance matrix up front. + - 'memory' Memory-efficient COF, computes pairwise distances only when + needed at the cost of computational speed. + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + + n_neighbors_: int + Number of neighbors to use by default for k neighbors queries. + """ + + def __init__(self, contamination=0.1, n_neighbors=20, method="fast"): + super(COF, self).__init__(contamination=contamination) + if isinstance(n_neighbors, int): + check_parameter(n_neighbors, low=1, param_name='n_neighbors') + else: + raise TypeError( + "n_neighbors should be int. Got %s" % type(n_neighbors)) + self.n_neighbors = n_neighbors + self.method = method + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + X = check_array(X) + self.n_train_ = X.shape[0] + self.n_neighbors_ = self.n_neighbors + + if self.n_neighbors_ >= self.n_train_: + self.n_neighbors_ = self.n_train_ - 1 + warnings.warn( + "n_neighbors is set to the number of training points " + "minus 1: {0}".format(self.n_neighbors_)) + + check_parameter(self.n_neighbors_, 1, self.n_train_, + include_left=True, include_right=True) + + self._set_n_classes(y) + self.decision_scores_ = self.decision_function(X) + self._process_decision_scores() + + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. 
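+
+        Depending on ``method``, scores are computed either from a full pairwise
+        distance matrix ('fast') or by recomputing distances on demand ('memory').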
+ + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + if self.method.lower() == "fast": + return self._cof_fast(X) + elif self.method.lower() == "memory": + return self._cof_memory(X) + else: + raise ValueError("method should be set to either \'fast\' or \'memory\'. Got %s" % self.method) + + def _cof_memory(self, X): + """ + Connectivity-Based Outlier Factor (COF) Algorithm + This function is called internally to calculate the + Connectivity-Based Outlier Factor (COF) as an outlier + score for observations. + This function uses a memory efficient implementation at the cost of + speed. + :return: numpy array containing COF scores for observations. + The greater the COF, the greater the outlierness. + """ + #dist_matrix = np.array(distance_matrix(X, X)) + sbn_path_index = np.zeros((X.shape[0],self.n_neighbors_), dtype=np.int64) + ac_dist, cof_ = np.zeros((X.shape[0])), np.zeros((X.shape[0])) + for i in range(X.shape[0]): + #sbn_path = np.argsort(dist_matrix[i]) + sbn_path = np.argsort(minkowski_distance(X[i,:],X,p=2)) + sbn_path_index[i,:] = sbn_path[1: self.n_neighbors_ + 1] + cost_desc = np.zeros((self.n_neighbors_)) + for j in range(self.n_neighbors_): + #cost_desc.append( + # np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) + cost_desc[j] = np.min(minkowski_distance(X[sbn_path[j + 1]],X,p=2)[sbn_path][:j + 1]) + acd = np.zeros((self.n_neighbors_)) + for _h, cost_ in enumerate(cost_desc): + neighbor_add1 = self.n_neighbors_ + 1 + acd[_h] = ((2. * (neighbor_add1 - (_h + 1))) / (neighbor_add1 * self.n_neighbors_)) * cost_ + ac_dist[i] = np.sum(acd) + for _g in range(X.shape[0]): + cof_[_g] = (ac_dist[_g] * self.n_neighbors_) / np.sum(ac_dist[sbn_path_index[_g]]) + return np.nan_to_num(cof_) + + def _cof_fast(self, X): + """ + Connectivity-Based Outlier Factor (COF) Algorithm + This function is called internally to calculate the + Connectivity-Based Outlier Factor (COF) as an outlier + score for observations. + This function uses a fast implementation at the cost of memory. + :return: numpy array containing COF scores for observations. + The greater the COF, the greater the outlierness. + """ + dist_matrix = np.array(distance_matrix(X, X)) + sbn_path_index, ac_dist, cof_ = [], [], [] + for i in range(X.shape[0]): + sbn_path = np.argsort(dist_matrix[i]) + sbn_path_index.append(sbn_path[1: self.n_neighbors_ + 1]) + cost_desc = [] + for j in range(self.n_neighbors_): + cost_desc.append( + np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) + acd = [] + for _h, cost_ in enumerate(cost_desc): + neighbor_add1 = self.n_neighbors_ + 1 + acd.append(((2. 
* (neighbor_add1 - (_h + 1))) / ( + neighbor_add1 * self.n_neighbors_)) * cost_) + ac_dist.append(np.sum(acd)) + for _g in range(X.shape[0]): + cof_.append((ac_dist[_g] * self.n_neighbors_) / + np.sum(itemgetter(*sbn_path_index[_g])(ac_dist))) + return np.nan_to_num(cof_) \ No newline at end of file diff --git a/models/COPOD.py b/models/COPOD.py new file mode 100644 index 0000000000000000000000000000000000000000..f74eb1cb7163e1ea00a5f4c17e6b7271ef12add8 --- /dev/null +++ b/models/COPOD.py @@ -0,0 +1,205 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function +import warnings + +import numpy as np + +from joblib import Parallel, delayed +from scipy.stats import skew as skew_sp +from sklearn.utils.validation import check_is_fitted +from sklearn.utils import check_array + +from .base import BaseDetector +from ..utils.stat_models import column_ecdf +from ..utils.utility import _partition_estimators +from ..utils.utility import zscore + +def skew(X, axis=0): + return np.nan_to_num(skew_sp(X, axis=axis)) + +def _parallel_ecdf(n_dims, X): + """Private method to calculate ecdf in parallel. + Parameters + ---------- + n_dims : int + The number of dimensions of the current input matrix + + X : numpy array + The subarray for building the ECDF + + Returns + ------- + U_l_mat : numpy array + ECDF subarray. + + U_r_mat : numpy array + ECDF subarray. + """ + U_l_mat = np.zeros([X.shape[0], n_dims]) + U_r_mat = np.zeros([X.shape[0], n_dims]) + + for i in range(n_dims): + U_l_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1]) + U_r_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1] * -1) + return U_l_mat, U_r_mat + +class COPOD(BaseDetector): + """COPOD class for Copula Based Outlier Detector. + COPOD is a parameter-free, highly interpretable outlier detection algorithm + based on empirical copula models. + See :cite:`li2020copod` for details. + + Parameters + ---------- + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. + the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + n_jobs : optional (default=1) + The number of jobs to run in parallel for both `fit` and + `predict`. If -1, then the number of jobs is set to the + number of cores. + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, contamination=0.1, n_jobs=1, normalize=True): + super(COPOD, self).__init__(contamination=contamination) + + #TODO: Make it parameterized for n_jobs + self.n_jobs = n_jobs + self.normalize = normalize + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. 
+ y : Ignored + Not used, present for API consistency by convention. + Returns + ------- + self : object + Fitted estimator. + """ + X = check_array(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + self._set_n_classes(y) + self.decision_scores_ = self.decision_function(X) + self.X_train = X + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + For consistency, outliers are assigned with larger anomaly scores. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + # use multi-thread execution + if self.n_jobs != 1: + return self._decision_function_parallel(X) + if hasattr(self, 'X_train'): + original_size = X.shape[0] + X = np.concatenate((self.X_train, X), axis=0) + self.U_l = -1 * np.log(column_ecdf(X)) + self.U_r = -1 * np.log(column_ecdf(-X)) + + skewness = np.sign(skew(X, axis=0)) + self.U_skew = self.U_l * -1 * np.sign( + skewness - 1) + self.U_r * np.sign(skewness + 1) + self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) + if hasattr(self, 'X_train'): + decision_scores_ = self.O.sum(axis=1)[-original_size:] + else: + decision_scores_ = self.O.sum(axis=1) + return decision_scores_.ravel() + + def _decision_function_parallel(self, X): + """Predict raw anomaly score of X using the fitted detector. + For consistency, outliers are assigned with larger anomaly scores. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. 
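+
+        Notes
+        -----
+        The feature columns are split into ``n_jobs`` contiguous blocks and the
+        left/right empirical CDFs of each block are computed in parallel with
+        joblib; the per-block results are stitched back together before the
+        skewness-corrected tail probabilities are combined into the final score.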
+ """ + if hasattr(self, 'X_train'): + original_size = X.shape[0] + X = np.concatenate((self.X_train, X), axis=0) + + n_samples, n_features = X.shape[0], X.shape[1] + + if n_features < 2: + raise ValueError( + 'n_jobs should not be used on one dimensional dataset') + + if n_features <= self.n_jobs: + self.n_jobs = n_features + warnings.warn("n_features <= n_jobs; setting them equal instead.") + + n_jobs, n_dims_list, starts = _partition_estimators(n_features, + self.n_jobs) + + all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, + verbose=True)( + delayed(_parallel_ecdf)( + n_dims_list[i], + X[:, starts[i]:starts[i + 1]], + ) + for i in range(n_jobs)) + + # recover the results + self.U_l = np.zeros([n_samples, n_features]) + self.U_r = np.zeros([n_samples, n_features]) + + for i in range(n_jobs): + self.U_l[:, starts[i]:starts[i + 1]] = all_results[i][0] + self.U_r[:, starts[i]:starts[i + 1]] = all_results[i][1] + + self.U_l = -1 * np.log(self.U_l) + self.U_r = -1 * np.log(self.U_r) + + skewness = np.sign(skew(X, axis=0)) + self.U_skew = self.U_l * -1 * np.sign( + skewness - 1) + self.U_r * np.sign(skewness + 1) + self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) + if hasattr(self, 'X_train'): + decision_scores_ = self.O.sum(axis=1)[-original_size:] + else: + decision_scores_ = self.O.sum(axis=1) + return decision_scores_.ravel() \ No newline at end of file diff --git a/models/Chronos.py b/models/Chronos.py new file mode 100644 index 0000000000000000000000000000000000000000..a6b8ab6c94842e26d78a405c54a7604e85e2065b --- /dev/null +++ b/models/Chronos.py @@ -0,0 +1,94 @@ +""" +This function is adapted from [chronos-forecasting] by [lostella et al.] +Original source: [https://github.com/amazon-science/chronos-forecasting] +""" + +from autogluon.timeseries import TimeSeriesPredictor +from sklearn.preprocessing import MinMaxScaler +import numpy as np +import pandas as pd +import tempfile + +from .base import BaseDetector + + +class Chronos(BaseDetector): + def __init__(self, + win_size=100, + model_size = 'base', # [tiny, small, base] + prediction_length=1, + input_c=1, + batch_size=128): + + self.model_name = 'Chronos' + self.model_size = model_size + self.win_size = win_size + self.prediction_length = prediction_length + self.input_c = input_c + self.batch_size = batch_size + self.score_list = [] + + def fit(self, data): + + for channel in range(self.input_c): + + data_channel = data[:, channel].reshape(-1, 1) + data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) + # print('data_win: ', data_win.shape) # (2330, 100) + # print('data_target: ', data_target.shape) # (2330, 1) + + train_data = [] + count = 0 + for id in range(data_win.shape[0]): + for tt in range(data_win.shape[1]): + train_data.append([id, count, data_win[id, tt]]) + count += 1 + train_data = pd.DataFrame(train_data, columns=['item_id', 'timestamp', 'target']) + + with tempfile.TemporaryDirectory() as temp_dir: + + predictor = TimeSeriesPredictor(prediction_length=self.prediction_length, path=temp_dir).fit( + train_data, + hyperparameters={ + "Chronos": { + "model_path": self.model_size, # base + "device": "cuda", + "batch_size": self.batch_size}}, + skip_model_selection=True, + verbosity=0) + + predictions = predictor.predict(train_data)['mean'].to_numpy().reshape(-1, self.prediction_length) + print('predictions: ', predictions.shape) + + ### using mse as the anomaly score + scores = (data_target.squeeze() - predictions.squeeze()) ** 
2 + self.score_list.append(scores) + + scores_merge = np.mean(np.array(self.score_list), axis=0) + # print('scores_merge: ', scores_merge.shape) + + padded_decision_scores = np.zeros(len(data)) + padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] + padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge + + self.decision_scores_ = padded_decision_scores + + + def decision_function(self, X): + """ + Not used, present for API consistency by convention. + """ + pass + + def create_dataset(self, X, slidingWindow, predict_time_steps=1): + Xs, ys = [], [] + for i in range(len(X) - slidingWindow - predict_time_steps+1): + + tmp = X[i : i + slidingWindow + predict_time_steps].ravel() + # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() + + x = tmp[:slidingWindow] + y = tmp[slidingWindow:] + Xs.append(x) + ys.append(y) + return np.array(Xs), np.array(ys) \ No newline at end of file diff --git a/models/DADA.py b/models/DADA.py new file mode 100644 index 0000000000000000000000000000000000000000..27f8de23c33ade534a0bff9bd8358760708b6773 --- /dev/null +++ b/models/DADA.py @@ -0,0 +1,141 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import DataLoader, TensorDataset +import math +import tqdm +import os +from transformers import AutoTokenizer +from typing import Optional, Tuple + +# Add debugging prints to understand the import issue +import sys +# print(f"Python path: {sys.path}") +# print(f"Current working directory: {os.getcwd()}") +# print(f"Current file location: {__file__}") +# print(f"Current file directory: {os.path.dirname(__file__)}") +# +# # Check if the utils directory exists +# utils_path = os.path.join(os.path.basename(os.path.dirname(__file__)), "utils") +# print(f"Utils path: {utils_path}") +# print(f"Utils directory exists: {os.path.exists(utils_path)}") +# print(f"Utils directory contents: {os.listdir(utils_path) if os.path.exists(utils_path) else 'Directory not found'}") +# +# # Check if dataset.py exists +# dataset_path = os.path.join(utils_path, "dataset.py") +# print(f"Dataset file path: {dataset_path}") +# print(f"Dataset file exists: {os.path.exists(dataset_path)}") + +# Try different import approaches + +os.chdir("/home/lihaoyang/Huawei/TSB-AD/TSB_AD") + +try: + from utils.dataset import ReconstructDataset + print("Relative import successful") +except ImportError as e: + print(f"Relative import failed: {e}") + + # Try absolute import + try: + from TSB_AD.utils.dataset import ReconstructDataset + print("Absolute import successful") + except ImportError as e2: + print(f"Absolute import failed: {e2}") + + # Try adding parent directory to path + try: + parent_dir = os.path.dirname(os.path.dirname(__file__)) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + from utils.dataset import ReconstructDataset + print("Import with modified path successful") + except ImportError as e3: + print(f"Import with modified path failed: {e3}") + +from .base import BaseDetector + +# ...existing code... 
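+
+# Portability note: the hard-coded os.chdir(...) above only works on the original
+# author's machine. A minimal, more portable sketch (assuming this file lives at
+# <package>/models/DADA.py, so parents[1] is the package root) would put that
+# directory on sys.path instead of changing the working directory:
+#
+#     from pathlib import Path
+#     _pkg_root = str(Path(__file__).resolve().parents[1])   # .../TSB_AD (assumed layout)
+#     if _pkg_root not in sys.path:
+#         sys.path.insert(0, _pkg_root)
+#     from utils.dataset import ReconstructDataset
+#
+# When the module is imported as TSB_AD.models.DADA, the package-relative form
+# `from ..utils.dataset import ReconstructDataset` avoids the path juggling entirely.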
+ +class DADA(BaseDetector): + def __init__(self, device, args=None, win_size=64, batch_size=32): + self.win_size = win_size + self.batch_size = batch_size + self.device = torch.device(f'cuda:{device}' if torch.cuda.is_available() else 'cpu') + self.model = self._build_model().to(self.device) + + def _build_model(self): + from transformers import AutoModel, AutoConfig + import os + + # Try multiple possible paths + possible_paths = [ + os.environ.get("DADA_MODEL_PATH"), # Environment variable + "/home/lihaoyang/Huawei/DADA/DADA/", # Original Linux path + "./DADA", # Relative path + "DADA" # Hugging Face model name + ] + + for path in possible_paths: + if path is None: + continue + try: + # Try loading config first + config = AutoConfig.from_pretrained(path, trust_remote_code=True) + model = AutoModel.from_pretrained(path, config=config, trust_remote_code=True) + print(f"Successfully loaded DADA model from: {path}") + return model + except Exception as e: + print(f"Failed to load from {path}: {e}") + continue + + raise ValueError("DADA model not found. Please set DADA_MODEL_PATH environment variable or ensure the model is available at one of the expected locations.") + + # def _acquire_device(self): + # if True: + # os.environ["CUDA_VISIBLE_DEVICES"] = str( + # self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + # device = torch.device('cuda:{}'.format(self.args.gpu)) + # print('Use GPU: cuda:{}'.format(self.args.gpu)) + # else: + # device = torch.device('cpu') + # print('Use CPU') + # return device + + def decision_function(self, x: torch.Tensor) -> torch.Tensor: + pass + + def fit(self, data: torch.Tensor, labels: Optional[torch.Tensor] = None) -> None: + pass + + def zero_shot(self, data): + + test_loader = DataLoader( + dataset= ReconstructDataset(data, window_size=self.win_size, stride=self.win_size, normalize=True), + batch_size=self.batch_size, + shuffle=False) + + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + + test_scores = [] + test_labels = [] + self.model.eval() + self.model.to(self.device) + + with torch.no_grad(): + for i, (batch_x, batch_y) in loop: + batch_x = batch_x.float().to(self.device) + score = self.model.infer(batch_x, norm=0) + score = score.detach().cpu().numpy() + test_scores.append(score) + test_labels.append(batch_y) + + test_scores = np.concatenate(test_scores, axis=0).reshape(-1, 1) + test_labels = np.concatenate(test_labels, axis=0).reshape(-1, 1) + + print("Test scores shape:", test_scores.shape) + print("Test labels shape:", test_labels.shape) + + return test_scores.reshape(-1) \ No newline at end of file diff --git a/models/Donut.py b/models/Donut.py new file mode 100644 index 0000000000000000000000000000000000000000..15be238d96fa18f354bd4da88625511af9a4c8c5 --- /dev/null +++ b/models/Donut.py @@ -0,0 +1,419 @@ +""" +This function is adapted from [donut] by [haowen-xu] +Original source: [https://github.com/NetManAIOps/donut] +""" + +from typing import Dict +import numpy as np +import torchinfo +import torch +from torch import nn, optim +import tqdm +import os, math +import torch.nn.functional as F +from torch.utils.data import DataLoader +from typing import Tuple, Sequence, Union, Callable + +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu +from ..utils.dataset import ReconstructDataset + +class DonutModel(nn.Module): + def __init__(self, input_dim, hidden_dim, latent_dim, mask_prob) -> None: + super().__init__() + + """ + Xu2018 + + :param input_dim: Should be window_size * features + :param 
hidden_dims: + :param latent_dim: + """ + + self.latent_dim = latent_dim + self.mask_prob = mask_prob + + encoder = VaeEncoder(input_dim, hidden_dim, latent_dim) + decoder = VaeEncoder(latent_dim, hidden_dim, input_dim) + + self.vae = VAE(encoder=encoder, decoder=decoder, logvar_out=False) + + def forward(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, ...]: + # x: (B, T, D) + x = inputs + B, T, D = x.shape + + if self.training: + # Randomly mask some inputs + mask = torch.empty_like(x) + mask.bernoulli_(1 - self.mask_prob) + x = x * mask + else: + mask = None + + # Run the VAE + x = x.view(x.shape[0], -1) + mean_z, std_z, mean_x, std_x, sample_z = self.vae(x, return_latent_sample=True) + + # Reshape the outputs + mean_x = mean_x.view(B, T, D) + std_x = std_x.view(B, T, D) + return mean_z, std_z, mean_x, std_x, sample_z, mask + +def sample_normal(mu: torch.Tensor, std_or_log_var: torch.Tensor, log_var: bool = False, num_samples: int = 1): + # ln(σ) = 0.5 * ln(σ^2) -> σ = e^(0.5 * ln(σ^2)) + if log_var: + sigma = std_or_log_var.mul(0.5).exp_() + else: + sigma = std_or_log_var + + if num_samples == 1: + eps = torch.randn_like(mu) # also copies device from mu + else: + eps = torch.rand((num_samples,) + mu.shape, dtype=mu.dtype, device=mu.device) + mu = mu.unsqueeze(0) + sigma = sigma.unsqueeze(0) + # z = μ + σ * ϵ, with ϵ ~ N(0,I) + return eps.mul(sigma).add_(mu) + +def normal_standard_normal_kl(mean: torch.Tensor, std_or_log_var: torch.Tensor, log_var: bool = False) -> torch.Tensor: + if log_var: + kl_loss = torch.sum(1 + std_or_log_var - mean.pow(2) - std_or_log_var.exp(), dim=-1) + else: + kl_loss = torch.sum(1 + torch.log(std_or_log_var.pow(2)) - mean.pow(2) - std_or_log_var.pow(2), dim=-1) + return -0.5 * kl_loss + + +def normal_normal_kl(mean_1: torch.Tensor, std_or_log_var_1: torch.Tensor, mean_2: torch.Tensor, + std_or_log_var_2: torch.Tensor, log_var: bool = False) -> torch.Tensor: + if log_var: + return 0.5 * torch.sum(std_or_log_var_2 - std_or_log_var_1 + (torch.exp(std_or_log_var_1) + + (mean_1 - mean_2)**2) / torch.exp(std_or_log_var_2) - 1, dim=-1) + + return torch.sum(torch.log(std_or_log_var_2) - torch.log(std_or_log_var_1) \ + + 0.5 * (std_or_log_var_1**2 + (mean_1 - mean_2)**2) / std_or_log_var_2**2 - 0.5, dim=-1) + + +class VAELoss(torch.nn.modules.loss._Loss): + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean', logvar_out: bool = True): + super(VAELoss, self).__init__(size_average, reduce, reduction) + self.logvar_out = logvar_out + + def forward(self, predictions: Tuple[torch.Tensor, ...], targets: Tuple[torch.Tensor, ...], *args, **kwargs) \ + -> torch.Tensor: + z_mean, z_std_or_log_var, x_dec_mean, x_dec_std = predictions[:4] + if len(predictions) > 4: + z_prior_mean, z_prior_std_or_logvar = predictions[4:] + else: + z_prior_mean, z_prior_std_or_logvar = None, None + + y, = targets + + # Gaussian nnl loss assumes multivariate normal with diagonal sigma + # Alternatively we can use torch.distribution.Normal(x_dec_mean, x_dec_std).log_prob(y).sum(-1) + # or torch.distribution.MultivariateNormal(mean, cov).log_prob(y).sum(-1) + # with cov = torch.eye(feat_dim).repeat([1,bz,1,1])*std.pow(2).unsqueeze(-1). + # However setting up a distribution seems to be an unnecessary computational overhead. + # However, this requires pytorch version > 1.9!!! 
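+        # For reference, the term computed next is the diagonal-Gaussian negative
+        # log-likelihood summed over the output dimensions,
+        #     -log p(y | mu, sigma) = 0.5 * sum_d [ log sigma_d^2 + (y_d - mu_d)^2 / sigma_d^2 ] + const,
+        # which is what F.gaussian_nll_loss evaluates (it clamps the variance with a
+        # small eps for numerical stability and drops the constant term by default).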
+ nll_gauss = F.gaussian_nll_loss(x_dec_mean, y, x_dec_std.pow(2), reduction='none').sum(-1) + # For pytorch version < 1.9 use: + # nll_gauss = -torch.distribution.Normal(x_dec_mean, x_dec_std).log_prob(y).sum(-1) + + # get KL loss + if z_prior_mean is None and z_prior_std_or_logvar is None: + # If a prior is not given, we assume standard normal + kl_loss = normal_standard_normal_kl(z_mean, z_std_or_log_var, log_var=self.logvar_out) + else: + if z_prior_mean is None: + z_prior_mean = torch.tensor(0, dtype=z_mean.dtype, device=z_mean.device) + if z_prior_std_or_logvar is None: + value = 0 if self.logvar_out else 1 + z_prior_std_or_logvar = torch.tensor(value, dtype=z_std_or_log_var.dtype, device=z_std_or_log_var.device) + + kl_loss = normal_normal_kl(z_mean, z_std_or_log_var, z_prior_mean, z_prior_std_or_logvar, + log_var=self.logvar_out) + + # Combine + final_loss = nll_gauss + kl_loss + + if self.reduction == 'none': + return final_loss + elif self.reduction == 'mean': + return torch.mean(final_loss) + elif self.reduction == 'sum': + return torch.sum(final_loss) + + +class MaskedVAELoss(VAELoss): + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean'): + super(MaskedVAELoss, self).__init__(size_average, reduce, reduction, logvar_out=False) + + def forward(self, predictions: Tuple[torch.Tensor, ...], targets: Tuple[torch.Tensor, ...], *args, **kwargs) \ + -> torch.Tensor: + mean_z, std_z, mean_x, std_x, sample_z, mask = predictions + actual_x, = targets + + if mask is None: + mean_z = mean_z.unsqueeze(1) + std_z = std_z.unsqueeze(1) + return super(MaskedVAELoss, self).forward((mean_z, std_z, mean_x, std_x), (actual_x,), *args, **kwargs) + + # If the loss is masked, one of the terms in the kl loss is weighted, so we can't compute it exactly + # anymore and have to use a MC approximation like for the output likelihood + nll_output = torch.sum(mask * F.gaussian_nll_loss(mean_x, actual_x, std_x**2, reduction='none'), dim=-1) + + # This is p(z), i.e., the prior likelihood of Z. 
The paper assumes p(z) = N(z| 0, I), we drop constants + beta = torch.mean(mask, dim=(1, 2)).unsqueeze(-1) + nll_prior = beta * 0.5 * torch.sum(sample_z * sample_z, dim=-1, keepdim=True) + + nll_approx = torch.sum(F.gaussian_nll_loss(mean_z, sample_z, std_z**2, reduction='none'), dim=-1, keepdim=True) + + final_loss = nll_output + nll_prior - nll_approx + + if self.reduction == 'none': + return final_loss + elif self.reduction == 'mean': + return torch.mean(final_loss) + elif self.reduction == 'sum': + return torch.sum(final_loss) + +class MLP(torch.nn.Module): + def __init__(self, input_features: int, hidden_layers: Union[int, Sequence[int]], output_features: int, + activation: Callable = torch.nn.Identity(), activation_after_last_layer: bool = False): + super(MLP, self).__init__() + + self.activation = activation + self.activation_after_last_layer = activation_after_last_layer + + if isinstance(hidden_layers, int): + hidden_layers = [hidden_layers] + + layers = [input_features] + list(hidden_layers) + [output_features] + self.layers = torch.nn.ModuleList([torch.nn.Linear(inp, out) for inp, out in zip(layers[:-1], layers[1:])]) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = x + for layer in self.layers[:-1]: + out = layer(out) + out = self.activation(out) + + out = self.layers[-1](out) + if self.activation_after_last_layer: + out = self.activation(out) + + return out + +class VaeEncoder(nn.Module): + def __init__(self, input_dim: int, hidden_dim: int, latent_dim: int): + super(VaeEncoder, self).__init__() + + self.latent_dim = latent_dim + + self.mlp = MLP(input_dim, hidden_dim, 2*latent_dim, activation=torch.nn.ReLU(), activation_after_last_layer=False) + self.softplus = torch.nn.Softplus() + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + # x: (B, T, D) + mlp_out = self.mlp(x) + + mean, std = mlp_out.tensor_split(2, dim=-1) + std = self.softplus(std) + + return mean, std + +class VAE(torch.nn.Module): + """ + VAE Implementation that supports normal distribution with diagonal cov matrix in the latent space + and the output + """ + + def __init__(self, encoder: torch.nn.Module, decoder: torch.nn.Module, logvar_out: bool = True): + super(VAE, self).__init__() + + self.encoder = encoder + self.decoder = decoder + self.log_var = logvar_out + + def forward(self, x: torch.Tensor, return_latent_sample: bool = False, num_samples: int = 1, + force_sample: bool = False) -> Tuple[torch.Tensor, ...]: + z_mu, z_std_or_log_var = self.encoder(x) + + if self.training or num_samples > 1 or force_sample: + z_sample = sample_normal(z_mu, z_std_or_log_var, log_var=self.log_var, num_samples=num_samples) + else: + z_sample = z_mu + + x_dec_mean, x_dec_std = self.decoder(z_sample) + + if not return_latent_sample: + return z_mu, z_std_or_log_var, x_dec_mean, x_dec_std + + return z_mu, z_std_or_log_var, x_dec_mean, x_dec_std, z_sample + + + +class Donut(): + def __init__(self, + win_size=120, + input_c=1, + batch_size=128, # 32, 128 + grad_clip=10.0, + num_epochs=50, + mc_samples=1024, + hidden_dim=100, + latent_dim=8, + inject_ratio=0.01, + lr=1e-4, + l2_coff=1e-3, + patience=3, + validation_size=0): + super().__init__() + self.__anomaly_score = None + + self.cuda = True + self.device = get_gpu(self.cuda) + + self.win_size = win_size + self.input_c = input_c + self.batch_size = batch_size + self.grad_clip = grad_clip + self.num_epochs = num_epochs + self.mc_samples = mc_samples + self.validation_size = validation_size + + input_dim = self.win_size*self.input_c + + 
self.model = DonutModel(input_dim=input_dim, hidden_dim=hidden_dim, latent_dim=latent_dim, mask_prob=inject_ratio).to(self.device) + self.optimizer = optim.AdamW(self.model.parameters(), lr=lr, weight_decay=l2_coff) + self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.75) + self.vaeloss = MaskedVAELoss() + + self.save_path = None + self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=patience) + + def train(self, train_loader, epoch): + self.model.train(mode=True) + avg_loss = 0 + loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + self.optimizer.zero_grad() + + # print('x: ', x.shape) + + output = self.model(x) + loss = self.vaeloss(output, (target,)) + loss.backward() + + torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip) + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f'Training Epoch [{epoch}/{self.num_epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + return avg_loss/max(len(train_loader), 1) + + def valid(self, valid_loader, epoch): + self.model.eval() + avg_loss = 0 + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + output = self.model(x) + loss = self.vaeloss(output, (target,)) + avg_loss += loss.cpu().item() + loop.set_description(f'Validation Epoch [{epoch}/{self.num_epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + return avg_loss/max(len(valid_loader), 1) + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + for epoch in range(1, self.num_epochs + 1): + train_loss = self.train(train_loader, epoch) + if len(valid_loader) > 0: + valid_loss = self.valid(valid_loader, epoch) + self.scheduler.step() + + if len(valid_loader) > 0: + self.early_stopping(valid_loss, self.model) + else: + self.early_stopping(train_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + + def decision_function(self, data): + + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for idx, (x, _) in loop: + x = x.to(self.device) + x_vae = x.view(x.shape[0], -1) + B, T, D = x.shape + + res = self.model.vae(x_vae, return_latent_sample=False, num_samples=self.mc_samples) + z_mu, z_std, x_dec_mean, x_dec_std = res + + x_dec_mean = x_dec_mean.view(self.mc_samples, B, T, D) + x_dec_std = x_dec_std.view(self.mc_samples, B, T, D) + nll_output = torch.sum(F.gaussian_nll_loss(x_dec_mean[:, :, -1, :], x[:, -1, :].unsqueeze(0), + x_dec_std[:, :, -1, :]**2, reduction='none'), dim=(0, 2)) + nll_output /= self.mc_samples + + + scores.append(nll_output.cpu()) + loop.set_description(f'Testing: ') + + scores = torch.cat(scores, dim=0) + scores = scores.numpy() + + assert 
scores.ndim == 1 + + import shutil + if self.save_path and os.path.exists(self.save_path): + shutil.rmtree(self.save_path) + + self.__anomaly_score = scores + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def get_y_hat(self) -> np.ndarray: + return super().get_y_hat + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, (self.batch_size, self.win_size), verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) + \ No newline at end of file diff --git a/models/EIF.py b/models/EIF.py new file mode 100644 index 0000000000000000000000000000000000000000..ed8dca928bc269e305372d533415a9d76d355e72 --- /dev/null +++ b/models/EIF.py @@ -0,0 +1,438 @@ +""" +This function is adapted from [eif] by [mgckind] +Original source: [https://github.com/sahandha/eif] +""" + +# import eif as iso +from .base import BaseDetector +import numpy as np +import math +import random as rn +import os +import warnings +from ..utils.utility import zscore + +def c_factor(n) : + """ + Average path length of unsuccesful search in a binary search tree given n points + + Parameters + ---------- + n : int + Number of data points for the BST. + + Returns + ------- + float + Average path length of unsuccesful search in a BST + + """ + return 2.0*(np.log(n-1)+0.5772156649) - (2.0*(n-1.)/(n*1.0)) + + +class iForest(object): + """ + Creates an iForest object. This object holds the data as well as the trained trees (iTree objects). + + Attributes + ---------- + X : list + Data used for training. It is a list of list of floats. + nobjs: int + Size of the dataset. + sample: int + Size of the sample to be used for tree creation. + Trees: list + A list of tree objects. + limit: int + Maximum depth a tree can have. + exlevel: int + Exention level to be used in the creating splitting critera. + c: float + Multiplicative factor used in computing the anomaly scores. + + Methods + ------- + CheckExtensionLevel() + Chaeck the validity of extension level provided by user based on the data + compute_paths(X_in) + Computes the anomaly score for data X_in + """ + def __init__(self, X, ntrees, sample_size, limit=None, ExtensionLevel=0): + """ + iForest(X, ntrees, sample_size, limit=None, ExtensionLevel=0) + Initialize a forest by passing in training data, number of trees to be used and the subsample size. + + Parameters + ---------- + X : list of list of floats + Training data. List of [x1,x2,...,xn] coordinate points. + ntrees : int + Number of trees to be used. + sample_size : int + The size of the subsample to be used in creation of each tree. Must be smaller than |X| + limit : int + The maximum allowed tree depth. This is by default set to average length of unsucessful search in a binary tree. + ExtensionLevel : int + Specifies degree of freedom in choosing the hyperplanes for dividing up data. Must be smaller than the dimension n of the dataset. + """ + + self.ntrees = ntrees + self.X = X + self.nobjs = len(X) + self.sample = sample_size + self.Trees = [] + self.limit = limit + self.exlevel = ExtensionLevel + self.CheckExtensionLevel() # Extension Level check. See def for explanation. 
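+        # For reference, compute_paths() below scores each point x as
+        #     s(x, n) = 2 ** ( -E[h(x)] / c(sample_size) ),
+        # where E[h(x)] is the mean path length over all trees and c(.) is the
+        # c_factor() normalisation; scores close to 1 flag anomalies, while
+        # scores well below 0.5 indicate ordinary points.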
+ if limit is None: + self.limit = int(np.ceil(np.log2(self.sample))) # Set limit to the default as specified by the paper (average depth of unsuccesful search through a binary tree). + self.c = c_factor(self.sample) + for i in range(self.ntrees): # This loop builds an ensemble of iTrees (the forest). + ix = rn.sample(range(self.nobjs), self.sample) + X_p = X[ix] + self.Trees.append(iTree(X_p, 0, self.limit, exlevel=self.exlevel)) + + def CheckExtensionLevel(self): + """ + This function makes sure the extension level provided by the user does not exceed the dimension of the data. An exception will be raised in the case of a violation. + """ + dim = self.X.shape[1] + if self.exlevel < 0: + raise Exception("Extension level has to be an integer between 0 and "+ str(dim-1)+".") + if self.exlevel > dim-1: + raise Exception("Your data has "+ str(dim) + " dimensions. Extension level can't be higher than " + str(dim-1) + ".") + + def compute_paths(self, X_in = None): + """ + compute_paths(X_in = None) + Compute anomaly scores for all data points in a dataset X_in + + Parameters + ---------- + X_in : list of list of floats + Data to be scored. iForest.Trees are used for computing the depth reached in each tree by each data point. + + Returns + ------- + float + Anomaly score for a given data point. + """ + if X_in is None: + X_in = self.X + S = np.zeros(len(X_in)) + for i in range(len(X_in)): + h_temp = 0 + for j in range(self.ntrees): + h_temp += PathFactor(X_in[i],self.Trees[j]).path*1.0 # Compute path length for each point + Eh = h_temp/self.ntrees # Average of path length travelled by the point in all trees. + S[i] = 2.0**(-Eh/self.c) # Anomaly Score + return S + +class Node(object): + """ + A single node from each tree (each iTree object). Nodes containe information on hyperplanes used for data division, date to be passed to left and right nodes, whether they are external or internal nodes. + + Attributes + ---------- + e: int + Depth of the tree to which the node belongs. + size: int + Size of the dataset present at the node. + X: list + Data at the node. + n: list + Normal vector used to build the hyperplane that splits the data in the node. + p: list + Intercept point through which the hyperplane passes. + lef: Node object + Left child node. + right: Node object + Right child node. + ntype: str + The type of the node: 'exNode', 'inNode'. + """ + def __init__(self, X, n, p, e, left, right, node_type = '' ): + """ + Node(X, n, p, e, left, right, node_type = '' ) + Create a node in a given tree (iTree objectg) + + Parameters + ---------- + X : list of list of floats + Training data available to each node. List of [x1,x2,...,xn] coordinate points. + n : list of floats + Normal vector for the hyperplane used for splitting data. + p : list of floats + Intercept point for the hyperplane used for splitting data. + left : Node object + Left child node. + right : Node object + Right child node. + node_type : str + Specifies if the node is external or internal. Takes two values: 'exNode', 'inNode'. + """ + self.e = e + self.size = len(X) + self.X = X # to be removed + self.n = n + self.p = p + self.left = left + self.right = right + self.ntype = node_type + +class iTree(object): + + """ + A single tree in the forest that is build using a unique subsample. + + Attributes + ---------- + exlevel: int + Extension level used in the splitting criteria. + e: int + Depth of tree + X: list + Data present at the root node of this tree. + size: int + Size of the dataset. + dim: int + Dimension of the dataset. 
+ Q: list + List of ordered integers smaller than dim. + l: int + Maxium depth a tree can reach before its creation is terminated. + n: list + Normal vector at the root of this tree, which is used in creating hyperplanes for splitting critera + p: list + Intercept point at the root of this tree through which the splitting hyperplane passes. + exnodes: int + The number of external nodes this tree has. + root: Node object + At each node create a new tree. + + Methods + ------- + make_tree(X, e, l) + Builds the tree recursively from a given node. Returns a Node object. + """ + + def __init__(self,X,e,l, exlevel=0): + """ + iTree(X, e, l, exlevel=0) + Create a tree + + Parameters + ---------- + X : list of list of floats + Subsample of training data. |X| = iForest.sample_size. List of [x1,x2,...,xn] coordinate points + e : int + Depth of the tree as it is being traversed down. e <= l. + l : int + The maximum depth the tree can reach before its creation is terminated. + exlevel : int + Specifies degree of freedom in choosing the hyperplanes for dividing up data. Must be smaller than the dimension n of the dataset. + """ + self.exlevel = exlevel + self.e = e + self.X = X #save data for now. Not really necessary. + self.size = len(X) + self.dim = self.X.shape[1] + self.Q = np.arange(np.shape(X)[1], dtype='int') # n dimensions + self.l = l + self.p = None # Intercept for the hyperplane for splitting data at a given node. + self.n = None # Normal vector for the hyperplane for splitting data at a given node. + self.exnodes = 0 + self.root = self.make_tree(X,e,l) # At each node create a new tree, starting with root node. + + def make_tree(self,X,e,l): + """ + make_tree(X,e,l) + Builds the tree recursively from a given node. Returns a Node object. + + Parameters + ---------- + X: list of list of floats + Subsample of training data. |X| = iForest.sample_size. List of [x1,x2,...,xn] coordinate point. + e : int + Depth of the tree as it is being traversed down. Integer. e <= l. + l : int + The maximum depth the tree can reach before its creation is terminated. Integer. + + Returns + ------- + Node object + """ + self.e = e + if e >= l or len(X) <= 1: # A point is isolated in traning data, or the depth limit has been reached. + left = None + right = None + self.exnodes += 1 + return Node(X, self.n, self.p, e, left, right, node_type = 'exNode') + else: # Building the tree continues. All these nodes are internal. + mins = X.min(axis=0) + maxs = X.max(axis=0) + idxs = np.random.choice(range(self.dim), self.dim-self.exlevel-1, replace=False) # Pick the indices for which the normal vector elements should be set to zero acccording to the extension level. + self.n = np.random.normal(0,1,self.dim) # A random normal vector picked form a uniform n-sphere. Note that in order to pick uniformly from n-sphere, we need to pick a random normal for each component of this vector. + self.n[idxs] = 0 + self.p = np.random.uniform(mins,maxs) # Picking a random intercept point for the hyperplane splitting data. + w = (X-self.p).dot(self.n) < 0 # Criteria that determines if a data point should go to the left or right child node. + return Node(X, self.n, self.p, e,\ + left=self.make_tree(X[w],e+1,l),\ + right=self.make_tree(X[~w],e+1,l),\ + node_type = 'inNode' ) + +class PathFactor(object): + """ + Given a single tree (iTree objext) and a data point x = [x1,x2,...,xn], compute the legth of the path traversed by the point on the tree when it reaches an external node. 
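+    If the external node reached still contains more than one point, the remaining
+    depth is estimated by adding ``c_factor`` of the node size, as in the original
+    isolation-forest formulation.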
+ + Attributes + ---------- + path_list: list + A list of strings 'L' or 'R' which traces the path a data point travels down a tree. + x: list + A single data point, which is represented as a list of floats. + e: int + The depth of a given node in the tree. + + Methods + ------- + find_path(T) + Given a tree, it finds the path a single data points takes. + + """ + def __init__(self,x,itree): + """ + PathFactor(x, itree) + Given a single tree (iTree objext) and a data point x = [x1,x2,...,xn], compute the legth of the path traversed by the point on the tree when it reaches an external node. + + Parameters + ---------- + x : list of floats + A data point x = [x1, x2, ..., xn]. + itree : iTree object + A single tree. + """ + self.path_list=[] + self.x = x + self.e = 0 + self.path = self.find_path(itree.root) + + def find_path(self,T): + """ + find_path(T) + Given a tree, find the path for a single data point based on the splitting criteria stored at each node. + + Parameters + ---------- + T : iTree object + + Returns + ------- + int + The depth reached by the data point. + """ + if T.ntype == 'exNode': + if T.size <= 1: return self.e + else: + self.e = self.e + c_factor(T.size) + return self.e + else: + p = T.p # Intercept for the hyperplane for splitting data at a given node. + n = T.n # Normal vector for the hyperplane for splitting data at a given node. + + self.e += 1 + + if (self.x-p).dot(n) < 0: + self.path_list.append('L') + return self.find_path(T.left) + else: + self.path_list.append('R') + return self.find_path(T.right) + +def all_branches(node, current=[], branches = None): + """ + Utility function used in generating a graph visualization. It returns all the branches of a given tree so they can be visualized. + + Parameters + ---------- + node: Node object + + Returns + ------- + list + list of branches that were reached. + """ + current = current[:node.e] + if branches is None: branches = [] + if node.ntype == 'inNode': + current.append('L') + all_branches(node.left, current=current, branches=branches) + current = current[:-1] + current.append('R') + all_branches(node.right, current=current, branches=branches) + else: + branches.append(current) + return branches + +class EIF(BaseDetector): + """ + Extenstion to the basic isolation forest. Implementation of https://doi.org/10.1109/TKDE.2019.2947676. Code from https://github.com/sahandha/eif + """ + def __init__(self, n_trees = 100, max_samples=None, extension_level=None, n_jobs=1, normalize=True): + self.model_name = 'EIF' + self.n_trees = n_trees + self.max_samples = max_samples + self.extension_level = extension_level + self.n_jobs = n_jobs + self.normalize = normalize + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + y : Ignored + Not used, present for API consistency by convention. + Returns + ------- + self : object + Fitted estimator. + """ + if self.max_samples is not None: + self.sample_size = int(self.max_samples * X.shape[0]) + else: + self.sample_size = min(256, X.shape[0]) + + self.limit = int(np.ceil(np.log2(self.sample_size))) + + # Extension level 0 resembles standard isolation forest. 
If unspecified (`null`), then `extension_level=X.shape[1] - 1 + if self.extension_level is None: + self.extension_level = X.shape[1] - 1 + + # eif = iso.iForest( + # X, + # ntrees=self.n_trees, + # sample_size=self.sample_size, + # limit=self.limit, + # ExtensionLevel=self.extension_level, + # ) + + if self.normalize: X = zscore(X, axis=1, ddof=1) + + eif = iForest( + X, + ntrees=self.n_trees, + sample_size=self.sample_size, + limit=self.limit, + ExtensionLevel=self.extension_level, + ) + + self.decision_scores_ = eif.compute_paths(X_in=X) + + def decision_function(self, X): + """ + Not used, present for API consistency by convention. + """ + pass \ No newline at end of file diff --git a/models/FFT.py b/models/FFT.py new file mode 100644 index 0000000000000000000000000000000000000000..cbb1c0cb05da5a4668c5c1f1908de730d533dd58 --- /dev/null +++ b/models/FFT.py @@ -0,0 +1,138 @@ +""" +This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] +Original source: [https://github.com/TimeEval/TimeEval-algorithms] +""" + +import numpy as np +from dataclasses import dataclass +from TSB_AD.models.base import BaseDetector +from TSB_AD.utils.utility import zscore + +class FFT(BaseDetector): + + def __init__(self, ifft_parameters=5, local_neighbor_window=21, local_outlier_threshold=0.6, max_region_size=50, max_sign_change_distance=10, normalize=True): + super().__init__() + + self.ifft_parameters = ifft_parameters + self.local_neighbor_window = local_neighbor_window + self.local_outlier_threshold = local_outlier_threshold + self.max_region_size = max_region_size + self.max_sign_change_distance = max_sign_change_distance + self.normalize = normalize + self.decision_scores_ = None + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods.""" + n_samples, n_features = X.shape + if self.normalize: + if n_features == 1: + X = zscore(X, axis=0, ddof=0) + else: + X = zscore(X, axis=1, ddof=1) + self.data = X + self.decision_scores_ = self.detect_anomalies() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector.""" + n_samples, n_features = X.shape + decision_scores_ = np.zeros(n_samples) + self.data = X + local_outliers = self.calculate_local_outliers() + if not local_outliers: + print("No local outliers detected.") + return np.zeros_like(self.data) + + regions = self.calculate_region_outliers(local_outliers) + anomaly_scores = np.zeros_like(self.data) + for region in regions: + start_index = local_outliers[region.start_idx].index + end_index = local_outliers[region.end_idx].index + anomaly_scores[start_index:end_index + 1] = region.score + + decision_scores_ = anomaly_scores + return decision_scores_ + + @staticmethod + def reduce_parameters(f: np.ndarray, k: int) -> np.ndarray: + transformed = f.copy() + transformed[k:] = 0 + return transformed + + def calculate_local_outliers(self): + n = len(self.data) + k = max(min(self.ifft_parameters, n), 1) + y = self.reduce_parameters(np.fft.fft(self.data), k) + f2 = np.real(np.fft.ifft(y)) + + so = np.abs(f2 - self.data) + mso = np.mean(so) + neighbor_c = self.local_neighbor_window // 2 + + scores = [] + score_idxs = [] + for i in range(n): + if so[i] > mso: + nav = np.mean(self.data[max(i - neighbor_c, 0):min(i + neighbor_c + 1, n)]) + scores.append(self.data[i] - nav) + score_idxs.append(i) + + if not scores: + return [] + + ms = np.mean(scores) + sds = np.std(scores) + 1e-6 + z_scores = (np.array(scores) - ms) / sds + + return 
[self.LocalOutlier(index=score_idxs[i], z_score=z_scores[i]) + for i in range(len(scores)) if abs(z_scores[i]) > self.local_outlier_threshold] + + def calculate_region_outliers(self, local_outliers): + def distance(a: int, b: int) -> int: + return abs(local_outliers[b].index - local_outliers[a].index) + + regions = [] + i = 0 + n_l = len(local_outliers) - 1 + while i < n_l: + start_idx = i + while i < n_l and distance(i, i + 1) <= self.max_sign_change_distance: + i += 1 + end_idx = i + if end_idx > start_idx: + score = np.mean([abs(local_outliers[j].z_score) for j in range(start_idx, end_idx + 1)]) + regions.append(self.RegionOutlier(start_idx=start_idx, end_idx=end_idx, score=score)) + i += 1 + + return regions + + @dataclass + class LocalOutlier: + index: int + z_score: float + + @property + def sign(self) -> int: + return np.sign(self.z_score) + + @dataclass + class RegionOutlier: + start_idx: int + end_idx: int + score: float + + def detect_anomalies(self): + """Detect anomalies by combining local and regional outliers.""" + local_outliers = self.calculate_local_outliers() + if not local_outliers: + print("No local outliers detected.") + return np.zeros_like(self.data) + + regions = self.calculate_region_outliers(local_outliers) + anomaly_scores = np.zeros_like(self.data) + for region in regions: + start_index = local_outliers[region.start_idx].index + end_index = local_outliers[region.end_idx].index + anomaly_scores[start_index:end_index + 1] = region.score + + return anomaly_scores \ No newline at end of file diff --git a/models/FITS.py b/models/FITS.py new file mode 100644 index 0000000000000000000000000000000000000000..8ded4df741464fa42b124eed9632901e8544c20c --- /dev/null +++ b/models/FITS.py @@ -0,0 +1,223 @@ +""" +This function is adapted from [FITS] by [VEWOXIC] +Original source: [https://github.com/VEWOXIC/FITS] +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from typing import Dict +import torchinfo +import tqdm +import numpy as np +import torch +from torch import nn, optim +from torch.utils.data import DataLoader +import math + +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu +from ..utils.dataset import ReconstructDataset + +class Model(nn.Module): + + # FITS: Frequency Interpolation Time Series Forecasting + + def __init__(self, seq_len, pred_len, individual, enc_in, cut_freq): + super(Model, self).__init__() + self.seq_len = seq_len + self.pred_len = pred_len + self.individual = individual + self.channels = enc_in + + self.dominance_freq = cut_freq # 720/24 + self.length_ratio = (self.seq_len + self.pred_len)/self.seq_len + + if self.individual: + self.freq_upsampler = nn.ModuleList() + for i in range(self.channels): + self.freq_upsampler.append(nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat)) + + else: + self.freq_upsampler = nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat) # complex layer for frequency upcampling] + # configs.pred_len=configs.seq_len+configs.pred_len + # #self.Dlinear=DLinear.Model(configs) + # configs.pred_len=self.pred_len + + + def forward(self, x): + # RIN + x_mean = torch.mean(x, dim=1, keepdim=True) + x = x - x_mean + x_var=torch.var(x, dim=1, keepdim=True)+ 1e-5 + # print(x_var) + x = x / torch.sqrt(x_var) + + low_specx = torch.fft.rfft(x, dim=1) + low_specx[:,self.dominance_freq:]=0 # LPF + low_specx = low_specx[:,0:self.dominance_freq,:] # LPF + # print(low_specx.permute(0,2,1)) + if 
self.individual: + low_specxy_ = torch.zeros([low_specx.size(0),int(self.dominance_freq*self.length_ratio),low_specx.size(2)],dtype=low_specx.dtype).to(low_specx.device) + for i in range(self.channels): + low_specxy_[:,:,i]=self.freq_upsampler[i](low_specx[:,:,i].permute(0,1)).permute(0,1) + else: + low_specxy_ = self.freq_upsampler(low_specx.permute(0,2,1)).permute(0,2,1) + # print(low_specxy_) + low_specxy = torch.zeros([low_specxy_.size(0),int((self.seq_len+self.pred_len)/2+1),low_specxy_.size(2)],dtype=low_specxy_.dtype).to(low_specxy_.device) + low_specxy[:,0:low_specxy_.size(1),:]=low_specxy_ # zero padding + low_xy=torch.fft.irfft(low_specxy, dim=1) + low_xy=low_xy * self.length_ratio # energy compemsation for the length change + # dom_x=x-low_x + + # dom_xy=self.Dlinear(dom_x) + # xy=(low_xy+dom_xy) * torch.sqrt(x_var) +x_mean # REVERSE RIN + xy=(low_xy) * torch.sqrt(x_var) +x_mean + return xy, low_xy* torch.sqrt(x_var) + + +class FITS(): + def __init__(self, + win_size=100, + DSR=4, + individual=True, + input_c=1, + batch_size=128, + cut_freq=12, + epochs=50, + lr=1e-3, + validation_size=0.2 + ): + super().__init__() + self.__anomaly_score = None + + self.cuda = True + self.device = get_gpu(self.cuda) + + + self.win_size = win_size + self.DSR = DSR + self.individual = individual + self.input_c = input_c + self.batch_size = batch_size + self.cut_freq = cut_freq + self.validation_size = validation_size + + self.model = Model(seq_len=self.win_size//self.DSR, pred_len=self.win_size-self.win_size//self.DSR, individual=self.individual, enc_in=self.input_c, cut_freq=self.cut_freq).to(self.device) + + self.epochs = epochs + self.learning_rate = lr + self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) + self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) + self.loss = nn.MSELoss() + self.anomaly_criterion = nn.MSELoss(reduce=False) + + self.save_path = None + self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) + + def fit(self, data): + + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + avg_loss = 0 + loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for idx, (x, target) in loop: + + x = x[:, ::self.DSR, :] + x, target = x.to(self.device), target.to(self.device) + self.optimizer.zero_grad() + + output, _ = self.model(x) + + # print('x: ', x.shape) + # print('target: ', target.shape) + + loss = self.loss(output, target) + loss.backward() + + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + + self.model.eval() + avg_loss = 0 + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + + x = x[:, ::self.DSR, :] + x, target = x.to(self.device), target.to(self.device) + output, _ = self.model(x) + loss = self.loss(output, target) + avg_loss += loss.cpu().item() + loop.set_description(f'Validation Epoch [{epoch}/{self.epochs}]') + 
loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + valid_loss = avg_loss/max(len(valid_loader), 1) + self.scheduler.step() + + self.early_stopping(valid_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + + x = x[:, ::self.DSR, :] + x, target = x.to(self.device), target.to(self.device) + output, _ = self.model(x) + # loss = self.loss(output, target) + score = torch.mean(self.anomaly_criterion(output, target), dim=-1) + scores.append(score.cpu()[:,-1]) + + loop.set_description(f'Testing: ') + + scores = torch.cat(scores, dim=0) + scores = scores.numpy().flatten() + + assert scores.ndim == 1 + self.__anomaly_score = scores + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, (self.batch_size, self.input_len), verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) \ No newline at end of file diff --git a/models/HBOS.py b/models/HBOS.py new file mode 100644 index 0000000000000000000000000000000000000000..d8ab6271d2fa99cb41e0f449b66ffb342800dbe8 --- /dev/null +++ b/models/HBOS.py @@ -0,0 +1,380 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +from numba import njit +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted + +from .feature import Window +from .base import BaseDetector +from ..utils.utility import check_parameter, get_optimal_n_bins, invert_order +from ..utils.utility import zscore + + +class HBOS(BaseDetector): + """Histogram- based outlier detection (HBOS) is an efficient unsupervised + method. It assumes the feature independence and calculates the degree + of outlyingness by building histograms. See :cite:`goldstein2012histogram` + for details. + + Two versions of HBOS are supported: + - Static number of bins: uses a static number of bins for all features. + - Automatic number of bins: every feature uses a number of bins deemed to + be optimal according to the Birge-Rozenblac method + (:cite:`birge2006many`). + + Parameters + ---------- + n_bins : int or string, optional (default=10) + The number of bins. "auto" uses the birge-rozenblac method for + automatic selection of the optimal number of bins for each feature. + + alpha : float in (0, 1), optional (default=0.1) + The regularizer for preventing overflow. + + tol : float in (0, 1), optional (default=0.5) + The parameter to decide the flexibility while dealing + the samples falling outside the bins. + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, + i.e. the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. 
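+
+    slidingWindow : int, optional (default=100)
+        The length of the sliding window used to convert the input time series
+        into a matrix of subsequences before the histograms are built.
+
+    normalize : bool, optional (default=True)
+        If True, each subsequence is z-score normalized before fitting and
+        scoring.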
+ + Attributes + ---------- + bin_edges_ : numpy array of shape (n_bins + 1, n_features ) + The edges of the bins. + + hist_ : numpy array of shape (n_bins, n_features) + The density of each histogram. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, slidingWindow=100, sub=True, n_bins=10, alpha=0.1, tol=0.5, contamination=0.1, normalize=True): + super(HBOS, self).__init__(contamination=contamination) + self.slidingWindow = slidingWindow + self.sub = sub + self.n_bins = n_bins + self.alpha = alpha + self.tol = tol + self.normalize = normalize + + check_parameter(alpha, 0, 1, param_name='alpha') + check_parameter(tol, 0, 1, param_name='tol') + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + _, n_features = X.shape[0], X.shape[1] + + if isinstance(self.n_bins, str) and self.n_bins.lower() == "auto": + # Uses the birge rozenblac method for automatic histogram size per feature + self.hist_ = [] + self.bin_edges_ = [] + + # build the histograms for all dimensions + for i in range(n_features): + n_bins = get_optimal_n_bins(X[:, i]) + hist, bin_edges = np.histogram(X[:, i], bins=n_bins, + density=True) + self.hist_.append(hist) + self.bin_edges_.append(bin_edges) + # the sum of (width * height) should equal to 1 + assert (np.isclose(1, np.sum( + hist * np.diff(bin_edges)), atol=0.1)) + + outlier_scores = _calculate_outlier_scores_auto(X, self.bin_edges_, + self.hist_, + self.alpha, + self.tol) + + elif check_parameter(self.n_bins, low=2, high=np.inf): + self.hist_ = np.zeros([self.n_bins, n_features]) + self.bin_edges_ = np.zeros([self.n_bins + 1, n_features]) + + # build the histograms for all dimensions + for i in range(n_features): + self.hist_[:, i], self.bin_edges_[:, i] = \ + np.histogram(X[:, i], bins=self.n_bins, density=True) + # the sum of (width * height) should equal to 1 + # assert (np.isclose(1, np.sum( + # self.hist_[:, i] * np.diff(self.bin_edges_[:, i])), + # atol=0.1)) + + outlier_scores = _calculate_outlier_scores(X, self.bin_edges_, + self.hist_, + self.n_bins, + self.alpha, self.tol) + + # invert decision_scores_. 
Outliers comes with higher outlier scores + self.decision_scores_ = invert_order(np.sum(outlier_scores, axis=1)) + + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['hist_', 'bin_edges_']) + + n_samples, n_features = X.shape + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + X = check_array(X) + + if isinstance(self.n_bins, str) and self.n_bins.lower() == "auto": + outlier_scores = _calculate_outlier_scores_auto(X, self.bin_edges_, + self.hist_, + self.alpha, + self.tol) + elif check_parameter(self.n_bins, low=2, high=np.inf): + outlier_scores = _calculate_outlier_scores(X, self.bin_edges_, + self.hist_, + self.n_bins, + self.alpha, self.tol) + + # invert outlier scores. Outliers comes with higher outlier scores + decision_scores_ = invert_order(np.sum(outlier_scores, axis=1)) + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + return decision_scores_ + + +# @njit #due to variable size of histograms, can no longer naively use numba for jit +def _calculate_outlier_scores_auto(X, bin_edges, hist, alpha, + tol): # pragma: no cover + """The internal function to calculate the outlier scores based on + the bins and histograms constructed with the training data. The program + is optimized through numba. It is excluded from coverage test for + eliminating the redundancy. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features + The input samples. + + bin_edges : list of length n_features containing numpy arrays + The edges of the bins. + + hist : =list of length n_features containing numpy arrays + The density of each histogram. + + alpha : float in (0, 1) + The regularizer for preventing overflow. + + tol : float in (0, 1) + The parameter to decide the flexibility while dealing + the samples falling outside the bins. + + Returns + ------- + outlier_scores : numpy array of shape (n_samples, n_features) + Outlier scores on all features (dimensions). + """ + + n_samples, n_features = X.shape[0], X.shape[1] + outlier_scores = np.zeros(shape=(n_samples, n_features)) + + for i in range(n_features): + + # Find the indices of the bins to which each value belongs. 
+ # See documentation for np.digitize since it is tricky + # >>> x = np.array([0.2, 6.4, 3.0, 1.6, -1, 100, 10]) + # >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) + # >>> np.digitize(x, bins, right=True) + # array([1, 4, 3, 2, 0, 5, 4], dtype=int64) + + bin_inds = np.digitize(X[:, i], bin_edges[i], right=True) + + # Calculate the outlying scores on dimension i + # Add a regularizer for preventing overflow + out_score_i = np.log2(hist[i] + alpha) + + optimal_n_bins = get_optimal_n_bins(X[:, i]) + + for j in range(n_samples): + + # If the sample does not belong to any bins + # bin_ind == 0 (fall outside since it is too small) + if bin_inds[j] == 0: + dist = bin_edges[i][0] - X[j, i] + bin_width = bin_edges[i][1] - bin_edges[i][0] + + # If it is only slightly lower than the smallest bin edge + # assign it to bin 1 + if dist <= bin_width * tol: + outlier_scores[j, i] = out_score_i[0] + else: + outlier_scores[j, i] = np.min(out_score_i) + + # If the sample does not belong to any bins + # bin_ind == optimal_n_bins+1 (fall outside since it is too large) + elif bin_inds[j] == optimal_n_bins + 1: + dist = X[j, i] - bin_edges[i][-1] + bin_width = bin_edges[i][-1] - bin_edges[i][-2] + + # If it is only slightly larger than the largest bin edge + # assign it to the last bin + if dist <= bin_width * tol: + outlier_scores[j, i] = out_score_i[optimal_n_bins - 1] + else: + outlier_scores[j, i] = np.min(out_score_i) + else: + outlier_scores[j, i] = out_score_i[bin_inds[j] - 1] + + return outlier_scores + + +@njit +def _calculate_outlier_scores(X, bin_edges, hist, n_bins, alpha, + tol): # pragma: no cover + """The internal function to calculate the outlier scores based on + the bins and histograms constructed with the training data. The program + is optimized through numba. It is excluded from coverage test for + eliminating the redundancy. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + bin_edges : numpy array of shape (n_bins + 1, n_features ) + The edges of the bins. + + hist : numpy array of shape (n_bins, n_features) + The density of each histogram. + + n_bins : int + The number of bins. + + alpha : float in (0, 1) + The regularizer for preventing overflow. + + tol : float in (0, 1) + The parameter to decide the flexibility while dealing + the samples falling outside the bins. + + Returns + ------- + outlier_scores : numpy array of shape (n_samples, n_features) + Outlier scores on all features (dimensions). + """ + + n_samples, n_features = X.shape[0], X.shape[1] + outlier_scores = np.zeros(shape=(n_samples, n_features)) + + for i in range(n_features): + + # Find the indices of the bins to which each value belongs. 
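        # (Illustrative note on the `tol` handling implemented below; the toy numbers
        #  are assumptions, not from the original source.) A sample that falls outside
        #  the histogram range still receives the edge bin's score as long as its
        #  distance to that edge is within `tol` bin-widths; otherwise it is given the
        #  minimum per-bin score, i.e. the most anomalous value after inversion.
        # >>> import numpy as np
        # >>> bin_edges_i = np.array([0.0, 1.0, 2.0, 3.0])
        # >>> x_small, tol = -0.3, 0.5
        # >>> bin_width = bin_edges_i[1] - bin_edges_i[0]
        # >>> bool(bin_edges_i[0] - x_small <= bin_width * tol)  # close enough: bin 1's score
        # True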
+ # See documentation for np.digitize since it is tricky + # >>> x = np.array([0.2, 6.4, 3.0, 1.6, -1, 100, 10]) + # >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) + # >>> np.digitize(x, bins, right=True) + # array([1, 4, 3, 2, 0, 5, 4], dtype=int64) + + bin_inds = np.digitize(X[:, i], bin_edges[:, i], right=True) + + # Calculate the outlying scores on dimension i + # Add a regularizer for preventing overflow + out_score_i = np.log2(hist[:, i] + alpha) + + for j in range(n_samples): + + # If the sample does not belong to any bins + # bin_ind == 0 (fall outside since it is too small) + if bin_inds[j] == 0: + dist = bin_edges[0, i] - X[j, i] + bin_width = bin_edges[1, i] - bin_edges[0, i] + + # If it is only slightly lower than the smallest bin edge + # assign it to bin 1 + if dist <= bin_width * tol: + outlier_scores[j, i] = out_score_i[0] + else: + outlier_scores[j, i] = np.min(out_score_i) + + # If the sample does not belong to any bins + # bin_ind == n_bins+1 (fall outside since it is too large) + elif bin_inds[j] == n_bins + 1: + dist = X[j, i] - bin_edges[-1, i] + bin_width = bin_edges[-1, i] - bin_edges[-2, i] + + # If it is only slightly larger than the largest bin edge + # assign it to the last bin + if dist <= bin_width * tol: + outlier_scores[j, i] = out_score_i[n_bins - 1] + else: + outlier_scores[j, i] = np.min(out_score_i) + else: + outlier_scores[j, i] = out_score_i[bin_inds[j] - 1] + + return outlier_scores \ No newline at end of file diff --git a/models/IForest.py b/models/IForest.py new file mode 100644 index 0000000000000000000000000000000000000000..7906c2ad29454a39f2e6c55e7b57f3de039cff30 --- /dev/null +++ b/models/IForest.py @@ -0,0 +1,327 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +from joblib import Parallel +from joblib.parallel import delayed +from sklearn.ensemble import IsolationForest +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted + +from .feature import Window +from .base import BaseDetector +# noinspection PyProtectedMember +from ..utils.utility import invert_order +from ..utils.utility import zscore + +class IForest(BaseDetector): + """Wrapper of scikit-learn Isolation Forest with more functionalities. + + The IsolationForest 'isolates' observations by randomly selecting a + feature and then randomly selecting a split value between the maximum and + minimum values of the selected feature. + See :cite:`liu2008isolation,liu2012isolation` for details. + + Since recursive partitioning can be represented by a tree structure, the + number of splittings required to isolate a sample is equivalent to the path + length from the root node to the terminating node. + + This path length, averaged over a forest of such random trees, is a + measure of normality and our decision function. + + Random partitioning produces noticeably shorter paths for anomalies. + Hence, when a forest of random trees collectively produce shorter path + lengths for particular samples, they are highly likely to be anomalies. + + Parameters + ---------- + n_estimators : int, optional (default=100) + The number of base estimators in the ensemble. + + max_samples : int or float, optional (default="auto") + The number of samples to draw from X to train each base estimator. + + - If int, then draw `max_samples` samples. 
+ - If float, then draw `max_samples * X.shape[0]` samples. + - If "auto", then `max_samples=min(256, n_samples)`. + + If max_samples is larger than the number of samples provided, + all samples will be used for all trees (no sampling). + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. the proportion + of outliers in the data set. Used when fitting to define the threshold + on the decision function. + + max_features : int or float, optional (default=1.0) + The number of features to draw from X to train each base estimator. + + - If int, then draw `max_features` features. + - If float, then draw `max_features * X.shape[1]` features. + + bootstrap : bool, optional (default=False) + If True, individual trees are fit on random subsets of the training + data sampled with replacement. If False, sampling without replacement + is performed. + + n_jobs : integer, optional (default=1) + The number of jobs to run in parallel for both `fit` and `predict`. + If -1, then the number of jobs is set to the number of cores. + + behaviour : str, default='old' + Behaviour of the ``decision_function`` which can be either 'old' or + 'new'. Passing ``behaviour='new'`` makes the ``decision_function`` + change to match other anomaly detection algorithm API which will be + the default behaviour in the future. As explained in details in the + ``offset_`` attribute documentation, the ``decision_function`` becomes + dependent on the contamination parameter, in such a way that 0 becomes + its natural threshold to detect outliers. + + .. versionadded:: 0.7.0 + ``behaviour`` is added in 0.7.0 for back-compatibility purpose. + + .. deprecated:: 0.20 + ``behaviour='old'`` is deprecated in sklearn 0.20 and will not be + possible in 0.22. + + .. deprecated:: 0.22 + ``behaviour`` parameter will be deprecated in sklearn 0.22 and + removed in 0.24. + + .. warning:: + Only applicable for sklearn 0.20 above. + + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + + verbose : int, optional (default=0) + Controls the verbosity of the tree building process. + + Attributes + ---------- + estimators_ : list of DecisionTreeClassifier + The collection of fitted sub-estimators. + + estimators_samples_ : list of arrays + The subset of drawn samples (i.e., the in-bag samples) for each base + estimator. + + max_samples_ : integer + The actual number of samples + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. 
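    Examples
    --------
    A minimal usage sketch (hedged: the import path is an assumption made for
    illustration and depends on how the package is installed):

    >>> import numpy as np
    >>> from models.IForest import IForest
    >>> rng = np.random.default_rng(0)
    >>> ts = rng.normal(size=(1000, 1))            # univariate time series
    >>> ts[500:510] += 6.0                         # inject a short anomaly
    >>> clf = IForest(slidingWindow=100, n_estimators=100).fit(ts)
    >>> clf.decision_scores_.shape                 # scores padded back to input length
    (1000,)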
+ """ + + def __init__(self, + slidingWindow=100, + n_estimators=100, + sub=True, + max_samples="auto", + contamination=0.1, + max_features=1., + bootstrap=False, + n_jobs=1, + behaviour='old', + random_state=0, # set the random state + verbose=0, + normalize=True): + super(IForest, self).__init__(contamination=contamination) + self.slidingWindow = slidingWindow + self.sub = sub + self.n_estimators = n_estimators + self.max_samples = max_samples + self.max_features = max_features + self.bootstrap = bootstrap + self.n_jobs = n_jobs + self.behaviour = behaviour + self.random_state = random_state + self.verbose = verbose + self.normalize = normalize + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: + if n_features == 1: + X = zscore(X, axis=0, ddof=0) + else: + X = zscore(X, axis=1, ddof=1) + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + # In sklearn 0.20+ new behaviour is added (arg behaviour={'new','old'}) + # to IsolationForest that shifts the location of the anomaly scores + # noinspection PyProtectedMember + + self.detector_ = IsolationForest(n_estimators=self.n_estimators, + max_samples=self.max_samples, + contamination=self.contamination, + max_features=self.max_features, + bootstrap=self.bootstrap, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose) + + self.detector_.fit(X=X, y=None, sample_weight=None) + + # invert decision_scores_. Outliers comes with higher outlier scores. + self.decision_scores_ = invert_order(self.detector_.decision_function(X)) + + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + n_samples, n_features = X.shape + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + # invert outlier scores. 
Outliers comes with higher outlier scores + decision_scores_ = invert_order(self.detector_.decision_function(X)) + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + return decision_scores_ + + @property + def estimators_(self): + """The collection of fitted sub-estimators. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.estimators_ + + @property + def estimators_samples_(self): + """The subset of drawn samples (i.e., the in-bag samples) for + each base estimator. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.estimators_samples_ + + @property + def max_samples_(self): + """The actual number of samples. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.max_samples_ + + @property + def estimators_features_(self): + """The indeces of the subset of features used to train the estimators. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.estimators_features_ + + @property + def n_features_in_(self): + """The number of features seen during the fit. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.n_features_in_ + + @property + def offset_(self): + """Offset used to define the decision function from the raw scores. + Decorator for scikit-learn Isolation Forest attributes. + """ + return self.detector_.offset_ + + @property + def feature_importances_(self): + """The impurity-based feature importance. The higher, the more + important the feature. The importance of a feature is computed as the + (normalized) total reduction of the criterion brought by that feature. + It is also known as the Gini importance. + + .. warning:: + impurity-based feature importance can be misleading for + high cardinality features (many unique values). See + https://scikit-learn.org/stable/modules/generated/sklearn.inspection.permutation_importance.html + as an alternative. + + Returns + ------- + feature_importances_ : ndarray of shape (n_features,) + The values of this array sum to 1, unless all trees are single node + trees consisting of only the root node, in which case it will be an + array of zeros. 
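    Examples
    --------
    A hedged sketch of the pass-through properties defined above, reusing a
    fitted detector named ``clf`` as in the class-level example (the name is an
    assumption for illustration):

    >>> len(clf.estimators_) == clf.n_estimators
    True
    >>> bool(np.isclose(clf.feature_importances_.sum(), 1.0))
    True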
+ """ + check_is_fitted(self) + all_importances = Parallel( + n_jobs=self.n_jobs)( + delayed(getattr)(tree, "feature_importances_") + for tree in self.detector_.estimators_ + if tree.tree_.node_count > 1 + ) + + if not all_importances: + return np.zeros(self.n_features_in_, dtype=np.float64) + + all_importances = np.mean(all_importances, axis=0, dtype=np.float64) + return all_importances / np.sum(all_importances) \ No newline at end of file diff --git a/models/KMeansAD.py b/models/KMeansAD.py new file mode 100644 index 0000000000000000000000000000000000000000..22783ade816aafcba92fce3b1f25ade75f2eda61 --- /dev/null +++ b/models/KMeansAD.py @@ -0,0 +1,68 @@ +""" +This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] +Original source: [https://github.com/TimeEval/TimeEval-algorithms] +""" + +from sklearn.base import BaseEstimator, OutlierMixin +from sklearn.cluster import KMeans +import numpy as np +from numpy.lib.stride_tricks import sliding_window_view +from ..utils.utility import zscore + +class KMeansAD(BaseEstimator, OutlierMixin): + def __init__(self, k, window_size, stride, n_jobs=1, normalize=True): + self.k = k + self.window_size = window_size + self.stride = stride + self.model = KMeans(n_clusters=k) + self.padding_length = 0 + self.normalize = normalize + + def _preprocess_data(self, X: np.ndarray) -> np.ndarray: + flat_shape = (X.shape[0] - (self.window_size - 1), -1) # in case we have a multivariate TS + slides = sliding_window_view(X, window_shape=self.window_size, axis=0).reshape(flat_shape)[::self.stride, :] + self.padding_length = X.shape[0] - (slides.shape[0] * self.stride + self.window_size - self.stride) + print(f"Required padding_length={self.padding_length}") + if self.normalize: slides = zscore(slides, axis=1, ddof=1) + return slides + + def _custom_reverse_windowing(self, scores: np.ndarray) -> np.ndarray: + print("Reversing window-based scores to point-based scores:") + print(f"Before reverse-windowing: scores.shape={scores.shape}") + # compute begin and end indices of windows + begins = np.array([i * self.stride for i in range(scores.shape[0])]) + ends = begins + self.window_size + + # prepare target array + unwindowed_length = self.stride * (scores.shape[0] - 1) + self.window_size + self.padding_length + mapped = np.full(unwindowed_length, fill_value=np.nan) + + # only iterate over window intersections + indices = np.unique(np.r_[begins, ends]) + for i, j in zip(indices[:-1], indices[1:]): + window_indices = np.flatnonzero((begins <= i) & (j-1 < ends)) + # print(i, j, window_indices) + mapped[i:j] = np.nanmean(scores[window_indices]) + + # replace untouched indices with 0 (especially for the padding at the end) + np.nan_to_num(mapped, copy=False) + print(f"After reverse-windowing: scores.shape={mapped.shape}") + return mapped + + def fit(self, X: np.ndarray, y=None, preprocess=True) -> 'KMeansAD': + if preprocess: + X = self._preprocess_data(X) + self.model.fit(X) + return self + + def predict(self, X: np.ndarray, preprocess=True) -> np.ndarray: + if preprocess: + X = self._preprocess_data(X) + clusters = self.model.predict(X) + diffs = np.linalg.norm(X - self.model.cluster_centers_[clusters], axis=1) + return self._custom_reverse_windowing(diffs) + + def fit_predict(self, X, y=None) -> np.ndarray: + X = self._preprocess_data(X) + self.fit(X, y, preprocess=False) + return self.predict(X, preprocess=False) \ No newline at end of file diff --git a/models/KNN.py b/models/KNN.py new file mode 100644 index 
0000000000000000000000000000000000000000..1826e17f67255682f432e9aee697a2bb13deada5 --- /dev/null +++ b/models/KNN.py @@ -0,0 +1,290 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function +from warnings import warn + +import numpy as np +from sklearn.neighbors import BallTree +from sklearn.neighbors import NearestNeighbors +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted +from sklearn.utils import check_array +import math + +from .base import BaseDetector +from .feature import Window +from ..utils.utility import zscore + +class KNN(BaseDetector): + # noinspection PyPep8 + """kNN class for outlier detection. + For an observation, its distance to its kth nearest neighbor could be + viewed as the outlying score. It could be viewed as a way to measure + the density. See :cite:`ramaswamy2000efficient,angiulli2002fast` for + details. + + Three kNN detectors are supported: + largest: use the distance to the kth neighbor as the outlier score + mean: use the average of all k neighbors as the outlier score + median: use the median of the distance to k neighbors as the outlier score + + Parameters + ---------- + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, + i.e. the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + n_neighbors : int, optional (default = 10) + Number of neighbors to use by default for k neighbors queries. + + method : str, optional (default='largest') + {'largest', 'mean', 'median'} + + - 'largest': use the distance to the kth neighbor as the outlier score + - 'mean': use the average of all k neighbors as the outlier score + - 'median': use the median of the distance to k neighbors as the + outlier score + + radius : float, optional (default = 1.0) + Range of parameter space to use by default for `radius_neighbors` + queries. + + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + Algorithm used to compute the nearest neighbors: + + - 'ball_tree' will use BallTree + - 'kd_tree' will use KDTree + - 'brute' will use a brute-force search. + - 'auto' will attempt to decide the most appropriate algorithm + based on the values passed to :meth:`fit` method. + + Note: fitting on sparse input will override the setting of + this parameter, using brute force. + + .. deprecated:: 0.74 + ``algorithm`` is deprecated in PyOD 0.7.4 and will not be + possible in 0.7.6. It has to use BallTree for consistency. + + leaf_size : int, optional (default = 30) + Leaf size passed to BallTree. This can affect the + speed of the construction and query, as well as the memory + required to store the tree. The optimal value depends on the + nature of the problem. + + metric : string or callable, default 'minkowski' + metric to use for distance computation. Any metric from scikit-learn + or scipy.spatial.distance can be used. + + If metric is a callable function, it is called on each + pair of instances (rows) and the resulting value recorded. The callable + should take two arrays as input and return one value indicating the + distance between them. This works for Scipy's metrics, but is less + efficient than passing the metric name as a string. + + Distance matrices are not supported. 
+ + Valid values for metric are: + + - from scikit-learn: ['cityblock', 'euclidean', 'l1', 'l2', + 'manhattan'] + + - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', + 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'] + + See the documentation for scipy.spatial.distance for details on these + metrics. + + p : integer, optional (default = 2) + Parameter for the Minkowski metric from + sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is + equivalent to using manhattan_distance (l1), and euclidean_distance + (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. + See http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances + + metric_params : dict, optional (default = None) + Additional keyword arguments for the metric function. + + n_jobs : int, optional (default = 1) + The number of parallel jobs to run for neighbors search. + If ``-1``, then the number of jobs is set to the number of CPU cores. + Affects only kneighbors and kneighbors_graph methods. + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + def __init__(self, slidingWindow=100, sub=True, contamination=0.1, n_neighbors=10, method='largest', + radius=1.0, algorithm='auto', leaf_size=30, + metric='minkowski', p=2, metric_params=None, n_jobs=1, normalize=True, + **kwargs): + + self.slidingWindow = slidingWindow + self.sub = sub + self.n_neighbors = n_neighbors + self.method = method + self.radius = radius + self.algorithm = algorithm + self.leaf_size = leaf_size + self.metric = metric + self.p = p + self.metric_params = metric_params + self.normalize = normalize + self.n_jobs = n_jobs + + if self.algorithm != 'auto' and self.algorithm != 'ball_tree': + warn('algorithm parameter is deprecated and will be removed ' + 'in version 0.7.6. By default, ball_tree will be used.', + FutureWarning) + + self.neigh_ = NearestNeighbors(n_neighbors=self.n_neighbors, + radius=self.radius, + algorithm=self.algorithm, + leaf_size=self.leaf_size, + metric=self.metric, + p=self.p, + metric_params=self.metric_params, + n_jobs=self.n_jobs, + **kwargs) + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. 
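        Examples
        --------
        A hedged usage sketch (the import path is an assumption for illustration):

        >>> import numpy as np
        >>> from models.KNN import KNN
        >>> ts = np.random.default_rng(1).normal(size=(1500, 1))
        >>> det = KNN(slidingWindow=100, n_neighbors=10, method='mean').fit(ts)
        >>> det.decision_scores_.shape             # padded back to input length
        (1500,)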
+ """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # validate inputs X and y (optional) + X = check_array(X) + + self.neigh_.fit(X) + + if self.neigh_._tree is not None: + self.tree_ = self.neigh_._tree + + else: + if self.metric_params is not None: + self.tree_ = BallTree(X, leaf_size=self.leaf_size, + metric=self.metric, + **self.metric_params) + else: + self.tree_ = BallTree(X, leaf_size=self.leaf_size, + metric=self.metric) + + + dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors, + return_distance=True) + + self.decision_scores_ = self._get_dist_by_method(dist_arr) + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + print("inside decision Function") + # check_is_fitted(self, ['tree_', 'decision_scores_', + # 'threshold_', 'labels_']) + + n_samples = X.shape[0] + X = check_array(X) + X = Window(window = self.slidingWindow).convert(X) + + # initialize the output score + pred_scores = np.zeros([X.shape[0], 1]) + + for i in range(X.shape[0]): + x_i = X[i, :] + x_i = np.asarray(x_i).reshape(1, x_i.shape[0]) + + # get the distance of the current point + dist_arr, _ = self.tree_.query(x_i, k=self.n_neighbors) + dist = self._get_dist_by_method(dist_arr) + pred_score_i = dist[-1] + + # record the current item + pred_scores[i, :] = pred_score_i + + pred_scores = pred_scores.ravel() + if pred_scores.shape[0] < n_samples: + padded_decision_scores_ = np.array([pred_scores[0]]*math.ceil((self.slidingWindow-1)/2) + + list(pred_scores) + [pred_scores[-1]]*((self.slidingWindow-1)//2)) + + return padded_decision_scores_ + + + def _get_dist_by_method(self, dist_arr): + """Internal function to decide how to process passed in distance array + + Parameters + ---------- + dist_arr : numpy array of shape (n_samples, n_neighbors) + Distance matrix. + + Returns + ------- + dist : numpy array of shape (n_samples,) + The outlier scores by distance. 
+ """ + if self.method == 'largest': + return dist_arr[:, -1] + elif self.method == 'mean': + return np.mean(dist_arr, axis=1) + elif self.method == 'median': + return np.median(dist_arr, axis=1) diff --git a/models/LOF.py b/models/LOF.py new file mode 100644 index 0000000000000000000000000000000000000000..46e45d592ee167e2d2ce0dcc60c772b4aa467635 --- /dev/null +++ b/models/LOF.py @@ -0,0 +1,258 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function +import numpy as np +import math +from sklearn.neighbors import LocalOutlierFactor +from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted + +from .base import BaseDetector +from .feature import Window +from ..utils.utility import invert_order +from ..utils.utility import zscore + +# noinspection PyProtectedMember +class LOF(BaseDetector): + """Wrapper of scikit-learn LOF Class with more functionalities. + Unsupervised Outlier Detection using Local Outlier Factor (LOF). + + The anomaly score of each sample is called Local Outlier Factor. + It measures the local deviation of density of a given sample with + respect to its neighbors. + It is local in that the anomaly score depends on how isolated the object + is with respect to the surrounding neighborhood. + More precisely, locality is given by k-nearest neighbors, whose distance + is used to estimate the local density. + By comparing the local density of a sample to the local densities of + its neighbors, one can identify samples that have a substantially lower + density than their neighbors. These are considered outliers. + See :cite:`breunig2000lof` for details. + + Parameters + ---------- + n_neighbors : int, optional (default=20) + Number of neighbors to use by default for `kneighbors` queries. + If n_neighbors is larger than the number of samples provided, + all samples will be used. + + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + Algorithm used to compute the nearest neighbors: + + - 'ball_tree' will use BallTree + - 'kd_tree' will use KDTree + - 'brute' will use a brute-force search. + - 'auto' will attempt to decide the most appropriate algorithm + based on the values passed to :meth:`fit` method. + + Note: fitting on sparse input will override the setting of + this parameter, using brute force. + + leaf_size : int, optional (default=30) + Leaf size passed to `BallTree` or `KDTree`. This can + affect the speed of the construction and query, as well as the memory + required to store the tree. The optimal value depends on the + nature of the problem. + + metric : string or callable, default 'minkowski' + metric used for the distance computation. Any metric from scikit-learn + or scipy.spatial.distance can be used. + + If 'precomputed', the training input X is expected to be a distance + matrix. + + If metric is a callable function, it is called on each + pair of instances (rows) and the resulting value recorded. The callable + should take two arrays as input and return one value indicating the + distance between them. This works for Scipy's metrics, but is less + efficient than passing the metric name as a string. 
+ + Valid values for metric are: + + - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', + 'manhattan'] + + - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', + 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'] + + See the documentation for scipy.spatial.distance for details on these + metrics: + http://docs.scipy.org/doc/scipy/reference/spatial.distance.html + + p : integer, optional (default = 2) + Parameter for the Minkowski metric from + sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is + equivalent to using manhattan_distance (l1), and euclidean_distance + (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. + See http://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.pairwise_distances + + metric_params : dict, optional (default = None) + Additional keyword arguments for the metric function. + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. the proportion + of outliers in the data set. When fitting this is used to define the + threshold on the decision function. + + n_jobs : int, optional (default = 1) + The number of parallel jobs to run for neighbors search. + If ``-1``, then the number of jobs is set to the number of CPU cores. + Affects only kneighbors and kneighbors_graph methods. + + novelty : bool (default=False) + By default, LocalOutlierFactor is only meant to be used for outlier + detection (novelty=False). Set novelty to True if you want to use + LocalOutlierFactor for novelty detection. In this case be aware that + that you should only use predict, decision_function and score_samples + on new unseen data and not on the training set. + + Attributes + ---------- + n_neighbors_ : int + The actual number of neighbors used for `kneighbors` queries. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, slidingWindow=100, sub=True, n_neighbors=20, algorithm='auto', leaf_size=30, + metric='minkowski', p=2, metric_params=None, + contamination=0.1, n_jobs=1, novelty=True, normalize=True): + super(LOF, self).__init__(contamination=contamination) + + self.slidingWindow = slidingWindow + self.sub = sub + self.n_neighbors = n_neighbors + self.algorithm = algorithm + self.leaf_size = leaf_size + self.metric = metric + self.p = p + self.metric_params = metric_params + self.n_jobs = n_jobs + self.novelty = novelty + self.normalize = normalize + + # noinspection PyIncorrectDocstring + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. 
+ + Returns + ------- + self : object + Fitted estimator. + """ + n_samples, n_features = X.shape + + # print('self.slidingWindow: ', self.slidingWindow) + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: + if n_features == 1: + X = zscore(X, axis=0, ddof=0) + else: + X = zscore(X, axis=1, ddof=1) + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + self.detector_ = LocalOutlierFactor(n_neighbors=self.n_neighbors, + algorithm=self.algorithm, + leaf_size=self.leaf_size, + metric=self.metric, + p=self.p, + metric_params=self.metric_params, + contamination=self.contamination, + n_jobs=self.n_jobs, + novelty=self.novelty) + self.detector_.fit(X=X, y=y) + + # Invert decision_scores_. Outliers comes with higher outlier scores + self.decision_scores_ = invert_order(self.detector_.negative_outlier_factor_) + + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + print('self.slidingWindow: ', self.slidingWindow) + n_samples, n_features = X.shape + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + # Invert outlier scores. Outliers comes with higher outlier scores + # noinspection PyProtectedMember + try: + decision_scores_ = invert_order(self.detector_._score_samples(X)) + except AttributeError: + try: + decision_scores_ = invert_order(self.detector_._decision_function(X)) + except AttributeError: + decision_scores_ = invert_order(self.detector_.score_samples(X)) + + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + return decision_scores_ + + @property + def n_neighbors_(self): + """The actual number of neighbors used for kneighbors queries. + Decorator for scikit-learn LOF attributes. 
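        Examples
        --------
        A hedged usage sketch (the import path is an assumption for illustration):

        >>> import numpy as np
        >>> from models.LOF import LOF
        >>> ts = np.random.default_rng(2).normal(size=(1200, 1))
        >>> det = LOF(slidingWindow=100, n_neighbors=20).fit(ts)
        >>> det.n_neighbors_                   # neighbourhood size actually used
        20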
+ """ + return self.detector_.n_neighbors_ \ No newline at end of file diff --git a/models/LSTMAD.py b/models/LSTMAD.py new file mode 100644 index 0000000000000000000000000000000000000000..f7bdea15961257b28f30d074023fb5714c6fa473 --- /dev/null +++ b/models/LSTMAD.py @@ -0,0 +1,237 @@ +from typing import Dict +import torchinfo +import tqdm, math +import numpy as np +import torch +from torch import nn, optim +from torch.utils.data import DataLoader + +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu +from ..utils.dataset import ForecastDataset + +class LSTMModel(nn.Module): + def __init__(self, window_size, feats, + hidden_dim, pred_len, num_layers, batch_size, device) -> None: + super().__init__() + self.pred_len = pred_len + self.batch_size = batch_size + self.feats = feats + self.device = device + + self.lstm_encoder = nn.LSTM(input_size=feats, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True) + self.lstm_decoder = nn.LSTM(input_size=feats, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True) + + self.relu = nn.GELU() + self.fc = nn.Linear(hidden_dim, feats) + + def forward(self, src): + _, decoder_hidden = self.lstm_encoder(src) + cur_batch = src.shape[0] + + decoder_input = torch.zeros(cur_batch, 1, self.feats).to(self.device) + outputs = torch.zeros(self.pred_len, cur_batch, self.feats).to(self.device) + + for t in range(self.pred_len): + decoder_output, decoder_hidden = self.lstm_decoder(decoder_input, decoder_hidden) + decoder_output = self.relu(decoder_output) + decoder_input = self.fc(decoder_output) + + outputs[t] = torch.squeeze(decoder_input, dim=-2) + + return outputs + +class LSTMAD(): + def __init__(self, + window_size=100, + pred_len=1, + batch_size=128, + epochs=50, + lr=0.0008, + feats=1, + hidden_dim=20, + num_layer=2, + validation_size=0.2): + super().__init__() + self.__anomaly_score = None + + cuda = True + self.y_hats = None + + self.cuda = cuda + self.device = get_gpu(self.cuda) + + + self.window_size = window_size + self.pred_len = pred_len + self.batch_size = batch_size + self.epochs = epochs + + self.feats = feats + self.hidden_dim = hidden_dim + self.num_layer = num_layer + self.lr = lr + self.validation_size = validation_size + + print('self.device: ', self.device) + + self.model = LSTMModel(self.window_size, feats, hidden_dim, self.pred_len, num_layer, batch_size=self.batch_size, device=self.device).to(self.device) + + self.optimizer = optim.Adam(self.model.parameters(), lr=lr) + self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) + self.loss = nn.MSELoss() + self.save_path = None + self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) + + self.mu = None + self.sigma = None + self.eps = 1e-10 + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + ForecastDataset(tsTrain, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=True) + + valid_loader = DataLoader( + ForecastDataset(tsValid, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=False) + + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + avg_loss = 0 + loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + + # print('x: ', x.shape) # (bs, win, feat) + # print('target: ', 
target.shape) # # (bs, pred_len, feat) + + self.optimizer.zero_grad() + + output = self.model(x) + + # print('output: ', output.shape) # (pred_len, bs, feat) + + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + loss = self.loss(output, target) + loss.backward() + + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + + self.model.eval() + scores = [] + avg_loss = 0 + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + + output = self.model(x) + + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + loss = self.loss(output, target) + avg_loss += loss.cpu().item() + loop.set_description(f'Validation Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) + + mse = torch.sub(output, target).pow(2) + scores.append(mse.cpu()) + + + valid_loss = avg_loss/max(len(valid_loader), 1) + self.scheduler.step() + + self.early_stopping(valid_loss, self.model) + if self.early_stopping.early_stop or epoch == self.epochs - 1: + # fitting Gaussian Distribution + if len(scores) > 0: + scores = torch.cat(scores, dim=0) + self.mu = torch.mean(scores) + self.sigma = torch.var(scores) + print(self.mu.size(), self.sigma.size()) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + ForecastDataset(data, window_size=self.window_size, pred_len=self.pred_len), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + y_hats = [] + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for idx, (x, target) in loop: + x, target = x.to(self.device), target.to(self.device) + output = self.model(x) + + output = output.view(-1, self.feats*self.pred_len) + target = target.view(-1, self.feats*self.pred_len) + + mse = torch.sub(output, target).pow(2) + y_hats.append(output.cpu()) + scores.append(mse.cpu()) + loop.set_description(f'Testing: ') + + scores = torch.cat(scores, dim=0) + # scores = 0.5 * (torch.log(self.sigma + self.eps) + (scores - self.mu)**2 / (self.sigma+self.eps)) + + scores = scores.numpy() + scores = np.mean(scores, axis=1) + + y_hats = torch.cat(y_hats, dim=0) + y_hats = y_hats.numpy() + + l, w = y_hats.shape + + # new_scores = np.zeros((l - self.pred_len, w)) + # for i in range(w): + # new_scores[:, i] = scores[self.pred_len - i:l-i, i] + # scores = np.mean(new_scores, axis=1) + # scores = np.pad(scores, (0, self.pred_len - 1), 'constant', constant_values=(0,0)) + + # new_y_hats = np.zeros((l - self.pred_len, w)) + # for i in range(w): + # new_y_hats[:, i] = y_hats[self.pred_len - i:l-i, i] + # y_hats = np.mean(new_y_hats, axis=1) + # y_hats = np.pad(y_hats, (0, self.pred_len - 1), 'constant',constant_values=(0,0)) + + assert scores.ndim == 1 + # self.y_hats = y_hats + + # print('scores: ', scores.shape) + if scores.shape[0] < len(data): + padded_decision_scores_ = np.zeros(len(data)) + padded_decision_scores_[: self.window_size+self.pred_len-1] = scores[0] + padded_decision_scores_[self.window_size+self.pred_len-1 : ] = scores + + self.__anomaly_score = padded_decision_scores_ + return padded_decision_scores_ + + def 
anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def get_y_hat(self) -> np.ndarray: + return self.y_hats + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, (self.batch_size, self.window_size), verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) diff --git a/models/Lag_Llama.py b/models/Lag_Llama.py new file mode 100644 index 0000000000000000000000000000000000000000..f51a5359eba6a4768726dc35a5e8b5417c29252e --- /dev/null +++ b/models/Lag_Llama.py @@ -0,0 +1,139 @@ +""" +This function is adapted from [lag-llama] by [ashok-arjun&kashif] +Original source: [https://github.com/time-series-foundation-models/lag-llama] +""" + +from itertools import islice + +from matplotlib import pyplot as plt +import matplotlib.dates as mdates + +import torch +from gluonts.evaluation import make_evaluation_predictions +from gluonts.dataset.pandas import PandasDataset +import pandas as pd +import numpy as np +from ..utils.torch_utility import get_gpu + +from lag_llama.gluon.estimator import LagLlamaEstimator + +class Lag_Llama(): + def __init__(self, + win_size=96, + prediction_length=1, + input_c=1, + use_rope_scaling=False, + batch_size=64, + num_samples=100, + ckpt_path='lag-llama.ckpt'): + + self.model_name = 'Lag_Llama' + self.context_length = win_size + self.prediction_length = prediction_length + self.input_c = input_c + self.ckpt_path = ckpt_path + self.use_rope_scaling = use_rope_scaling + self.batch_size = batch_size + self.num_samples = num_samples + self.score_list = [] + + self.cuda = True + self.device = get_gpu(self.cuda) + + + def fit(self, data): + + for channel in range(self.input_c): + + data_channel = data[:, channel].reshape(-1, 1) + data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.context_length, predict_time_steps=self.prediction_length) + # print('data_win: ', data_win.shape) # (2330, 100) + # print('data_target: ', data_target.shape) # (2330, 1) + + data_win = data_win.T + + date_rng = pd.date_range(start='2021-01-01', periods=data_win.shape[0], freq='H') # Dummy timestep + df_wide = pd.DataFrame(data_win, index=date_rng) + # Convert numerical columns to float 32 format for lag-llama + for col in df_wide.columns: + # Check if column is not of string type + if df_wide[col].dtype != 'object' and pd.api.types.is_string_dtype(df_wide[col]) == False: + df_wide[col] = df_wide[col].astype('float32') + + # Create a PandasDataset + ds = PandasDataset(dict(df_wide)) + + ckpt = torch.load(self.ckpt_path, map_location=self.device) # Uses GPU since in this Colab we use a GPU. 
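        # (Illustrative note, hedged: assuming the checkpoint reports a pretrained
        #  context length of 32 -- as the comment below states Lag-Llama was trained
        #  with -- the linear rope-scaling factor computed a few lines down would be
        #  max(1.0, (96 + 1) / 32) ~= 3.03 for the defaults win_size=96 and
        #  prediction_length=1. It is only applied when use_rope_scaling=True.)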
+ estimator_args = ckpt["hyper_parameters"]["model_kwargs"] + + rope_scaling_arguments = { + "type": "linear", + "factor": max(1.0, (self.context_length + self.prediction_length) / estimator_args["context_length"]), + } + + estimator = LagLlamaEstimator( + ckpt_path=self.ckpt_path, + prediction_length=self.prediction_length, + context_length=self.context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length + + # estimator args + input_size=estimator_args["input_size"], + n_layer=estimator_args["n_layer"], + n_embd_per_head=estimator_args["n_embd_per_head"], + n_head=estimator_args["n_head"], + scaling=estimator_args["scaling"], + time_feat=estimator_args["time_feat"], + rope_scaling=rope_scaling_arguments if self.use_rope_scaling else None, + + batch_size=self.batch_size, + num_parallel_samples=100, + device=self.device, + ) + + lightning_module = estimator.create_lightning_module() + transformation = estimator.create_transformation() + predictor = estimator.create_predictor(transformation, lightning_module) + + forecast_it, ts_it = make_evaluation_predictions( + dataset=ds, + predictor=predictor, + num_samples=self.num_samples + ) + forecasts = list(forecast_it) + tss = list(ts_it) + + predictions = np.array([pred.mean for pred in forecasts]) + + # print('predictions: ', predictions.shape) + + ### using mse as the anomaly score + scores = (data_target.squeeze() - predictions.squeeze()) ** 2 + self.score_list.append(scores) + + scores_merge = np.mean(np.array(self.score_list), axis=0) + + padded_decision_scores = np.zeros(len(data)) + padded_decision_scores[: self.context_length+self.prediction_length-1] = scores_merge[0] + padded_decision_scores[self.context_length+self.prediction_length-1 : ]=scores_merge + + self.decision_scores_ = padded_decision_scores + + + def decision_function(self, X): + """ + Not used, present for API consistency by convention. + """ + pass + + def create_dataset(self, X, slidingWindow, predict_time_steps=1): + Xs, ys = [], [] + for i in range(len(X) - slidingWindow - predict_time_steps+1): + + tmp = X[i : i + slidingWindow + predict_time_steps].ravel() + # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() + + x = tmp[:slidingWindow] + y = tmp[slidingWindow:] + Xs.append(x) + ys.append(y) + return np.array(Xs), np.array(ys) \ No newline at end of file diff --git a/models/Left_STAMPi.py b/models/Left_STAMPi.py new file mode 100644 index 0000000000000000000000000000000000000000..0bb77f1303eb8621f15b2d878284946d7ec62eea --- /dev/null +++ b/models/Left_STAMPi.py @@ -0,0 +1,82 @@ +import numpy as np +import logging +import math +from stumpy import stumpi +from TSB_AD.models.base import BaseDetector +from TSB_AD.utils.utility import zscore + +class Left_STAMPi(BaseDetector): + + def __init__(self, n_init_train=100, window_size=50, normalize=True): + super().__init__() + self.n_init_train = n_init_train + self.window_size = window_size + self.normalize = normalize + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. 
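        Examples
        --------
        A hedged usage sketch (assumes ``stumpy`` is installed; the import path is
        an assumption for illustration):

        >>> import numpy as np
        >>> from TSB_AD.models.Left_STAMPi import Left_STAMPi
        >>> ts = np.sin(np.linspace(0, 60, 2000)).reshape(-1, 1)
        >>> det = Left_STAMPi(n_init_train=200, window_size=50).fit(ts)
        >>> det.decision_function(ts).shape    # padded to the input length
        (2000,)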
+ """ + n_samples, n_features = X.shape + if self.normalize: + X = zscore(X, axis=0, ddof=0) + + warmup = self.n_init_train + ws = self.window_size + + if ws > warmup: + logging.warning(f"WARN: window_size is larger than n_init_train. Adjusting to n_init_train={warmup}.") + ws = warmup + if ws < 3: + logging.warning("WARN: window_size must be at least 3. Adjusting to 3.") + ws = 3 + + self.stream = stumpi(X[:warmup, 0], m=ws, egress=False) + for point in X[warmup:, 0]: + self.stream.update(point) + + self.decision_scores_ = self.stream.left_P_ + self.decision_scores_[:warmup] = 0 + + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + n_samples = X.shape[0] + padded_scores = self.pad_anomaly_scores(self.decision_scores_, n_samples, self.window_size) + return padded_scores + + @staticmethod + def pad_anomaly_scores(scores, n_samples, window_size): + """ + Pads the anomaly scores to match the length of the input time series. + Padding is symmetric, using the first and last values. + """ + left_padding = [scores[0]] * math.ceil((window_size - 1) / 2) + right_padding = [scores[-1]] * ((window_size - 1) // 2) + padded_scores = np.array(left_padding + list(scores) + right_padding) + + return padded_scores[:n_samples] \ No newline at end of file diff --git a/models/M2N2.py b/models/M2N2.py new file mode 100644 index 0000000000000000000000000000000000000000..06bfabc78d9a8023f53008c079c2a58feb64a812 --- /dev/null +++ b/models/M2N2.py @@ -0,0 +1,395 @@ +""" +This function is adapted from [M2N2] by [Dongmin Kim et al.] 
+Original source: [https://github.com/carrtesy/M2N2] +Reimplemented by: [EmorZz1G] +""" +from tqdm import tqdm +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base import BaseDetector +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu +from torch.utils.data import DataLoader +from ..utils.dataset import ReconstructDataset +from typing import Literal + + +# models +''' +Basic MLP implementation by: +Dongmin Kim (tommy.dm.kim@gmail.com) +''' +class Detrender(nn.Module): + def __init__(self, num_features: int, gamma=0.99): + """ + :param num_features: the number of features or channels + :param eps: a value added for numerical stability + """ + super(Detrender, self).__init__() + self.num_features = num_features + self.gamma = gamma + self.mean = nn.Parameter(torch.zeros(1, 1, self.num_features), requires_grad=False) + + + def forward(self, x, mode:str): + if mode == 'norm': + x = self._normalize(x) + elif mode == 'denorm': + x = self._denormalize(x) + else: raise NotImplementedError + return x + + + def _update_statistics(self, x): + dim2reduce = tuple(range(0, x.ndim-1)) + mu = torch.mean(x, dim=dim2reduce, keepdim=True).detach() + self.mean.lerp_(mu, 1-self.gamma) + + def _set_statistics(self, x:torch.Tensor): + self.mean = nn.Parameter(x, requires_grad=False) + + def _normalize(self, x): + x = x - self.mean + return x + + def _denormalize(self, x): + x = x + self.mean + return x + + +class MLP(nn.Module): + def __init__(self, seq_len, num_channels, latent_space_size, gamma, normalization="None"): + super().__init__() + self.L, self.C = seq_len, num_channels + self.encoder = Encoder(seq_len*num_channels, latent_space_size) + self.decoder = Decoder(seq_len*num_channels, latent_space_size) + self.normalization = normalization + + if self.normalization == "Detrend": + self.use_normalizer = True + self.normalizer = Detrender(num_channels, gamma=gamma) + else: + self.use_normalizer = False + + + def forward(self, X): + B, L, C = X.shape + assert (L == self.L) and (C == self.C) + + if self.use_normalizer: + X = self.normalizer(X, "norm") + + z = self.encoder(X.reshape(B, L*C)) + out = self.decoder(z).reshape(B, L, C) + + if self.use_normalizer: + out = self.normalizer(out, "denorm") + return out + + +class Encoder(nn.Module): + def __init__(self, input_size, latent_space_size): + super().__init__() + self.linear1 = nn.Linear(input_size, input_size // 2) + self.relu1 = nn.ReLU() + self.linear2 = nn.Linear(input_size // 2, input_size // 4) + self.relu2 = nn.ReLU() + self.linear3 = nn.Linear(input_size // 4, latent_space_size) + self.relu3 = nn.ReLU() + + def forward(self, x): + x = self.linear1(x) + x = self.relu1(x) + x = self.linear2(x) + x = self.relu2(x) + x = self.linear3(x) + x = self.relu3(x) + return x + + +class Decoder(nn.Module): + def __init__(self, input_size, latent_space_size): + super().__init__() + self.linear1 = nn.Linear(latent_space_size, input_size // 4) + self.relu1 = nn.ReLU() + self.linear2 = nn.Linear(input_size // 4, input_size // 2) + self.relu2 = nn.ReLU() + self.linear3 = nn.Linear(input_size // 2, input_size) + + def forward(self, x): + x = self.linear1(x) + x = self.relu1(x) + x = self.linear2(x) + x = self.relu2(x) + out = self.linear3(x) + return out + + +class MLP_Trainer: + def __init__( + self, model, train_loader, valid_loader=None, + epochs=10, lr=1e-3, L2_reg=0, device='cuda' + ): + self.model = model + self.train_loader = train_loader + self.valid_loader = valid_loader + 
self.device = device + self.epochs = epochs + self.optimizer = torch.optim.AdamW( + params=self.model.parameters(), lr=lr, weight_decay=L2_reg) + + def train(self): + train_iterator = tqdm( + range(1, self.epochs + 1), + total=self.epochs, + desc="training epochs", + leave=True + ) + if self.valid_loader is not None: + early_stop = EarlyStoppingTorch(patience=5) + for epoch in train_iterator: + train_stats = self.train_epoch() + if self.valid_loader is not None: + valid_loss = self.valid() + early_stop(valid_loss, self.model) + if early_stop.early_stop: + break + + def train_epoch(self): + self.model.train() + train_summary = 0.0 + for i, batch_data in enumerate(self.train_loader): + train_log = self._process_batch(batch_data) + train_summary += train_log["summary"] + train_summary /= len(self.train_loader) + return train_summary + + def _process_batch(self, batch_data) -> dict: + X = batch_data[0].to(self.device) + B, L, C = X.shape + # recon + Xhat = self.model(X) + # optimize + self.optimizer.zero_grad() + loss = F.mse_loss(Xhat, X) + loss.backward() + self.optimizer.step() + out = { + "recon_loss": loss.item(), + "summary": loss.item(), + } + return out + + @torch.no_grad() + def valid(self): + assert self.valid_loader is not None, 'cannot valid without any data' + self.model.eval() + for i, batch_data in enumerate(self.valid_loader): + X = batch_data[0].to(self.device) + Xhat = self.model(X) + loss = F.mse_loss(Xhat, X) + return loss.item() + +class MLP_Tester: + def __init__(self, model, train_loader, test_loader, lr=1e-3, device='cuda'): + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + self.lr = lr + + @torch.no_grad() + def offline(self, dataloader): + self.model.eval() + it = tqdm( + dataloader, + total=len(dataloader), + desc="offline inference", + leave=True + ) + recon_errors = [] + for i, batch_data in enumerate(it): + X = batch_data[0].to(self.device) + B, L, C = X.shape + Xhat = self.model(X) + recon_error = F.mse_loss(Xhat, X, reduction='none') + recon_error = recon_error.detach().cpu().numpy() + recon_errors.append(recon_error) + torch.cuda.empty_cache() + recon_errors = np.concatenate(recon_errors, axis=0) # (B, L, C) + anomaly_scores = recon_errors.mean(axis=2).reshape(-1) # (B, L) => (B*L,) + return anomaly_scores + + def online(self, dataloader, init_thr, normalization="None"): + self.model.train() + it = tqdm( + dataloader, + total=len(dataloader), + desc="online inference", + leave=True + ) + tau = init_thr + TT_optimizer = torch.optim.SGD( + [p for p in self.model.parameters()], lr=self.lr) + + Xs, Xhats = [], [] + preds = [] + As, thrs = [], [] + update_count = 0 + for i, batch_data in enumerate(it): + X = batch_data[0].to(self.device) + B, L, C = X.shape + # Update of test-time statistics. 
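                # (Descriptive note on the step below: Detrender._update_statistics
                #  applies an exponential moving average to the per-channel mean via
                #  Tensor.lerp_, i.e. mean <- mean + (1 - gamma) * (batch_mean - mean);
                #  with the default gamma=0.99 each incoming test batch therefore
                #  shifts the running mean by only 1%.)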
+ if normalization == "Detrend": + self.model.normalizer._update_statistics(X) + # inference + Xhat = self.model(X) + E = (Xhat-X)**2 + A = E.mean(dim=2) + # A: (B, L, C) -> (B, L) + ytilde = (A >= tau).float() + pred = ytilde + # log model outputs + Xs.append(X) + Xhats.append(Xhat.clone().detach()) + # generate anomaly scores for each time step + As.append(A.clone().detach()) + preds.append(pred.clone().detach()) + thrs.append(tau) + # learn new-normals + TT_optimizer.zero_grad() + mask = (ytilde == 0) + recon_loss = (A * mask).mean() + recon_loss.backward() + TT_optimizer.step() + update_count += torch.sum(mask).item() + # outputs + anoscs = torch.cat(As, axis=0).reshape(-1).detach().cpu().numpy() + print('total update count:', update_count) + return anoscs + +class M2N2(BaseDetector): + def __init__(self, + win_size=12, + stride=12, + num_channels=1, + batch_size=64, + epochs=10, + latent_dim=128, + lr=1e-3, + ttlr=1e-3, # learning rate for online test-time adaptation + normalization="Detrend", + gamma=0.99, + th=0.95, # 95 percentile == 0.95 quantile + valid_size=0.2, + infer_mode='online'): + self.model_name = 'M2N2' + self.normalization = normalization + self.device = get_gpu(True) + self.model = MLP( + seq_len=win_size, + num_channels=num_channels, + latent_space_size=latent_dim, + gamma=gamma, + normalization=normalization, + ).to(self.device) + + self.th = th + self.lr = lr + self.ttlr = ttlr + self.epochs = epochs + self.batch_size = batch_size + self.win_size = win_size + self.stride = stride + self.valid_size = valid_size + self.infer_mode = infer_mode + + def fit(self, data): + if self.valid_size is None: + self.train_loader = DataLoader( + dataset=ReconstructDataset( + data, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=True + ) + self.valid_loader = None + else: + data_train = data[:int((1-self.valid_size)*len(data))] + data_valid = data[int((1-self.valid_size)*len(data)):] + self.train_loader = DataLoader( + dataset=ReconstructDataset( + data_train, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=True + ) + self.valid_loader = DataLoader( + dataset=ReconstructDataset( + data_valid, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=False, + ) + + self.trainer = MLP_Trainer( + model=self.model, + train_loader=self.train_loader, + valid_loader=self.valid_loader, + epochs=self.epochs, + lr=self.lr, + device=self.device + ) + self.trainer.train() + + self.tester = MLP_Tester( + model=self.model, + train_loader=self.train_loader, + test_loader=self.train_loader, + lr=self.ttlr, + device=self.device, + ) + train_anoscs = self.tester.offline(self.train_loader) + self.tau = np.quantile(train_anoscs, self.th) + print('tau', self.tau) + + def decision_function(self, data): + self.test_loader = DataLoader( + dataset=ReconstructDataset( + data, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=False, + ) + self.tester = MLP_Tester( + model=self.model, + train_loader=self.train_loader, + test_loader=self.test_loader, + lr=self.ttlr, + device=self.device, + ) + if self.infer_mode == 'online': + anoscs = self.tester.online( + self.test_loader, self.tau, + normalization=self.normalization) + else: + anoscs = self.tester.offline(self.test_loader) + + self.decision_scores_ = pad_by_edge_value(anoscs, len(data), mode='right') + return self.decision_scores_ + + +def pad_by_edge_value(scores, target_len, mode: Literal['both', 'left', 
'right']): + scores = np.array(scores).reshape(-1) + assert len(scores) <= target_len, f'the length of scores is more than target one' + print(f'origin length: {len(scores)}; target length: {target_len}') + current_len = scores.shape[0] + pad_total = max(target_len-current_len, 0) + if mode == 'left': + pad_before = pad_total + elif mode == 'right': + pad_before = 0 + else: + pad_before = pad_total // 2 + 1 + pad_after = pad_total - pad_before + padded_scores = np.pad(scores, (pad_before, pad_after), mode='edge') + return padded_scores \ No newline at end of file diff --git a/models/MCD.py b/models/MCD.py new file mode 100644 index 0000000000000000000000000000000000000000..f84ebd16f45e2197bf99f0011604c11de627ed3d --- /dev/null +++ b/models/MCD.py @@ -0,0 +1,274 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +from sklearn.covariance import MinCovDet +from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted +from .feature import Window +from .base import BaseDetector +from ..utils.utility import zscore +import numpy as np +import math +__all__ = ['MCD'] + + +class MCD(BaseDetector): + """Detecting outliers in a Gaussian distributed dataset using + Minimum Covariance Determinant (MCD): robust estimator of covariance. + + The Minimum Covariance Determinant covariance estimator is to be applied + on Gaussian-distributed data, but could still be relevant on data + drawn from a unimodal, symmetric distribution. It is not meant to be used + with multi-modal data (the algorithm used to fit a MinCovDet object is + likely to fail in such a case). + One should consider projection pursuit methods to deal with multi-modal + datasets. + + First fit a minimum covariance determinant model and then compute the + Mahalanobis distance as the outlier degree of the data + + See :cite:`rousseeuw1999fast,hardin2004outlier` for details. + + Parameters + ---------- + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, + i.e. the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + store_precision : bool + Specify if the estimated precision is stored. + + assume_centered : bool + If True, the support of the robust location and the covariance + estimates is computed, and a covariance estimate is recomputed from + it, without centering the data. + Useful to work with data whose mean is significantly equal to + zero but is not exactly zero. + If False, the robust location and covariance are directly computed + with the FastMCD algorithm without additional treatment. + + support_fraction : float, 0 < support_fraction < 1 + The proportion of points to be included in the support of the raw + MCD estimate. Default is None, which implies that the minimum + value of support_fraction will be used within the algorithm: + [n_sample + n_features + 1] / 2 + + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + + Attributes + ---------- + raw_location_ : array-like, shape (n_features,) + The raw robust estimated location before correction and re-weighting. 
+ + raw_covariance_ : array-like, shape (n_features, n_features) + The raw robust estimated covariance before correction and re-weighting. + + raw_support_ : array-like, shape (n_samples,) + A mask of the observations that have been used to compute + the raw robust estimates of location and shape, before correction + and re-weighting. + + location_ : array-like, shape (n_features,) + Estimated robust location + + covariance_ : array-like, shape (n_features, n_features) + Estimated robust covariance matrix + + precision_ : array-like, shape (n_features, n_features) + Estimated pseudo inverse matrix. + (stored only if store_precision is True) + + support_ : array-like, shape (n_samples,) + A mask of the observations that have been used to compute + the robust estimates of location and shape. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. Mahalanobis distances of the training set (on which + `:meth:`fit` is called) observations. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, slidingWindow=100, sub=True, contamination=0.1, store_precision=True, + assume_centered=False, support_fraction=None, + random_state=2024, normalize=True): + super(MCD, self).__init__(contamination=contamination) + self.store_precision = store_precision + self.sub = sub + self.assume_centered = assume_centered + self.support_fraction = support_fraction + self.random_state = random_state + self.slidingWindow = slidingWindow + self.normalize = normalize + + # noinspection PyIncorrectDocstring + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. 
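+
+        Examples
+        --------
+        Minimal usage sketch (illustrative only; assumes a univariate
+        series ``X`` of shape ``(n_samples, 1)``):
+
+        >>> det = MCD(slidingWindow=100)       # doctest: +SKIP
+        >>> det.fit(X)                         # doctest: +SKIP
+        >>> scores = det.decision_scores_      # doctest: +SKIP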
+ """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # Validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + support_fraction = self.support_fraction + while True: + try: + self.detector_ = MinCovDet(store_precision=self.store_precision, + assume_centered=self.assume_centered, + support_fraction=support_fraction, + random_state=self.random_state) + self.detector_.fit(X=X, y=y) + break + except ValueError: + support_fraction = support_fraction + 0.1 + if support_fraction >= 1: + support_fraction = None + + + # Use mahalanabis distance as the outlier score + self.decision_scores_ = self.detector_.dist_ + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + n_samples, n_features = X.shape + if n_features == 1: + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + X = check_array(X) + + # Computer mahalanobis distance of the samples + decision_scores_ = self.detector_.mahalanobis(X) + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + return decision_scores_ + + @property + def raw_location_(self): + """The raw robust estimated location before correction and + re-weighting. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.raw_location_ + + @property + def raw_covariance_(self): + """The raw robust estimated location before correction and + re-weighting. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.raw_covariance_ + + @property + def raw_support_(self): + """A mask of the observations that have been used to compute + the raw robust estimates of location and shape, before correction + and re-weighting. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.raw_support_ + + @property + def location_(self): + """Estimated robust location. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.location_ + + @property + def covariance_(self): + """Estimated robust covariance matrix. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.covariance_ + + @property + def precision_(self): + """ Estimated pseudo inverse matrix. 
+ (stored only if store_precision is True) + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.precision_ + + @property + def support_(self): + """A mask of the observations that have been used to compute + the robust estimates of location and shape. + + Decorator for scikit-learn MinCovDet attributes. + """ + return self.detector_.support_ diff --git a/models/MOMENT.py b/models/MOMENT.py new file mode 100644 index 0000000000000000000000000000000000000000..a6d9acaba22a30b7db30622c8885005fe904365f --- /dev/null +++ b/models/MOMENT.py @@ -0,0 +1,220 @@ +""" +This function is adapted from [moment] by [mononitogoswami] +Original source: [https://github.com/moment-timeseries-foundation-model/moment] +""" + +from momentfm import MOMENTPipeline +from momentfm.utils.masking import Masking +from momentfm.utils.utils import control_randomness +from sklearn.preprocessing import MinMaxScaler +import numpy as np +import pandas as pd +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm +from torch import nn +import math + +from .base import BaseDetector +from ..utils.dataset import ReconstructDataset_Moment +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu + +class MOMENT(BaseDetector): + def __init__(self, + win_size=256, + input_c=1, + batch_size=128, + epochs=2, + validation_size=0, + lr=1e-4): + + self.model_name = 'MOMENT' + self.win_size = win_size + self.input_c = input_c + self.batch_size = batch_size + self.anomaly_criterion = nn.MSELoss(reduce=False) + self.epochs = epochs + self.validation_size = validation_size + self.lr = lr + + cuda = True + self.cuda = cuda + self.device = get_gpu(self.cuda) + + # Control randomness for reproducibility + control_randomness(seed=42) + + # Load the model properly with config + try: + self.model = MOMENTPipeline.from_pretrained( + "AutonLab/MOMENT-1-base", + model_kwargs={ + "task_name": "reconstruction", + "n_channels": self.input_c, + "max_seq_len": self.win_size + } + ) + self.model.init() + except Exception as e: + # Fallback: try alternative initialization + print(f"Failed to load MOMENT model with from_pretrained: {e}") + print("Attempting alternative initialization...") + from transformers import AutoConfig + config = AutoConfig.from_pretrained("AutonLab/MOMENT-1-base") + config.task_name = "reconstruction" + config.n_channels = self.input_c + config.max_seq_len = self.win_size + self.model = MOMENTPipeline(config) + self.model.init() + + self.model = self.model.to(self.device).float() + # Optimize Mean Squarred Error using your favourite optimizer + self.criterion = torch.nn.MSELoss() + self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) + self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) + self.save_path = None + self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) + + def zero_shot(self, data): + + test_loader = DataLoader( + dataset=ReconstructDataset_Moment(data, window_size=self.win_size, normalize=True), + batch_size=self.batch_size, + shuffle=False) + + trues, preds = [], [] + self.score_list = [] + with torch.no_grad(): + for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): + batch_x = batch_x.to("cuda").float() + batch_masks = batch_masks.to("cuda") + batch_x = batch_x.permute(0,2,1) + + # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] + # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] + + output = self.model(x_enc=batch_x, 
input_mask=batch_masks) # [batch_size, n_channels, window_size] + score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] + self.score_list.append(score) + + self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + self.decision_scores_ = self.__anomaly_score + + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset_Moment(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset_Moment(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + mask_generator = Masking(mask_ratio=0.3) # Mask 30% of patches randomly + + + for epoch in range(1, self.epochs + 1): + self.model.train() + for batch_x, batch_masks in tqdm(train_loader, total=len(train_loader)): + batch_x = batch_x.to(self.device).float() + batch_x = batch_x.permute(0,2,1) + # print('batch_x: ', batch_x.shape) + + original = batch_x + n_channels = batch_x.shape[1] + + # Reshape to [batch_size * n_channels, 1, window_size] + batch_x = batch_x.reshape((-1, 1, self.win_size)) + + batch_masks = batch_masks.to(self.device).long() + batch_masks = batch_masks.repeat_interleave(n_channels, axis=0) + + # Randomly mask some patches of data + mask = mask_generator.generate_mask( + x=batch_x, input_mask=batch_masks).to(self.device).long() + + mask = torch.nn.functional.pad(mask, (0, batch_masks.size(1) - mask.size(1)), mode='constant', value=1) + + # Forward + model_output = self.model(batch_x, input_mask=batch_masks, mask=mask).reconstruction + model_output = torch.nn.functional.pad(model_output, (0, original.size(2)-model_output.size(2)), mode='replicate') + + output = model_output.reshape(original.size(0), n_channels, self.win_size) + + # Compute loss + loss = self.criterion(output, original) + + # print(f"loss: {loss.item()}") + + # Backward + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + # self.model.eval() + # avg_loss = 0 + # with torch.no_grad(): + # for batch_x, batch_masks in tqdm(valid_loader, total=len(valid_loader)): + # batch_x = batch_x.to("cuda").float() + # batch_masks = batch_masks.to("cuda") + # batch_x = batch_x.permute(0,2,1) + + # print('batch_x: ', batch_x.shape) + # print('batch_masks: ', batch_masks.shape) + + # output = self.model(batch_x, input_mask=batch_masks) + + # loss = self.criterion(output.reconstruction.reshape(-1, n_channels, self.win_size), batch_x) + # print(f"loss: {loss.item()}") + # avg_loss += loss.cpu().item() + + # valid_loss = avg_loss/max(len(valid_loader), 1) + # self.scheduler.step() + # self.early_stopping(valid_loss, self.model) + # if self.early_stopping.early_stop: + # print(" Early stopping<<<") + # break + + def decision_function(self, data): + """ + Not used, present for API consistency by convention. 
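+
+        (In practice this method is used: it runs windowed reconstruction on
+        ``data`` and returns one anomaly score per time step, edge-padded to
+        ``len(data)``.)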
+ """ + + test_loader = DataLoader( + dataset=ReconstructDataset_Moment(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False) + + trues, preds = [], [] + self.score_list = [] + with torch.no_grad(): + for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): + batch_x = batch_x.to("cuda").float() + batch_masks = batch_masks.to("cuda") + batch_x = batch_x.permute(0,2,1) + + # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] + # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] + + output = self.model(batch_x, input_mask=batch_masks) + score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] + self.score_list.append(score) + + self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score \ No newline at end of file diff --git a/models/MatrixProfile.py b/models/MatrixProfile.py new file mode 100644 index 0000000000000000000000000000000000000000..a44a2ddb3de63ba8f40d483becf59fa259535458 --- /dev/null +++ b/models/MatrixProfile.py @@ -0,0 +1,47 @@ +import stumpy +import numpy as np + +class MatrixProfile(): + """ + Wrapper of the stympy implementation of the MatrixProfile algorithm + + Parameters + ---------- + window : int, + target subsequence length. + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples - m,) + The anomaly score. + The higher, the more abnormal. Anomalies tend to have higher + scores. This value is available once the detector is + fitted. + """ + + def __init__(self, window): + self.window = window + self.model_name = 'MatrixProfile' + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The input samples. + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + self.profile = stumpy.stump(X.ravel(),m=self.window) + #self.profile = mp.compute(X, windows=self.window) + res = np.zeros(len(X)) + res.fill(self.profile[:, 0].min()) + res[self.window//2:-self.window//2+1] = self.profile[:, 0] + self.decision_scores_ = res + return self diff --git a/models/Moirai.py b/models/Moirai.py new file mode 100644 index 0000000000000000000000000000000000000000..65e05260e80b4fc208b6730c15ca537d23803e13 --- /dev/null +++ b/models/Moirai.py @@ -0,0 +1,312 @@ +""" +Moirai model for anomaly detection using zero-shot forecasting. +Adapted from test_anomaly.py approach for TSB-AD framework. +""" + +import numpy as np +import pandas as pd +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm +import warnings +warnings.filterwarnings('ignore') + +from gluonts.dataset.pandas import PandasDataset +from gluonts.dataset.split import split +from uni2ts.model.moirai import MoiraiForecast, MoiraiModule + +from .base import BaseDetector +from ..utils.dataset import MoiraiWindowedDataset + + +class Moirai(BaseDetector): + def __init__(self, + win_size=96, + model_path="Salesforce/moirai-1.0-R-small", + num_samples=100, + device='cuda:0', + use_score=False, + threshold=0.5): + """ + Initialize Moirai anomaly detector. 
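+
+        Scores are computed zero-shot: the series is cut into non-overlapping
+        windows, Moirai forecasts each window from the preceding context, and
+        the squared forecast error (averaged over features for multivariate
+        input) is used as the anomaly score.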
+ + Args: + win_size (int): Window size for context and prediction + model_path (str): Path to pretrained Moirai model + num_samples (int): Number of forecast samples + device (str): Device to run model on + use_score (bool): Whether to use raw scores or threshold + threshold (float): Threshold for binary classification + """ + self.model_name = 'Moirai' + self.win_size = win_size + self.model_path = model_path + self.num_samples = num_samples + self.device = torch.device(device if torch.cuda.is_available() else 'cpu') + self.use_score = use_score + self.threshold = threshold + self.decision_scores_ = None + + def fit(self, data): + """ + Fit the Moirai model and compute anomaly scores. + + Args: + data: Input time series data (1D or 2D numpy array) + """ + try: + # Ensure data is in the right format + if data.ndim == 1: + data = data.reshape(-1, 1) + + print(f"Moirai: Processing data with shape {data.shape}") + + # Create windowed dataset following test_anomaly.py pattern + dataset = MoiraiWindowedDataset( + data=data, + win_size=self.win_size, + step=self.win_size, # Non-overlapping windows + normalize=False # Let Moirai handle normalization + ) + + print(f"Moirai: Created {len(dataset)} windows") + + if len(dataset) == 0: + print("Warning: No valid windows created. Data might be too short.") + self.decision_scores_ = np.zeros(len(data)) + return + + # Process each window using DataLoader (similar to test_anomaly.py) + data_loader = DataLoader( + dataset=dataset, + batch_size=1, + shuffle=False, + drop_last=False + ) + + all_predictions = [] + all_targets = [] + + print("Processing windows with Moirai model...") + # Add progress bar for window processing + for i, (context, target) in enumerate(tqdm(data_loader, desc="Processing windows", unit="window")): + # Process single window following test_anomaly.py pattern + scores = self._process_window(context.squeeze(0).numpy(), target.squeeze(0).numpy(), i) + all_predictions.append(scores) + all_targets.append(target.squeeze(0).numpy()) + + # Combine all predictions + if all_predictions: + print("Computing anomaly scores...") + # Concatenate predictions along time dimension + combined_predictions = np.concatenate(all_predictions, axis=0) + combined_targets = np.concatenate(all_targets, axis=0) + + # Compute anomaly scores as prediction error + if combined_targets.ndim == 1 or combined_predictions.ndim == 1: + # Handle univariate case or when predictions are 1D + if combined_targets.ndim != combined_predictions.ndim: + # Ensure both have same number of dimensions + if combined_predictions.ndim == 1 and combined_targets.ndim == 2: + combined_predictions = combined_predictions.reshape(-1, 1) + elif combined_targets.ndim == 1 and combined_predictions.ndim == 2: + combined_targets = combined_targets.reshape(-1, 1) + + if combined_targets.shape != combined_predictions.shape: + print(f"Shape mismatch: targets {combined_targets.shape}, predictions {combined_predictions.shape}") + # Use only the first feature if shapes don't match + if combined_targets.ndim == 2: + combined_targets = combined_targets[:, 0] + if combined_predictions.ndim == 2: + combined_predictions = combined_predictions[:, 0] + + anomaly_scores = (combined_targets - combined_predictions) ** 2 + if anomaly_scores.ndim == 2: + anomaly_scores = np.mean(anomaly_scores, axis=1) + else: + # For multivariate, use mean squared error across features + if combined_targets.shape != combined_predictions.shape: + print(f"Shape mismatch: targets {combined_targets.shape}, predictions 
{combined_predictions.shape}") + # Use only matching dimensions + min_features = min(combined_targets.shape[1], combined_predictions.shape[1]) + combined_targets = combined_targets[:, :min_features] + combined_predictions = combined_predictions[:, :min_features] + + anomaly_scores = np.mean((combined_targets - combined_predictions) ** 2, axis=1) + + # Pad scores to match original data length + print("Padding scores to original data length...") + self.decision_scores_ = self._pad_scores_to_original_length( + anomaly_scores, len(data), dataset.get_window_info() + ) + else: + print("Warning: No predictions generated") + self.decision_scores_ = np.zeros(len(data)) + + except Exception as e: + print(f"Error in Moirai.fit(): {str(e)}") + import traceback + traceback.print_exc() + self.decision_scores_ = np.zeros(len(data)) + + def _process_window(self, context, target, window_index): + """ + Process a single window following the test_anomaly.py approach. + + Args: + context: Context data for the window (win_size, n_features) + target: Target data for the window (win_size, n_features) + window_index: Index of the current window + + Returns: + predictions: Forecasted values for the target period + """ + try: + # Update progress description in tqdm (this will be shown in the progress bar) + tqdm.write(f"Processing window {window_index + 1}") + + # Ensure 2D shape + if context.ndim == 1: + context = context.reshape(-1, 1) + if target.ndim == 1: + target = target.reshape(-1, 1) + + # Combine context and target for full window (following test_anomaly.py) + full_window = np.vstack([context, target]) + + # Create DataFrame + feature_df = pd.DataFrame(full_window) + + # For multivariate data, we need to handle it properly + if feature_df.shape[1] == 1: + feature_df.columns = ['target'] + target_col = 'target' + feature_cols = [] + else: + # For multivariate, use all features as target + feature_df.columns = [f'target_{i}' for i in range(feature_df.shape[1])] + target_col = feature_df.columns.tolist() # Use all columns as targets + feature_cols = [] + + # Add timestamp and unique_id + timestamp_range = pd.date_range( + start=pd.Timestamp('2023-01-01 10:00:00'), + periods=len(feature_df), + freq='T' + ) + feature_df.index = timestamp_range + feature_df['unique_id'] = window_index + + # Create GluonTS dataset + moirai_df = feature_df.reset_index().rename(columns={'index': 'timestamp'}) + + if isinstance(target_col, list): + # Multivariate case - use multiple target columns + ds = PandasDataset.from_long_dataframe( + moirai_df, + target=target_col, + item_id="unique_id", + timestamp="timestamp", + ) + else: + # Univariate case + if feature_cols: + ds = PandasDataset.from_long_dataframe( + moirai_df, + target=target_col, + item_id="unique_id", + timestamp="timestamp", + feat_dynamic_real=feature_cols, + ) + else: + ds = PandasDataset.from_long_dataframe( + moirai_df, + target=target_col, + item_id="unique_id", + timestamp="timestamp", + ) + + # Split dataset (following test_anomaly.py) + test_size = self.win_size + _, test_template = split(ds, offset=-test_size) + + test_data = test_template.generate_instances( + prediction_length=self.win_size, + windows=1, + distance=self.win_size, + max_history=self.win_size, + ) + + # Create Moirai model + # Determine target dimension based on number of features + target_dim = target.shape[1] if target.ndim > 1 else 1 + + model = MoiraiForecast( + module=MoiraiModule.from_pretrained(self.model_path), + prediction_length=self.win_size, + context_length=self.win_size, + 
patch_size="auto", + num_samples=self.num_samples, + target_dim=target_dim, + feat_dynamic_real_dim=ds.num_feat_dynamic_real, + past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real, + ) + + # Create predictor and generate forecasts + predictor = model.create_predictor(batch_size=1, device=self.device) + forecasts = predictor.predict(test_data.input) + forecasts = list(forecasts) + + # Get median prediction (following test_anomaly.py) + predictions = np.median(forecasts[0].samples, axis=0) + + return predictions + + except Exception as e: + print(f"Error processing window {window_index}: {str(e)}") + # Return zeros as fallback with correct shape + target_shape = (self.win_size, target.shape[1]) if target.ndim > 1 else (self.win_size,) + return np.zeros(target_shape) + + def _pad_scores_to_original_length(self, scores, original_length, window_info): + """ + Pad anomaly scores to match the original data length. + + Args: + scores: Computed anomaly scores from windows + original_length: Length of the original input data + window_info: Information about windowing strategy + + Returns: + padded_scores: Scores padded to original length + """ + padded_scores = np.zeros(original_length) + + win_size = window_info['win_size'] + step = window_info['step'] + + # Fill in scores from each window + score_windows = scores.reshape(-1, win_size) + for i, score_window in enumerate(tqdm(score_windows, desc="Padding scores", unit="window")): + start_idx = i * step + win_size # Offset by win_size (context part) + end_idx = start_idx + win_size + + if end_idx <= original_length: + padded_scores[start_idx:end_idx] = score_window + elif start_idx < original_length: + # Partial window at the end + remaining = original_length - start_idx + padded_scores[start_idx:] = score_window[:remaining] + + # Fill beginning (context part) with first window's average + if len(scores) > 0: + first_score = np.mean(scores[:win_size]) if len(scores) >= win_size else np.mean(scores) + padded_scores[:win_size] = first_score + + return padded_scores + + def decision_function(self, X): + """ + Not used for zero-shot approach, present for API consistency. + """ + return self.decision_scores_ diff --git a/models/Moirai_new.py b/models/Moirai_new.py new file mode 100644 index 0000000000000000000000000000000000000000..6902d153cc5feed90dfc432b8e74553a3ee94c37 --- /dev/null +++ b/models/Moirai_new.py @@ -0,0 +1,261 @@ +""" +Moirai model for anomaly detection using zero-shot forecasting. +Adapted from test_anomaly.py approach for TSB-AD framework. +""" + +import numpy as np +import pandas as pd +import torch +from torch.utils.data import DataLoader +import warnings +warnings.filterwarnings('ignore') + +from gluonts.dataset.pandas import PandasDataset +from gluonts.dataset.split import split +from uni2ts.model.moirai import MoiraiForecast, MoiraiModule + +from .base import BaseDetector +from ..utils.dataset import MoiraiWindowedDataset + + +class Moirai(BaseDetector): + def __init__(self, + win_size=96, + model_path="Salesforce/moirai-1.0-R-small", + num_samples=100, + device='cuda:0', + use_score=False, + threshold=0.5): + """ + Initialize Moirai anomaly detector. 
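+
+        Variant of ``Moirai.py``: for multivariate input only the first
+        feature is forecast (as the target, with ``target_dim=1``) and the
+        remaining channels are passed as ``feat_dynamic_real`` covariates;
+        scoring is otherwise the same squared-forecast-error scheme on
+        non-overlapping windows.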
+ + Args: + win_size (int): Window size for context and prediction + model_path (str): Path to pretrained Moirai model + num_samples (int): Number of forecast samples + device (str): Device to run model on + use_score (bool): Whether to use raw scores or threshold + threshold (float): Threshold for binary classification + """ + self.model_name = 'Moirai' + self.win_size = win_size + self.model_path = model_path + self.num_samples = num_samples + self.device = torch.device(device if torch.cuda.is_available() else 'cpu') + self.use_score = use_score + self.threshold = threshold + self.decision_scores_ = None + + def fit(self, data): + """ + Fit the Moirai model and compute anomaly scores. + + Args: + data: Input time series data (1D or 2D numpy array) + """ + try: + # Ensure data is in the right format + if data.ndim == 1: + data = data.reshape(-1, 1) + + print(f"Moirai: Processing data with shape {data.shape}") + + # Create windowed dataset following test_anomaly.py pattern + dataset = MoiraiWindowedDataset( + data=data, + win_size=self.win_size, + step=self.win_size, # Non-overlapping windows + normalize=False # Let Moirai handle normalization + ) + + print(f"Moirai: Created {len(dataset)} windows") + + if len(dataset) == 0: + print("Warning: No valid windows created. Data might be too short.") + self.decision_scores_ = np.zeros(len(data)) + return + + # Process each window using DataLoader (similar to test_anomaly.py) + data_loader = DataLoader( + dataset=dataset, + batch_size=1, + shuffle=False, + drop_last=False + ) + + all_predictions = [] + all_targets = [] + + for i, (context, target) in enumerate(data_loader): + # Process single window following test_anomaly.py pattern + scores = self._process_window(context.squeeze(0).numpy(), target.squeeze(0).numpy(), i) + all_predictions.append(scores) + all_targets.append(target.squeeze(0).numpy()) + + # Combine all predictions + if all_predictions: + # Concatenate predictions along time dimension + combined_predictions = np.concatenate(all_predictions, axis=0) + combined_targets = np.concatenate(all_targets, axis=0) + + # Compute anomaly scores as prediction error + if combined_targets.ndim == 1: + anomaly_scores = (combined_targets - combined_predictions) ** 2 + else: + # For multivariate, use mean squared error across features + anomaly_scores = np.mean((combined_targets - combined_predictions) ** 2, axis=1) + + # Pad scores to match original data length + self.decision_scores_ = self._pad_scores_to_original_length( + anomaly_scores, len(data), dataset.get_window_info() + ) + else: + print("Warning: No predictions generated") + self.decision_scores_ = np.zeros(len(data)) + + except Exception as e: + print(f"Error in Moirai.fit(): {str(e)}") + import traceback + traceback.print_exc() + self.decision_scores_ = np.zeros(len(data)) + + def _process_window(self, context, target, window_index): + """ + Process a single window following the test_anomaly.py approach. 
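+
+        Returns the median over ``num_samples`` forecast samples for the
+        target half of the window; on failure a zero vector of length
+        ``win_size`` is returned as a fallback.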
+ + Args: + context: Context data for the window (win_size, n_features) + target: Target data for the window (win_size, n_features) + window_index: Index of the current window + + Returns: + predictions: Forecasted values for the target period + """ + try: + # Ensure 2D shape + if context.ndim == 1: + context = context.reshape(-1, 1) + if target.ndim == 1: + target = target.reshape(-1, 1) + + # Combine context and target for full window (following test_anomaly.py) + full_window = np.vstack([context, target]) + + # Create DataFrame + feature_df = pd.DataFrame(full_window) + if feature_df.shape[1] == 1: + feature_df.columns = ['target'] + target_col = 'target' + feature_cols = [] + else: + feature_df.columns = [f'feature_{i}' for i in range(feature_df.shape[1])] + target_col = 'feature_0' # Use first feature as target + feature_cols = [f'feature_{i}' for i in range(1, feature_df.shape[1])] + + # Add timestamp and unique_id + timestamp_range = pd.date_range( + start=pd.Timestamp('2023-01-01 10:00:00'), + periods=len(feature_df), + freq='T' + ) + feature_df.index = timestamp_range + feature_df['unique_id'] = window_index + + # Create GluonTS dataset + moirai_df = feature_df.reset_index().rename(columns={'index': 'timestamp'}) + + if feature_cols: + ds = PandasDataset.from_long_dataframe( + moirai_df, + target=target_col, + item_id="unique_id", + timestamp="timestamp", + feat_dynamic_real=feature_cols, + ) + else: + ds = PandasDataset.from_long_dataframe( + moirai_df, + target=target_col, + item_id="unique_id", + timestamp="timestamp", + ) + + # Split dataset (following test_anomaly.py) + test_size = self.win_size + _, test_template = split(ds, offset=-test_size) + + test_data = test_template.generate_instances( + prediction_length=self.win_size, + windows=1, + distance=self.win_size, + max_history=self.win_size, + ) + + # Create Moirai model + model = MoiraiForecast( + module=MoiraiModule.from_pretrained(self.model_path), + prediction_length=self.win_size, + context_length=self.win_size, + patch_size="auto", + num_samples=self.num_samples, + target_dim=1, + feat_dynamic_real_dim=ds.num_feat_dynamic_real, + past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real, + ) + + # Create predictor and generate forecasts + predictor = model.create_predictor(batch_size=1, device=self.device) + forecasts = predictor.predict(test_data.input) + forecasts = list(forecasts) + + # Get median prediction (following test_anomaly.py) + predictions = np.median(forecasts[0].samples, axis=0) + + return predictions + + except Exception as e: + print(f"Error processing window {window_index}: {str(e)}") + # Return zeros as fallback + return np.zeros(self.win_size) + + def _pad_scores_to_original_length(self, scores, original_length, window_info): + """ + Pad anomaly scores to match the original data length. 
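+
+        Each window's scores are written back starting at an offset of
+        ``win_size`` (the context part of the window); the leading context
+        region is then filled with the mean of the first window's scores.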
+ + Args: + scores: Computed anomaly scores from windows + original_length: Length of the original input data + window_info: Information about windowing strategy + + Returns: + padded_scores: Scores padded to original length + """ + padded_scores = np.zeros(original_length) + + win_size = window_info['win_size'] + step = window_info['step'] + + # Fill in scores from each window + for i, score_window in enumerate(scores.reshape(-1, win_size)): + start_idx = i * step + win_size # Offset by win_size (context part) + end_idx = start_idx + win_size + + if end_idx <= original_length: + padded_scores[start_idx:end_idx] = score_window + elif start_idx < original_length: + # Partial window at the end + remaining = original_length - start_idx + padded_scores[start_idx:] = score_window[:remaining] + + # Fill beginning (context part) with first window's average + if len(scores) > 0: + first_score = np.mean(scores[:win_size]) if len(scores) >= win_size else np.mean(scores) + padded_scores[:win_size] = first_score + + return padded_scores + + def decision_function(self, X): + """ + Not used for zero-shot approach, present for API consistency. + """ + return self.decision_scores_ diff --git a/models/Moirai_old.py b/models/Moirai_old.py new file mode 100644 index 0000000000000000000000000000000000000000..5407246ce46a77e6dcb8c0cbb07df5cba88a055a --- /dev/null +++ b/models/Moirai_old.py @@ -0,0 +1,202 @@ +""" +Moirai model wrapper for anomaly detection +Adapted from the test_anomaly.py implementation +""" + +import numpy as np +import pandas as pd +import torch +from tqdm import tqdm +import tempfile +import warnings +warnings.filterwarnings('ignore') + +from gluonts.dataset.pandas import PandasDataset +from gluonts.dataset.split import split +from uni2ts.model.moirai.forecast import MoiraiForecast, MoiraiModule + +from .base import BaseDetector + + +class Moirai(BaseDetector): + def __init__(self, + win_size=96, + model_path="Salesforce/moirai-1.0-R-small", + num_samples=100, + device='cuda:0', + use_score=False, + threshold=0.5): + + self.model_name = 'Moirai' + self.win_size = win_size + self.model_path = model_path + self.num_samples = num_samples + self.device = torch.device(device if torch.cuda.is_available() else 'cpu') + self.use_score = use_score + self.threshold = threshold + self.decision_scores_ = None + + def fit(self, data): + """ + Fit Moirai on the data and compute anomaly scores using zero-shot approach + This implementation follows the exact windowing logic from the data loaders + """ + print(f"Moirai zero-shot anomaly detection on data shape: {data.shape}") + + # Handle univariate data (ensure 2D shape) + if data.ndim == 1: + data = data.reshape(-1, 1) + + # Check if we have enough data + if data.shape[0] < 2 * self.win_size: + raise ValueError(f"Data length ({data.shape[0]}) is less than required minimum (2 * win_size = {2 * self.win_size})") + + all_target = [] + all_moirai_preds = [] + last_pred_label = None + + # Create sliding windows following the data loader pattern + # For testing, we use stride = win_size (non-overlapping windows like in data loaders) + num_windows = (data.shape[0] - 2 * self.win_size) // self.win_size + 1 + + for i in tqdm(range(num_windows), desc="Processing windows"): + # Extract window following data loader logic + start_idx = i * self.win_size + end_idx = start_idx + 2 * self.win_size + + if end_idx > data.shape[0]: + break + + # Get the 2*win_size window (this matches batch_x from data loader) + window_data = data[start_idx:end_idx] # Shape: (2*win_size, 
n_features) + + # Create synthetic labels (all zeros initially, replaced by predictions) + label = np.zeros(window_data.shape[0]) + + # Replace the first win_size labels with last prediction if not first window + if i != 0 and last_pred_label is not None: + label[:self.win_size] = last_pred_label + + # Convert to DataFrame format required by GluonTS + # Handle both univariate and multivariate data + if window_data.shape[1] == 1: + # Univariate case + feature = pd.DataFrame(window_data, columns=['value']) + else: + # Multivariate case + feature = pd.DataFrame(window_data) + feature.columns = [f'feature_{j}' for j in range(feature.shape[1])] + + label_df = pd.DataFrame(label, columns=['label']) + df = pd.concat([feature, label_df], axis=1) + + # Add timestamp and unique_id + new_index = pd.date_range( + start=pd.Timestamp('2023-01-01 10:00:00'), + periods=len(df), + freq='T' + ) + new_index_iso = new_index.strftime('%Y-%m-%d %H:%M:%S') + df.insert(0, 'Timestamp', new_index_iso) + df['unique_id'] = 0 + moirai_df = df.set_index('Timestamp') + + # Create GluonTS dataset + feat_cols = feature.columns.tolist() + ds = PandasDataset.from_long_dataframe( + moirai_df, + target="label", + item_id="unique_id", + feat_dynamic_real=feat_cols, + ) + + test_size = self.win_size + _, test_template = split(ds, offset=-test_size) + + test_data = test_template.generate_instances( + prediction_length=self.win_size, + windows=1, + distance=self.win_size, + max_history=self.win_size, + ) + + # Create Moirai model (recreate for each window to avoid memory issues) + model = MoiraiForecast( + module=MoiraiModule.from_pretrained(self.model_path), + prediction_length=self.win_size, + context_length=self.win_size, + patch_size="auto", + num_samples=self.num_samples, + target_dim=1, + feat_dynamic_real_dim=ds.num_feat_dynamic_real, + past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real, + ) + + try: + predictor = model.create_predictor(batch_size=1, device=self.device) + forecasts = predictor.predict(test_data.input) + forecasts = list(forecasts) + + moirai_preds = np.median(forecasts[0].samples, axis=0) + all_moirai_preds.append(moirai_preds) + + # Collect targets for verification + input_it = iter(test_data.label) + for item in input_it: + all_target.extend(item['target']) + + # Update last prediction for next window + if self.use_score: + last_pred_label = moirai_preds + else: + last_pred_label = (moirai_preds >= self.threshold).astype(int) + + except Exception as e: + print(f"Error processing window {i}: {e}") + # Use zeros as fallback + moirai_preds = np.zeros(self.win_size) + all_moirai_preds.append(moirai_preds) + last_pred_label = moirai_preds + + # Concatenate all predictions + if all_moirai_preds: + all_moirai_preds = np.concatenate(all_moirai_preds, axis=0) + else: + all_moirai_preds = np.zeros(0) + + # Create scores array that matches the original data length + # This follows the pattern from data loaders: each window predicts win_size points + padded_scores = np.zeros(data.shape[0]) + + if len(all_moirai_preds) > 0: + # Map predictions back to original data indices + for i, pred_window in enumerate(np.array_split(all_moirai_preds, num_windows)): + if len(pred_window) > 0: + start_pred_idx = self.win_size + i * self.win_size # Start from win_size offset + end_pred_idx = min(start_pred_idx + len(pred_window), data.shape[0]) + actual_len = end_pred_idx - start_pred_idx + padded_scores[start_pred_idx:end_pred_idx] = pred_window[:actual_len] + + # Fill the first win_size points with the first prediction if 
available + if self.win_size < len(padded_scores): + first_pred = all_moirai_preds[0] if len(all_moirai_preds) > 0 else 0 + padded_scores[:self.win_size] = first_pred + + self.decision_scores_ = padded_scores + print(f"Generated anomaly scores shape: {self.decision_scores_.shape}") + return self + + def decision_function(self, X): + """ + Return anomaly scores for X + """ + if self.decision_scores_ is None: + raise ValueError("Model must be fitted before calling decision_function") + return self.decision_scores_[:len(X)] + + def zero_shot(self, data): + """ + Zero-shot anomaly detection + """ + self.fit(data) + return self.decision_scores_ diff --git a/models/NormA.txt b/models/NormA.txt new file mode 100644 index 0000000000000000000000000000000000000000..7bb94ce31c1034f9817cb4f73289e13433ce2626 --- /dev/null +++ b/models/NormA.txt @@ -0,0 +1,6 @@ +# Algorithms protected by patent. Code protected by copyright and provided +# as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from +# the academia may use this code only for academic research purposes, +# provided that the authors are properly acknowledged using the citations +# below. Users from the industry may test and evaluate this code by +# requesting a license. \ No newline at end of file diff --git a/models/OCSVM.py b/models/OCSVM.py new file mode 100644 index 0000000000000000000000000000000000000000..65901834e4ad70d21c5d997201e204be9f94b62a --- /dev/null +++ b/models/OCSVM.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +from sklearn.svm import OneClassSVM +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from sklearn.preprocessing import MinMaxScaler + +from .feature import Window +from .base import BaseDetector +from ..utils.utility import invert_order +from ..utils.utility import zscore + +class OCSVM(BaseDetector): + """Wrapper of scikit-learn one-class SVM Class with more functionalities. + Unsupervised Outlier Detection. + + Estimate the support of a high-dimensional distribution. + + The implementation is based on libsvm. + See http://scikit-learn.org/stable/modules/svm.html#svm-outlier-detection + and :cite:`scholkopf2001estimating`. + + Parameters + ---------- + kernel : string, optional (default='rbf') + Specifies the kernel type to be used in the algorithm. + It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or + a callable. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. + + nu : float, optional + An upper bound on the fraction of training + errors and a lower bound of the fraction of support + vectors. Should be in the interval (0, 1]. By default 0.5 + will be taken. + + degree : int, optional (default=3) + Degree of the polynomial kernel function ('poly'). + Ignored by all other kernels. + + gamma : float, optional (default='auto') + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + If gamma is 'auto' then 1/n_features will be used instead. + + coef0 : float, optional (default=0.0) + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + tol : float, optional + Tolerance for stopping criterion. + + shrinking : bool, optional + Whether to use the shrinking heuristic. 
+ + cache_size : float, optional + Specify the size of the kernel cache (in MB). + + verbose : bool, default: False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, optional (default=-1) + Hard limit on iterations within solver, or -1 for no limit. + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. + the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + + Attributes + ---------- + support_ : array-like, shape = [n_SV] + Indices of support vectors. + + support_vectors_ : array-like, shape = [nSV, n_features] + Support vectors. + + dual_coef_ : array, shape = [1, n_SV] + Coefficients of the support vectors in the decision function. + + coef_ : array, shape = [1, n_features] + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_` + + intercept_ : array, shape = [1,] + Constant in the decision function. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, slidingWindow=100, kernel='rbf', sub=True, degree=3, gamma='auto', coef0=0.0, + tol=1e-3, nu=0.5, shrinking=True, cache_size=200, + verbose=False, max_iter=-1, contamination=0.1, normalize=True): + super(OCSVM, self).__init__(contamination=contamination) + self.slidingWindow = slidingWindow + self.sub = sub + self.kernel = kernel + self.degree = degree + self.gamma = gamma + self.coef0 = coef0 + self.tol = tol + self.nu = nu + self.shrinking = shrinking + self.cache_size = cache_size + self.verbose = verbose + self.max_iter = max_iter + self.normalize = normalize + + def fit(self, X, y=None, sample_weight=None, **params): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + sample_weight : array-like, shape (n_samples,) + Per-sample weights. Rescale C per sample. Higher weights + force the classifier to put more emphasis on these points. + + Returns + ------- + self : object + Fitted estimator. 
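+
+        Examples
+        --------
+        Minimal usage sketch (illustrative only; assumes a univariate
+        series ``X`` of shape ``(n_samples, 1)``):
+
+        >>> clf = OCSVM(slidingWindow=100, nu=0.5)   # doctest: +SKIP
+        >>> clf.fit(X)                               # doctest: +SKIP
+        >>> scores = clf.decision_scores_            # doctest: +SKIP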
+ """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # validate inputs X and y (optional) + X = check_array(X) + X = MinMaxScaler(feature_range=(0,1)).fit_transform(X.T).T + + self._set_n_classes(y) + + self.detector_ = OneClassSVM(kernel=self.kernel, + degree=self.degree, + gamma=self.gamma, + coef0=self.coef0, + tol=self.tol, + nu=self.nu, + shrinking=self.shrinking, + cache_size=self.cache_size, + verbose=self.verbose, + max_iter=self.max_iter) + self.detector_.fit(X=X, y=y, sample_weight=sample_weight, + **params) + + # invert decision_scores_. Outliers comes with higher outlier scores + self.decision_scores_ = invert_order(self.detector_.decision_function(X)) + + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + if self.normalize: X = zscore(X, axis=1, ddof=1) + + # invert outlier scores. Outliers comes with higher outlier scores + decision_scores_ = invert_order(self.detector_.decision_function(X)) + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + return decision_scores_ + + @property + def support_(self): + """Indices of support vectors. + Decorator for scikit-learn One class SVM attributes. + """ + return self.detector_.support_ + + @property + def support_vectors_(self): + """Support vectors. + Decorator for scikit-learn One class SVM attributes. + """ + return self.detector_.support_vectors_ + + @property + def dual_coef_(self): + """Coefficients of the support vectors in the decision function. + Decorator for scikit-learn One class SVM attributes. + """ + return self.detector_.dual_coef_ + + @property + def coef_(self): + """Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_` + Decorator for scikit-learn One class SVM attributes. + """ + return self.detector_.coef_ + + @property + def intercept_(self): + """ Constant in the decision function. + Decorator for scikit-learn One class SVM attributes. 
+ """ + return self.detector_.intercept_ \ No newline at end of file diff --git a/models/OFA.py b/models/OFA.py new file mode 100644 index 0000000000000000000000000000000000000000..c472613b00afd9e2c30e5b40c3430dc4ec47a40d --- /dev/null +++ b/models/OFA.py @@ -0,0 +1,364 @@ +""" +This function is adapted from [NeurIPS2023-One-Fits-All] by [tianzhou2011] +Original source: [https://github.com/DAMO-DI-ML/NeurIPS2023-One-Fits-All] +""" + +import argparse +from typing import Dict +import numpy as np +import torchinfo +import torch +from torch import nn, optim +from torch.utils.data import DataLoader +from torch.nn.utils import weight_norm +import tqdm +import os, math +from typing import Optional +import torch.nn.functional as F + +from transformers.models.gpt2.modeling_gpt2 import GPT2Model +from einops import rearrange + + +from ..utils.torch_utility import EarlyStoppingTorch, PositionalEmbedding, TokenEmbedding, TemporalEmbedding, get_gpu, TimeFeatureEmbedding, DataEmbedding, adjust_learning_rate +from ..utils.dataset import ReconstructDataset + +class DataEmbedding_wo_pos(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_pos, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(x) + self.temporal_embedding(x_mark) + return self.dropout(x) + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, stride)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = TokenEmbedding(patch_len, d_model) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars + +class DataEmbedding_wo_time(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_time, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x) + +class Model(nn.Module): + + def __init__(self, + pred_len=0, + seq_len=100, + patch_size=1, + stride=1, + d_model = 768, + d_ff = 768, + embed = "timeF", + gpt_layers = 6, + enc_in = 1, + c_out = 1, + freq = "h", + dropout= 0.1, + mlp = 0, + model_path = "pre_train"): + super(Model, self).__init__() + self.pred_len = pred_len + self.seq_len = seq_len + self.patch_size = patch_size + self.stride = 
stride + self.seq_len = seq_len + self.d_ff = d_ff + self.d_model = d_model + self.embed = embed + self.gpt_layers = gpt_layers + self.enc_in = enc_in + self.c_out = c_out + self.freq = freq + self.dropout = dropout + self.model_path = model_path + self.mlp = mlp + + self.patch_num = (self.seq_len + self.pred_len - self.patch_size) // self.stride + 1 + + self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride)) + self.patch_num += 1 + self.enc_embedding = DataEmbedding(self.enc_in * self.patch_size, self.d_model, self.embed, self.freq, + self.dropout) + + self.gpt2 = GPT2Model.from_pretrained('gpt2', output_attentions=True, output_hidden_states=True) + self.gpt2.h = self.gpt2.h[:self.gpt_layers] + + for i, (name, param) in enumerate(self.gpt2.named_parameters()): + if 'ln' in name or 'wpe' in name: # or 'mlp' in name: + param.requires_grad = True + elif 'mlp' in name and self.mlp == 1: + param.requires_grad = True + else: + param.requires_grad = False + + # if configs.use_gpu: + # device = torch.device('cuda:{}'.format(0)) + # self.gpt2.to(device=device) + + self.ln_proj = nn.LayerNorm(self.d_ff) + self.out_layer = nn.Linear( + self.d_ff, + self.c_out, + bias=True) + + def forward(self, x_enc): + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + + def anomaly_detection(self, x_enc): + B, L, M = x_enc.shape + + # Normalization from Non-stationary Transformer + + seg_num = 25 + x_enc = rearrange(x_enc, 'b (n s) m -> b n s m', s=seg_num) + means = x_enc.mean(2, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=2, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + x_enc = rearrange(x_enc, 'b n s m -> b (n s) m') + + # means = x_enc.mean(1, keepdim=True).detach() + # x_enc = x_enc - means + # stdev = torch.sqrt( + # torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + # x_enc /= stdev + + # enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + enc_out = torch.nn.functional.pad(x_enc, (0, 768-x_enc.shape[-1])) + + outputs = self.gpt2(inputs_embeds=enc_out).last_hidden_state + + outputs = outputs[:, :, :self.d_ff] + # outputs = self.ln_proj(outputs) + dec_out = self.out_layer(outputs) + + # De-Normalization from Non-stationary Transformer + + dec_out = rearrange(dec_out, 'b (n s) m -> b n s m', s=seg_num) + dec_out = dec_out * \ + (stdev[:, :, 0, :].unsqueeze(2).repeat( + 1, 1, seg_num, 1)) + dec_out = dec_out + \ + (means[:, :, 0, :].unsqueeze(2).repeat( + 1, 1, seg_num, 1)) + dec_out = rearrange(dec_out, 'b n s m -> b (n s) m') + + # dec_out = dec_out * \ + # (stdev[:, 0, :].unsqueeze(1).repeat( + # 1, self.pred_len + self.seq_len, 1)) + # dec_out = dec_out + \ + # (means[:, 0, :].unsqueeze(1).repeat( + # 1, self.pred_len + self.seq_len, 1)) + return dec_out + +class OFA(): + def __init__(self, + win_size = 100, + stride = 1, + enc_in = 1, + features = 'M', + batch_size = 128, + learning_rate = 0.0001, + epochs = 10, + patience = 3, + lradj = "type1", + validation_size=0.2): + super().__init__() + self.win_size = win_size + self.stride = stride + self.enc_in = enc_in + self.features = features + self.batch_size = batch_size + self.learning_rate = learning_rate + self.epochs = epochs + self.patience = patience + self.lradj = lradj + self.validation_size = validation_size + + self.decision_scores_ = None + + cuda = True + self.y_hats = None + + self.cuda = cuda + self.device = get_gpu(self.cuda) + + self.model = Model(seq_len=self.win_size, enc_in=self.enc_in, c_out=self.enc_in).float().to(self.device) + 
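# The remaining setup is reconstruction-based: the partially frozen GPT-2 backbone
# is trained with Adam against an MSE loss between each input window and its
# reconstruction, with early stopping on the validation loss (see fit below);
# input_shape is only consumed by param_statistic() for the torchinfo summary.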
self.model_optim = optim.Adam(self.model.parameters(), lr=self.learning_rate) + self.criterion = nn.MSELoss() + + self.early_stopping = EarlyStoppingTorch(None, patience=self.patience) + self.input_shape = (self.batch_size, self.win_size, self.enc_in) + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=False + ) + + train_steps = len(train_loader) + for epoch in range(1, self.epochs + 1): + ## Training + train_loss = 0 + self.model.train() + + loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for i, (batch_x, _) in loop: + self.model_optim.zero_grad() + + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x) + loss = self.criterion(outputs, batch_x) + + loss.backward() + self.model_optim.step() + + train_loss += loss.cpu().item() + + loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=train_loss/(i+1)) + + ## Validation + self.model.eval() + total_loss = [] + + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for i, (batch_x, _) in loop: + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x) + f_dim = -1 if self.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + pred = outputs.detach().cpu() + true = batch_x.detach().cpu() + + loss = self.criterion(pred, true) + total_loss.append(loss) + loop.set_description(f'Valid Epoch [{epoch}/{self.epochs}]') + + valid_loss = np.average(total_loss) + loop.set_postfix(loss=loss.item(), valid_loss=valid_loss) + self.early_stopping(valid_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + adjust_learning_rate(self.model_optim, epoch + 1, self.lradj, self.learning_rate) + + + def decision_function(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size, stride=self.stride), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + attens_energy = [] + y_hats = [] + self.anomaly_criterion = nn.MSELoss(reduce=False) + + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for i, (batch_x, _) in loop: + batch_x = batch_x.float().to(self.device) + # reconstruction + outputs = self.model(batch_x) + # # criterion + # print('batch_x: ', batch_x.shape) + # print('outputs: ', outputs.shape) + score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) + y_hat = torch.squeeze(outputs, -1) + + score = score.detach().cpu().numpy()[:, -1] + y_hat = y_hat.detach().cpu().numpy()[:, -1] + + attens_energy.append(score) + y_hats.append(y_hat) + loop.set_description(f'Testing Phase: ') + + attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) + scores = np.array(attens_energy) + + y_hats = np.concatenate(y_hats, axis=0).reshape(-1) + y_hats = np.array(y_hats) + + assert scores.ndim == 1 + + import shutil + self.save_path = None + if self.save_path and os.path.exists(self.save_path): + shutil.rmtree(self.save_path) + + # Custom stride length + scores_win = [scores[i] for i in range(scores.shape[0])] + 
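# Map per-window scores back onto individual timestamps: each window's scalar
# score is added to every position the window covers, and the accumulated sum is
# divided by the coverage count (floored at 1 so uncovered points remain zero),
# yielding one decision score per input point.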
self.decision_scores_ = np.zeros(len(data)) + count = np.zeros(len(data)) + for i, score in enumerate(scores_win): + start = i * self.stride + end = start + self.win_size + self.decision_scores_[start:end] += score + count[start:end] += 1 + self.decision_scores_ = self.decision_scores_ / np.maximum(count, 1) + + return self.decision_scores_ + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, self.input_shape, verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) diff --git a/models/OmniAnomaly.py b/models/OmniAnomaly.py new file mode 100644 index 0000000000000000000000000000000000000000..013110815715ce2626070130c951c6003e6e65b6 --- /dev/null +++ b/models/OmniAnomaly.py @@ -0,0 +1,221 @@ +""" +This function is adapted from [OmniAnomaly] by [TsingHuasuya et al.] +Original source: [https://github.com/NetManAIOps/OmniAnomaly] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +import torch +import torch.nn.functional as F +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from torch import nn +from torch.utils.data import DataLoader +from sklearn.preprocessing import MinMaxScaler +import tqdm + +from .base import BaseDetector +from ..utils.dataset import ReconstructDataset +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu + +class OmniAnomalyModel(nn.Module): + def __init__(self, feats, device): + super(OmniAnomalyModel, self).__init__() + self.name = 'OmniAnomaly' + self.device = device + self.lr = 0.002 + self.beta = 0.01 + self.n_feats = feats + self.n_hidden = 32 + self.n_latent = 8 + self.lstm = nn.GRU(feats, self.n_hidden, 2) + self.encoder = nn.Sequential( + nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), + nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), + # nn.Flatten(), + nn.Linear(self.n_hidden, 2*self.n_latent) + ) + self.decoder = nn.Sequential( + nn.Linear(self.n_latent, self.n_hidden), nn.PReLU(), + nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), + nn.Linear(self.n_hidden, self.n_feats), nn.Sigmoid(), + ) + + def forward(self, x, hidden = None): + bs = x.shape[0] + win = x.shape[1] + + # hidden = torch.rand(2, bs, self.n_hidden, dtype=torch.float64) if hidden is not None else hidden + hidden = torch.rand(2, bs, self.n_hidden).to(self.device) if hidden is not None else hidden + + out, hidden = self.lstm(x.view(-1, bs, self.n_feats), hidden) + + # print('out: ', out.shape) # (L, bs, n_hidden) + # print('hidden: ', hidden.shape) # (2, bs, n_hidden) + + ## Encode + x = self.encoder(out) + mu, logvar = torch.split(x, [self.n_latent, self.n_latent], dim=-1) + ## Reparameterization trick + std = torch.exp(0.5*logvar) + eps = torch.randn_like(std) + x = mu + eps*std + ## Decoder + x = self.decoder(x) # (L, bs, n_feats) + return x.reshape(bs, win*self.n_feats), mu.reshape(bs, win*self.n_latent), logvar.reshape(bs, win*self.n_latent), hidden + + +class OmniAnomaly(BaseDetector): + def __init__(self, + win_size = 5, + feats = 1, + batch_size = 128, + epochs = 50, + patience = 3, + lr = 0.002, + validation_size=0.2 + ): + super().__init__() + + self.__anomaly_score = None + + self.cuda = True + self.device = get_gpu(self.cuda) + + self.win_size = win_size + self.batch_size = batch_size + self.epochs = epochs + self.feats = feats + self.validation_size = validation_size + + self.model = OmniAnomalyModel(feats=self.feats, device=self.device).to(self.device) + self.optimizer = torch.optim.AdamW( + 
self.model.parameters(), lr=lr, weight_decay=1e-5 + ) + self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) + self.criterion = nn.MSELoss(reduction = 'none') + + self.early_stopping = EarlyStoppingTorch(None, patience=patience) + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + mses, klds = [], [] + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + n = epoch + 1 + avg_loss = 0 + loop = tqdm.tqdm( + enumerate(train_loader), total=len(train_loader), leave=True + ) + for idx, (d, _) in loop: + d = d.to(self.device) + # print('d: ', d.shape) + + y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) + d = d.view(-1, self.feats*self.win_size) + MSE = torch.mean(self.criterion(y_pred, d), axis=-1) + KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) + loss = torch.mean(MSE + self.model.beta * KLD) + + mses.append(torch.mean(MSE).item()) + klds.append(self.model.beta * torch.mean(KLD).item()) + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) + + if len(valid_loader) > 0: + self.model.eval() + avg_loss_val = 0 + loop = tqdm.tqdm( + enumerate(valid_loader), total=len(valid_loader), leave=True + ) + with torch.no_grad(): + for idx, (d, _) in loop: + d = d.to(self.device) + y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) + d = d.view(-1, self.feats*self.win_size) + MSE = torch.mean(self.criterion(y_pred, d), axis=-1) + KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) + loss = torch.mean(MSE + self.model.beta * KLD) + + avg_loss_val += loss.cpu().item() + loop.set_description( + f"Validation Epoch [{epoch}/{self.epochs}]" + ) + loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) + + self.scheduler.step() + if len(valid_loader) > 0: + avg_loss = avg_loss_val / len(valid_loader) + else: + avg_loss = avg_loss / len(train_loader) + self.early_stopping(avg_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + y_preds = [] + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + + with torch.no_grad(): + for idx, (d, _) in loop: + d = d.to(self.device) + # print('d: ', d.shape) + + y_pred, _, _, hidden = self.model(d, hidden if idx else None) + y_preds.append(y_pred) + d = d.view(-1, self.feats*self.win_size) + + # print('y_pred: ', y_pred.shape) + # print('d: ', d.shape) + loss = torch.mean(self.criterion(y_pred, d), axis=-1) + # print('loss: ', loss.shape) + + scores.append(loss.cpu()) + + scores = torch.cat(scores, dim=0) + scores = scores.numpy() + + self.__anomaly_score = scores + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + 
list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def param_statistic(self, save_file): + pass diff --git a/models/PCA.py b/models/PCA.py new file mode 100644 index 0000000000000000000000000000000000000000..531055e24915d7ce4e25bfcb7702dcf5b83aef79 --- /dev/null +++ b/models/PCA.py @@ -0,0 +1,383 @@ +""" +This function is adapted from [pyod] by [yzhao062] +Original source: [https://github.com/yzhao062/pyod] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +from scipy.spatial.distance import cdist +from sklearn.decomposition import PCA as sklearn_PCA +from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted + +from .feature import Window +from .base import BaseDetector +from ..utils.utility import check_parameter +from ..utils.utility import standardizer + +class PCA(BaseDetector): + """Principal component analysis (PCA) can be used in detecting outliers. + PCA is a linear dimensionality reduction using Singular Value Decomposition + of the data to project it to a lower dimensional space. + + In this procedure, covariance matrix of the data can be decomposed to + orthogonal vectors, called eigenvectors, associated with eigenvalues. The + eigenvectors with high eigenvalues capture most of the variance in the + data. + + Therefore, a low dimensional hyperplane constructed by k eigenvectors can + capture most of the variance in the data. However, outliers are different + from normal data points, which is more obvious on the hyperplane + constructed by the eigenvectors with small eigenvalues. + + Therefore, outlier scores can be obtained as the sum of the projected + distance of a sample on all eigenvectors. + See :cite:`shyu2003novel,aggarwal2015outlier` for details. + + Score(X) = Sum of weighted euclidean distance between each sample to the + hyperplane constructed by the selected eigenvectors + + Parameters + ---------- + n_components : int, float, None or string + Number of components to keep. + if n_components is not set all components are kept:: + + n_components == min(n_samples, n_features) + + if n_components == 'mle' and svd_solver == 'full', Minka\'s MLE is used + to guess the dimension + if ``0 < n_components < 1`` and svd_solver == 'full', select the number + of components such that the amount of variance that needs to be + explained is greater than the percentage specified by n_components + n_components cannot be equal to n_features for svd_solver == 'arpack'. + + n_selected_components : int, optional (default=None) + Number of selected principal components + for calculating the outlier scores. It is not necessarily equal to + the total number of the principal components. If not set, use + all principal components. + + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, i.e. + the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + copy : bool (default True) + If False, data passed to fit are overwritten and running + fit(X).transform(X) will not yield the expected results, + use fit_transform(X) instead. 
+ + whiten : bool, optional (default False) + When True (False by default) the `components_` vectors are multiplied + by the square root of n_samples and then divided by the singular values + to ensure uncorrelated outputs with unit component-wise variances. + + Whitening will remove some information from the transformed signal + (the relative variance scales of the components) but can sometime + improve the predictive accuracy of the downstream estimators by + making their data respect some hard-wired assumptions. + + svd_solver : string {'auto', 'full', 'arpack', 'randomized'} + auto : + the solver is selected by a default policy based on `X.shape` and + `n_components`: if the input data is larger than 500x500 and the + number of components to extract is lower than 80% of the smallest + dimension of the data, then the more efficient 'randomized' + method is enabled. Otherwise the exact full SVD is computed and + optionally truncated afterwards. + full : + run exact full SVD calling the standard LAPACK solver via + `scipy.linalg.svd` and select the components by postprocessing + arpack : + run SVD truncated to n_components calling ARPACK solver via + `scipy.sparse.linalg.svds`. It requires strictly + 0 < n_components < X.shape[1] + randomized : + run randomized SVD by the method of Halko et al. + + tol : float >= 0, optional (default .0) + Tolerance for singular values computed by svd_solver == 'arpack'. + + iterated_power : int >= 0, or 'auto', (default 'auto') + Number of iterations for the power method computed by + svd_solver == 'randomized'. + + random_state : int, RandomState instance or None, optional (default None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. Used when ``svd_solver`` == 'arpack' or 'randomized'. + + weighted : bool, optional (default=True) + If True, the eigenvalues are used in score computation. + The eigenvectors with small eigenvalues comes with more importance + in outlier score calculation. + + standardization : bool, optional (default=True) + If True, perform standardization first to convert + data to zero mean and unit variance. + See http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html + + Attributes + ---------- + components_ : array, shape (n_components, n_features) + Principal axes in feature space, representing the directions of + maximum variance in the data. The components are sorted by + ``explained_variance_``. + + explained_variance_ : array, shape (n_components,) + The amount of variance explained by each of the selected components. + + Equal to n_components largest eigenvalues + of the covariance matrix of X. + + explained_variance_ratio_ : array, shape (n_components,) + Percentage of variance explained by each of the selected components. + + If ``n_components`` is not set then all components are stored and the + sum of explained variances is equal to 1.0. + + singular_values_ : array, shape (n_components,) + The singular values corresponding to each of the selected components. + The singular values are equal to the 2-norms of the ``n_components`` + variables in the lower-dimensional space. + + mean_ : array, shape (n_features,) + Per-feature empirical mean, estimated from the training set. + + Equal to `X.mean(axis=0)`. + + n_components_ : int + The estimated number of components. 
When n_components is set + to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this + number is estimated from input data. Otherwise it equals the parameter + n_components, or n_features if n_components is None. + + noise_variance_ : float + The estimated noise covariance following the Probabilistic PCA model + from Tipping and Bishop 1999. See "Pattern Recognition and + Machine Learning" by C. Bishop, 12.2.1 p. 574 or + http://www.miketipping.com/papers/met-mppca.pdf. It is required to + computed the estimated data covariance and score samples. + + Equal to the average of (min(n_features, n_samples) - n_components) + smallest eigenvalues of the covariance matrix of X. + + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + def __init__(self, slidingWindow=100, sub = True, n_components=None, n_selected_components=None, + contamination=0.1, copy=True, whiten=False, svd_solver='auto', + tol=0.0, iterated_power='auto', random_state=0, + weighted=True, standardization=True, zero_pruning=True): + + super(PCA, self).__init__(contamination=contamination) + self.slidingWindow = slidingWindow + self.sub = sub + self.n_components = n_components + self.n_selected_components = n_selected_components + self.copy = copy + self.whiten = whiten + self.svd_solver = svd_solver + self.tol = tol + self.iterated_power = iterated_power + self.random_state = random_state + self.weighted = weighted + self.standardization = standardization + self.zero_pruning = zero_pruning + + # noinspection PyIncorrectDocstring + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + # validate inputs X and y (optional) + X = check_array(X) + self._set_n_classes(y) + + # PCA is recommended to use on the standardized data (zero mean and + # unit variance). 
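# The steps below standardize the windowed data (the fitted scaler is stored in
# self.scaler_ and reused by decision_function), optionally drop all-zero columns
# (zero_pruning) to avoid degenerate directions, and fit sklearn's PCA. Outlier
# scores are then the per-sample distances to the selected (smallest-eigenvalue)
# components, divided by explained_variance_ratio_ when weighted=True, so
# low-variance directions contribute more.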
+ if self.standardization: + X, self.scaler_ = standardizer(X, keep_scalar=True) + + if self.zero_pruning: + non_zero_columns = np.any(X != 0, axis=0) + X = X[:, non_zero_columns] + + self.detector_ = sklearn_PCA(n_components=self.n_components, + copy=self.copy, + whiten=self.whiten, + svd_solver=self.svd_solver, + tol=self.tol, + iterated_power=self.iterated_power, + random_state=self.random_state) + self.detector_.fit(X=X, y=y) + + # copy the attributes from the sklearn PCA object + self.n_components_ = self.detector_.n_components_ + self.components_ = self.detector_.components_ + + # validate the number of components to be used for outlier detection + if self.n_selected_components is None: + self.n_selected_components_ = self.n_components_ + else: + self.n_selected_components_ = self.n_selected_components + check_parameter(self.n_selected_components_, 1, self.n_components_, + include_left=True, include_right=True, + param_name='n_selected_components_') + + # use eigenvalues as the weights of eigenvectors + self.w_components_ = np.ones([self.n_components_, ]) + if self.weighted: + self.w_components_ = self.detector_.explained_variance_ratio_ + + # outlier scores is the sum of the weighted distances between each + # sample to the eigenvectors. The eigenvectors with smaller + # eigenvalues have more influence + # Not all eigenvectors are used, only n_selected_components_ smallest + # are used since they better reflect the variance change + + self.selected_components_ = self.components_[ + -1 * self.n_selected_components_:, :] + self.selected_w_components_ = self.w_components_[ + -1 * self.n_selected_components_:] + + self.decision_scores_ = np.sum( + cdist(X, self.selected_components_) / self.selected_w_components_, + axis=1).ravel() + + # padded decision_scores_ + if self.decision_scores_.shape[0] < n_samples: + self.decision_scores_ = np.array([self.decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(self.decision_scores_) + [self.decision_scores_[-1]]*((self.slidingWindow-1)//2)) + + self._process_decision_scores() + return self + + def decision_function(self, X): + """Predict raw anomaly score of X using the fitted detector. + + The anomaly score of an input sample is computed based on different + detector algorithms. For consistency, outliers are assigned with + larger anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + check_is_fitted(self, ['components_', 'w_components_']) + + n_samples, n_features = X.shape + + # Converting time series data into matrix format + X = Window(window = self.slidingWindow).convert(X) + + X = check_array(X) + if self.standardization: + X = self.scaler_.transform(X) + + decision_scores_ = np.sum( + cdist(X, self.selected_components_) / self.selected_w_components_, + axis=1).ravel() + # padded decision_scores_ + if decision_scores_.shape[0] < n_samples: + decision_scores_ = np.array([decision_scores_[0]]*math.ceil((self.slidingWindow-1)/2) + + list(decision_scores_) + [decision_scores_[-1]]*((self.slidingWindow-1)//2)) + return decision_scores_ + + @property + def explained_variance_(self): + """The amount of variance explained by each of the selected components. + + Equal to n_components largest eigenvalues + of the covariance matrix of X. 
+ + Decorator for scikit-learn PCA attributes. + """ + return self.detector_.explained_variance_ + + @property + def explained_variance_ratio_(self): + """Percentage of variance explained by each of the selected components. + + If ``n_components`` is not set then all components are stored and the + sum of explained variances is equal to 1.0. + + Decorator for scikit-learn PCA attributes. + """ + return self.detector_.explained_variance_ratio_ + + @property + def singular_values_(self): + """The singular values corresponding to each of the selected + components. The singular values are equal to the 2-norms of the + ``n_components`` variables in the lower-dimensional space. + + Decorator for scikit-learn PCA attributes. + """ + return self.detector_.singular_values_ + + @property + def mean_(self): + """Per-feature empirical mean, estimated from the training set. + + Decorator for scikit-learn PCA attributes. + """ + return self.detector_.mean_ + + @property + def noise_variance_(self): + """The estimated noise covariance following the Probabilistic PCA model + from Tipping and Bishop 1999. See "Pattern Recognition and + Machine Learning" by C. Bishop, 12.2.1 p. 574 or + http://www.miketipping.com/papers/met-mppca.pdf. It is required to + computed the estimated data covariance and score samples. + + Equal to the average of (min(n_features, n_samples) - n_components) + smallest eigenvalues of the covariance matrix of X. + + Decorator for scikit-learn PCA attributes. + """ + return self.detector_.noise_variance_ \ No newline at end of file diff --git a/models/POLY.py b/models/POLY.py new file mode 100644 index 0000000000000000000000000000000000000000..2f9107032f4c9a26cb8dbe05309ce5bdce86f5cf --- /dev/null +++ b/models/POLY.py @@ -0,0 +1,250 @@ +"""Polynomial Anomoly Detector with GARCH method and raw error method +""" +# Author: Yinchen Wu + +import numpy as np +import math +from scipy.special import erf +from sklearn.preprocessing import MinMaxScaler +from sklearn.utils.validation import check_is_fitted +from .base import BaseDetector +from .distance import Fourier +from ..utils.utility import zscore + + +class POLY(BaseDetector): + """An elementary method to detect pointwise anomolies using polynomial approxiamtion. + A polynomial of certain degree and window size is fitted to the given timeseries dataset. + A GARCH method is ran on the difference betweeen the approximation and the true value of + the dataset to estimate the volatitilies of each point. A detector score is derived on each + point based on the estimated volatitilies and residual to measure the normality of each point. + An alternative method that only considers absoulte difference is also used. + Parameters + ---------- + Power : int, optional (default=1) + The power of polynomial fitted to the data + neighborhood : int, optional (default=max (100, 10*window size)) + The number of samples to fit for one subsequence. Since the timeseries may vary, + to caculate the score for the subsequnece (a, a+k) of samples k, we only fit the + polynomal on its neighborhood. + window: int, optional (default = 20) + The length of the window to detect the given anomolies + contamination : float in (0., 0.55), optional (default=0.1) + The amount of contamination of the data set, i.e. the proportion + of outliers in the data set. Used when fitting to define the threshold + on the decision function. + + Attributes + ---------- + estimators_ : dictionary of coefficients at each polynomial + The collection of fitted sub-estimators. 
+ decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + def __init__(self, power = 1, window = 200, neighborhood = None, contamination = 0.1, normalize=True): + self.power = power + self.window = window + self.avg_window = None + if neighborhood == None: + self.neighborhood = max(10*window, 100) + else: + self.neighborhood = neighborhood + + self.contamination = contamination + self.normalize = normalize + self.model_name = 'POLY' + + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The input samples. + y : Ignored + Not used, present for API consistency by convention. + Returns + ------- + self : object + Fitted estimator. + """ + if self.normalize: X = (X - X.min()) / (X.max() - X.min()) + + X = X.squeeze() + # validate inputs X and y (optional) + self._set_n_classes(y) + + self.X_train_ = X + self.n_train_ = len(X) + self.decision_scores_ = np.zeros([self.n_train_, 1]) + + self.n_initial_ = min(500, int(0.1 * self.n_train_)) + self.X_initial_ = X[:self.n_initial_] + + window = self.window + + if self.avg_window != None: + self.neighborhood = max(self.neighborhood, 6*self.avg_window) + + if self.neighborhood > len(X): + self.neighborhood = len(X) + + neighborhood = self.neighborhood + + N = math.floor(self.n_train_ / window) + M = math.ceil(self.n_initial_ / window) + data = self.X_train_ + power=self.power + fit = {} + + for i in range(M, N+1): + + index = int(i * window) + neighbor = int(neighborhood/2) + + if index + neighbor < self.n_train_ and index - neighbor > 0: + + x = np.concatenate((np.arange(index - neighbor, index), np.arange(index + window, index + neighbor))) + y = np.concatenate((data[index - neighbor: index], data[index + window: index + neighbor] )) + mymodel = np.poly1d(np.polyfit(x, y, power)) + fit['model' + str(index)] = mymodel + elif index + neighbor >= self.n_train_ and index + window < self.n_train_: + x = np.concatenate((np.arange(self.n_train_ - neighborhood, index), np.arange(index + window, self.n_train_))) + y = np.concatenate((data[self.n_train_ - neighborhood: index], data[index + window: self.n_train_] )) + mymodel = np.poly1d(np.polyfit(x, y, power)) + fit['model' + str(index)] = mymodel + elif index + window >= self.n_train_: + x = np.arange(self.n_train_ - neighborhood, index) + y = data[self.n_train_ - neighborhood: index] + mymodel = np.poly1d(np.polyfit(x, y, power)) + fit['model' + str(index)] = mymodel + elif index + window < neighborhood: + x = np.concatenate((np.arange(0, index), np.arange(index + window, neighborhood))) + y = np.concatenate((data[0: index], data[index + window: neighborhood] )) + try: + mymodel = np.poly1d(np.polyfit(x, y, power)) + except: + x = np.concatenate((np.arange(0, index), np.arange(len(data[index + window: neighborhood])))) + mymodel = np.poly1d(np.polyfit(x, y, power)) + + fit['model' + str(index)] = mymodel + else: + x = 
np.concatenate((np.arange(index - neighbor, index), np.arange(index + window, index + neighbor))) + y = np.concatenate((data[index - neighbor: index], data[index + window: index + neighbor] )) + print(data.shape) + print(x.shape) + print(y.shape) + mymodel = np.poly1d(np.polyfit(x, y, power)) + fit['model' + str(index)] = mymodel + + Y = np.zeros(self.n_train_ ) + for i in range(M, N): + myline = np.linspace(window *i, window * (i+1), window) + Y[window *i: window * (i+1)] = fit['model' + str(i * window)](myline) + if self.n_train_ % N != 0: + x = np.arange(N*window, self.n_train_ ) + Y[N*window:] = fit['model'+str(N * window)](x) + self.estimation = Y + self.estimator = fit + + measure = Fourier() + measure.detector = self + measure.set_param() + self.decision_scores_ = self.decision_function(X, measure=measure) + return self + + + + def decision_function(self, X= False, measure = None): + """Derive the decision score based on the given distance measure + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The input samples. + measure : object + object for given distance measure with methods to derive the score + Returns + ------- + self : object + Fitted estimator. + """ + if type(X) != bool: + self.X_train_ = X + estimation = self.estimation + window = self.window + n_train_ = self.n_train_ + score = np.zeros(n_train_) + N = math.floor((self.n_train_) / window) + M = math.ceil(self.n_initial_ / window) + + for i in range(M, N+1): + index = i * window + if i == N: + end = self.n_train_ + else: + end = index + window + score[index: index+window] = measure.measure(self.X_train_[index: end], estimation[index: end], index) + self._mu = np.mean(self.decision_scores_) + self._sigma = np.std(self.decision_scores_) + return score + + def predict_proba(self, X, method='linear', measure = None): + """Predict the probability of a sample being outlier. Two approaches + are possible: + 1. simply use Min-max conversion to linearly transform the outlier + scores into the range of [0,1]. The model must be + fitted first. + 2. use unifying scores, see :cite:`kriegel2011interpreting`. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + method : str, optional (default='linear') + probability conversion method. It must be one of + 'linear' or 'unify'. + Returns + ------- + outlier_probability : numpy array of shape (n_samples,) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. Return the outlier probability, ranging + in [0,1]. 
+ """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + train_scores = self.decision_scores_ + + self.fit(X) + self.decision_function(measure = measure) + test_scores = self.decision_scores_ + + probs = np.zeros([X.shape[0], int(self._classes)]) + if method == 'linear': + scaler = MinMaxScaler().fit(train_scores.reshape(-1, 1)) + probs[:, 1] = scaler.transform( + test_scores.reshape(-1, 1)).ravel().clip(0, 1) + probs[:, 0] = 1 - probs[:, 1] + return probs + + elif method == 'unify': + # turn output into probability + pre_erf_score = (test_scores - self._mu) / ( + self._sigma * np.sqrt(2)) + erf_score = erf(pre_erf_score) + probs[:, 1] = erf_score.clip(0, 1).ravel() + probs[:, 0] = 1 - probs[:, 1] + return probs + else: + raise ValueError(method, + 'is not a valid probability conversion method') \ No newline at end of file diff --git a/models/README.md b/models/README.md new file mode 100644 index 0000000000000000000000000000000000000000..82ee292577732eaa066650b0bd19df714d67603a --- /dev/null +++ b/models/README.md @@ -0,0 +1,23 @@ +### Extra Installation Direction + +If you want to use [Chronos](https://github.com/amazon-science/chronos-forecasting), please install the following +```bash +git clone https://github.com/autogluon/autogluon +cd autogluon && pip install -e timeseries/[TimeSeriesDataFrame,TimeSeriesPredictor] +``` + +If you want to use [MOMENT](https://github.com/moment-timeseries-foundation-model/moment), please install the following +```bash +pip install momentfm # only support Python 3.11 for now +``` + +If you want to use [TimesFM](https://github.com/google-research/timesfm), please install the following +```bash +pip install timesfm[torch] +``` + +If you want to use [Lag-Llama](https://github.com/time-series-foundation-models/lag-llama), please install the following +```bash +gluonts[torch]<=0.14.4 +``` +and download the checkpoint from [Link](https://github.com/time-series-foundation-models/lag-llama) and add the path to [Lag_Llama.py](https://github.com/TheDatumOrg/TSB-AD/blob/main/TSB_AD/models/Lag_Llama.py). 
\ No newline at end of file diff --git a/models/RobustPCA.py b/models/RobustPCA.py new file mode 100644 index 0000000000000000000000000000000000000000..a5c0ec228a8255e4d42cb1b4b8539971fa87b2a7 --- /dev/null +++ b/models/RobustPCA.py @@ -0,0 +1,109 @@ +""" +This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] +Original source: [https://github.com/TimeEval/TimeEval-algorithms] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +from sklearn.decomposition import PCA +from typing import Optional + +from .base import BaseDetector +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_array +from scipy.spatial.distance import cdist + +class Robust_PCA: + def __init__(self, D, mu=None, lmbda=None): + self.D = D + self.S = np.zeros(self.D.shape) + err = np.inf + + if mu: + self.mu = mu + else: + self.mu = np.prod(self.D.shape) / (4 * np.linalg.norm(self.D, ord=1)) + + self.mu_inv = 1 / self.mu + + if lmbda: + self.lmbda = lmbda + else: + self.lmbda = 1 / np.sqrt(np.max(self.D.shape)) + + @staticmethod + def frobenius_norm(M): + return np.linalg.norm(M, ord='fro') + + @staticmethod + def shrink(M, tau): + return np.sign(M) * np.maximum((np.abs(M) - tau), np.zeros(M.shape)) + + def svd_threshold(self, M, tau): + U, S, V = np.linalg.svd(M, full_matrices=False) + return np.dot(U, np.dot(np.diag(self.shrink(S, tau)), V)) + + def fit(self, tol=None, max_iter=1000, iter_print=100): + iter = 0 + err = np.Inf + Sk = self.S + Yk = self.Y + Lk = np.zeros(self.D.shape) + + if tol: + _tol = tol + else: + _tol = 1E-7 * self.frobenius_norm(self.D) + + #this loop implements the principal component pursuit (PCP) algorithm + #located in the table on page 29 of https://arxiv.org/pdf/0912.3599.pdf + while (err > _tol) and iter < max_iter: + Lk = self.svd_threshold( + self.D - Sk + self.mu_inv * Yk, self.mu_inv) #this line implements step 3 + Sk = self.shrink( + self.D - Lk + (self.mu_inv * Yk), self.mu_inv * self.lmbda) #this line implements step 4 + Yk = Yk + self.mu * (self.D - Lk - Sk) #this line implements step 5 + err = self.frobenius_norm(self.D - Lk - Sk) + iter += 1 + if (iter % iter_print) == 0 or iter == 1 or iter > max_iter or err <= _tol: + print('iteration: {0}, error: {1}'.format(iter, err)) + + self.L = Lk + self.S = Sk + return Lk, Sk + +class RobustPCA(BaseDetector): + def __init__(self, max_iter: int = 1000, n_components = None, zero_pruning = True): + self.pca: Optional[PCA] = None + self.max_iter = max_iter + self.n_components = n_components + self.zero_pruning = zero_pruning + + def fit(self, X, y=None): + + if self.zero_pruning: + non_zero_columns = np.any(X != 0, axis=0) + X = X[:, non_zero_columns] + + rpca = Robust_PCA(X) + L, S = rpca.fit(max_iter=self.max_iter) + self.detector_ = PCA(n_components=L.shape[1]) + self.detector_.fit(L) + self.decision_scores_ = self.decision_function(L) + return self + + # def decision_function(self, X): + # check_is_fitted(self, ['detector_']) + # X_transformed = self.detector_.transform(X) # Transform the data into the PCA space + # X_reconstructed = self.detector_.inverse_transform(X_transformed) # Reconstruct the data from the PCA space + # anomaly_scores = np.linalg.norm(X - X_reconstructed, axis=1) # Compute the Euclidean norm between original and reconstructed data + # return anomaly_scores + + def decision_function(self, X): + assert self.detector_, "Please train PCA before running the detection!" 
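# The scoring below projects X with the fitted PCA (transform() returns
# coordinates in component space) and sums the absolute elementwise difference
# between X and that projection for each sample. This differs from the
# commented-out variant above, which measures reconstruction error in the
# original feature space via inverse_transform.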
+ + L = self.detector_.transform(X) + S = np.absolute(X - L) + return S.sum(axis=1) diff --git a/models/SAND.py b/models/SAND.py new file mode 100644 index 0000000000000000000000000000000000000000..41a7a1705394444cd876f3028f722d9105a3d912 --- /dev/null +++ b/models/SAND.py @@ -0,0 +1,405 @@ +import time +import os +import math +import pickle +import sys +from tqdm import tqdm +import pandas as pd + + +import numpy as np +from numpy.random import randint +from numpy.linalg import norm, eigh +from numpy.fft import fft, ifft + +from tslearn.clustering import KShape +# from tslearn.cycc import cdist_normalized_cc, y_shifted_sbd_vec +from tslearn.metrics import cdist_normalized_cc, y_shifted_sbd_vec +from tslearn.utils import to_time_series_dataset,to_time_series + +import stumpy + + + +class SAND(): + + + """ + Online and offline method that use a set of weighted subsequences (Theta) to identify anomalies. + The anomalies are identified by computing the distance of a given subsequence (the targeted + subsequence to analyze) to Theta + ---------- + subsequence_length : int : subsequence length to analyze + pattern_length : int (greater than pattern length): length of the subsequences in Theta + k : int (greater than 1) : number of subsequences in Theta + + online : Boolean, Compute the analysis online or offline + - Online: run per batch the model update and the computation of the score + (requires the set alpha, init_length, and batch_size) + - Offline: run the model for one unique batch + + alpha : float ([0,1]) : update rate (used in Online mode only) + init_length : int (greater than subsequence_length) : length of the initial batch (used in Online mode only) + batch_size : int (greater than subsequence_length) : length of the batches (used in Online mode only) + """ + def __init__(self,pattern_length,subsequence_length,k=6): + + # Configuration parameter + self.current_time = 0 + self.mean = -1 + self.std = -1 + + # algorithm parameter + self.k = k + self.subsequence_length = subsequence_length + self.pattern_length = pattern_length + + # real time evolving storage + self.clusters = [] + self.new_clusters_dist = [] + self.nm_current_weight = [] + self.S = [] + self.clusters_subseqs = [] + + + """ + Build the model and compute the anoamly score + ---------- + X : np.array or List, the time series to analyse + + online : Boolean, Compute the analysis online or offline + - Online: run per batch the model update and the computation of the score + (requires the set alpha, init_length, and batch_size) + - Offline: run the model for one unique batch + + alpha : float ([0,1]) : update rate (used in Online mode only) + init_length : int (greater than subsequence_length) : length of the initial batch (used in Online mode only) + batch_size : int (greater than subsequence_length) : length of the batches (used in Online mode only) + overlapping rate (smaller than len(X)//2 and batch_size//2) : Number points seperating subsequences in the time series. 
+ """ + def fit(self,X, y=None, online=False, alpha=None, init_length=None, batch_size=None, overlaping_rate=10, verbose=True): + # Take subsequence every 'overlaping_rate' points + # Change it to 1 for completely overlapping subsequences + # Change it to 'subsequence_length' for non-overlapping subsequences + # Change it to 'subsequence_length//4' for non-trivial matching subsequences + self.overlaping_rate = overlaping_rate + self.ts = list(X) + self.decision_scores_ = [] + + if online: + if (alpha is None) or (init_length is None) or (batch_size is None): + print("You must specify a value for alpha, init_length, and batch_size") + return None + + self.alpha = alpha + self.init_length = init_length + self.batch_size = batch_size + + if verbose: + print(self.current_time,end='-->') + + self._initialize() + self._set_normal_model() + self.decision_scores_ = self._run(self.ts[:min(len(self.ts),self.current_time)]) + + while self.current_time < len(self.ts)-self.subsequence_length: + + if verbose: + print(self.current_time,end='-->') + + self._run_next_batch() + self._set_normal_model() + if self.current_time < len(self.ts)-self.subsequence_length: + self.decision_scores_ += self._run(self.ts[self.current_time-self.batch_size:min(len(self.ts),self.current_time)]) + else: + self.decision_scores_ += self._run(self.ts[self.current_time-self.batch_size:]) + + if verbose: + print("[STOP]: score length {}".format(len(self.decision_scores_))) + + + else: + self.init_length = len(X) + self.alpha = 0.5 + self.batch_size = 0 + + self._initialize() + self._set_normal_model() + self.decision_scores_ = self._run(self.ts) + + self.decision_scores_ = np.array(self.decision_scores_) + + + # Computation of the anomaly score + def _run(self,ts): + all_join = [] + + all_activated_weighted = [] + if len(self.nm_current_weight) != len(self.weights): + self.nm_current_weight = self.nm_current_weight + self.weights[len(self.nm_current_weight):] + + for index_name in range(len(self.clusters)): + if self.nm_current_weight[index_name]> 0: + join = stumpy.stump(ts,self.pattern_length,self.clusters[index_name][0],ignore_trivial = False)[:,0] + join = np.array(join) + join = np.nan_to_num(join) + all_join.append(join) + + join = [0]*len(all_join[0]) + + for sub_join,scores_sub_join,scores_sub_join_old,t_decay in zip(all_join,self.weights,self.nm_current_weight,self.time_decay): + new_w = float(scores_sub_join)/float(1+max(0,t_decay-self.batch_size)) + update_w = float(1-self.alpha)*float(scores_sub_join_old) + float(self.alpha)*float(new_w) + + join = [float(j) + float(sub_j)*update_w for j,sub_j in zip(list(join),list(sub_join))] + all_activated_weighted.append(update_w) + + join = join + [join[-1]]*(self.pattern_length-1) + join = np.array(join)/np.sum(all_activated_weighted) + join = self._running_mean(join,self.pattern_length) + join = [join[0]]*(self.pattern_length-1) + list(join) + + self.nm_current_weight = all_activated_weighted + if self.mean == -1: + self.mean = np.mean(join) + self.std = np.std(join) + else: + self.mean = (1-self.alpha)*self.mean + self.alpha*np.mean(join) + self.std = (1-self.alpha)*self.std + self.alpha*np.std(join) + + join = (np.array(join) - self.mean)/self.std + + + + return list(np.nan_to_num(join)) + + + # MAIN METHODS: + # - Initialization + # - Theta update for next batch + # - Score computaiton + + # Initialization of the model + def _initialize(self): + + cluster_subseqs,clusters = self._kshape_subsequence(initialization=True) + + all_mean_dist = [] + for 
i,(cluster,cluster_subseq) in enumerate(zip(clusters,cluster_subseqs)): + self._set_initial_S(cluster_subseq,i,cluster[0]) + all_mean_dist.append(self._compute_mean_dist(cluster[0],cluster[1])) + + self.clusters = clusters + self.new_clusters_dist = all_mean_dist + self.current_time = self.init_length + + + + + # Model update for next batch + def _run_next_batch(self): + + # Run K-Shape algorithm on the subsequences of the current batch + cluster_subseqs,clusters = self._kshape_subsequence(initialization=False) + + #self.new_clusters_subseqs = cluster_subseqs + self.new_clusters_to_merge = clusters + + to_add = [[] for i in range(len(self.clusters))] + new_c = [] + + # Finding the clusters that match exisiting clusters + # - Storing in to_add all the clusters that have to be merged with the existing clusters + # - Storing in new_c tyhe new clusters to be added. + for cluster,cluster_subseq in zip(clusters,cluster_subseqs): + min_dist = np.inf + tmp_index = -1 + for index_o,origin_cluster in enumerate(self.clusters): + new_dist = self._sbd(origin_cluster[0],cluster[0])[0] + if min_dist > new_dist: + min_dist = new_dist + tmp_index = index_o + if tmp_index != -1: + if min_dist < self.new_clusters_dist[tmp_index]: + to_add[tmp_index].append((cluster,cluster_subseq)) + else: + new_c.append((cluster,cluster_subseq)) + + self.to_add = to_add + self.new_c = new_c + + new_clusters = [] + all_mean_dist = [] + # Merging existing clusters with new clusters + for i,(cur_c,t_a) in enumerate(zip(self.clusters,to_add)): + # Check if new subsequences to add + if len(t_a) > 0: + all_index = cur_c[1] + all_sub_to_add = [] + for t_a_s in t_a: + all_index += t_a_s[0][1] + all_sub_to_add += t_a_s[1] + + # Updating the centroid shape + new_centroid,_ = self._extract_shape_stream(all_sub_to_add,i,cur_c[0],initial=False) + new_clusters.append((self._clean_cluster_tslearn(new_centroid),all_index)) + + # Updating the intra cluster distance + dist_to_add = self._compute_mean_dist(cur_c[0],all_index) + ratio = float(len(cur_c[1]))/float(len(cur_c[1]) + len(all_index)) + all_mean_dist.append( (ratio) * self.new_clusters_dist[i] + (1.0 - ratio) * dist_to_add ) + + # If no new subsequences to add, copy the old cluster + else: + new_clusters.append(cur_c) + all_mean_dist.append(self.new_clusters_dist[i]) + + # Adding new clusters + for i,t_a in enumerate(new_c): + self._set_initial_S(t_a[1],len(self.clusters) + i,t_a[0][0]) + new_clusters.append((t_a[0][0],t_a[0][1])) + all_mean_dist.append(self._compute_mean_dist(t_a[0][0],t_a[0][1])) + + + self.clusters = new_clusters + self.new_clusters_dist = all_mean_dist + self.current_time = self.current_time + self.batch_size + + + # SBD distance + def _sbd(self,x, y): + ncc = self._ncc_c(x, y) + idx = ncc.argmax() + dist = 1 - ncc[idx] + return dist, None + + # Core clustering computation unit + def _kshape_subsequence(self,initialization=True): + all_subsequences = [] + idxs = [] + + if initialization: + nb_subsequence = self.init_length + else: + nb_subsequence = self.batch_size + + for i in range(self.current_time,min(self.current_time + nb_subsequence,len(self.ts)-self.subsequence_length),self.overlaping_rate): + all_subsequences.append(self.ts[i:i+self.subsequence_length]) + idxs.append(i) + + ks = KShape(n_clusters=self.k,verbose=False) + list_label = ks.fit_predict(np.array(all_subsequences)) + + + cluster_subseq = [[] for i in range(self.k)] + cluster_idx = [[] for i in range(self.k)] + for lbl,idx in zip(list_label,idxs): + cluster_idx[lbl].append(idx) + 
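# also keep the raw subsequence for this label, in the same order as cluster_idx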
cluster_subseq[lbl].append(self.ts[idx:idx+self.subsequence_length]) + + # safety check + new_cluster_subseq = [] + clusters = [] + + for i in range(self.k): + if len(cluster_subseq[i]) > 0: + new_cluster_subseq.append(cluster_subseq[i]) + clusters.append((self._clean_cluster_tslearn(ks.cluster_centers_[i]),cluster_idx[i])) + return new_cluster_subseq,clusters + + + + + # Model elements update + def _set_normal_model(self): + Frequency = [] + Centrality = [] + Time_decay = [] + for i,nm in enumerate(self.clusters): + Frequency.append(float(len(nm[1]))) + Time_decay.append(float(self.current_time)-float(nm[1][-1])) + dist_nms = 0 + for j,nm_t in enumerate(self.clusters): + if j != i: + dist_nms += self._sbd(nm[0],nm_t[0])[0] + Centrality.append(dist_nms) + + Frequency = list((np.array(Frequency) - min(Frequency))/(max(Frequency) - min(Frequency)+0.0000001)+1) + Centrality = list((np.array(Centrality) - min(Centrality))/(max(Centrality) - min(Centrality)+0.0000001)+1) + + weights = [] + for f,c,t in zip(Frequency,Centrality,Time_decay): + weights.append(float(f)**2/float(c)) + + self.weights = weights + self.time_decay = Time_decay + + # Setting in memory the matrix S + def _set_initial_S(self,X,idx,cluster_centers): + X = to_time_series_dataset(X) + cluster_centers = to_time_series(cluster_centers) + sz = X.shape[1] + Xp = y_shifted_sbd_vec(cluster_centers, X, + norm_ref=-1, + norms_dataset=np.linalg.norm(X, axis=(1, 2))) + S = np.dot(Xp[:, :, 0].T, Xp[:, :, 0]) + self.S.append(S) + + # Computation of the updated centroid + def _extract_shape_stream(self,X,idx,cluster_centers,initial=True): + X = to_time_series_dataset(X) + cluster_centers = to_time_series(cluster_centers) + sz = X.shape[1] + Xp = y_shifted_sbd_vec(cluster_centers, X, + norm_ref=-1, + norms_dataset=np.linalg.norm(X, axis=(1, 2))) + S = np.dot(Xp[:, :, 0].T, Xp[:, :, 0]) + + if not initial: + S = S + self.S[idx] + self.S[idx] = S + Q = np.eye(sz) - np.ones((sz, sz)) / sz + M = np.dot(Q.T, np.dot(S, Q)) + _, vec = np.linalg.eigh(M) + mu_k = vec[:, -1].reshape((sz, 1)) + dist_plus_mu = np.sum(np.linalg.norm(Xp - mu_k, axis=(1, 2))) + dist_minus_mu = np.sum(np.linalg.norm(Xp + mu_k, axis=(1, 2))) + if dist_minus_mu < dist_plus_mu: + mu_k *= -1 + + return self._zscore(mu_k, ddof=1),S + + # Reset value of a cluster + def _clean_cluster_tslearn(self,cluster): + return np.array([val[0] for val in cluster]) + + # Compute mean distance of a element in a cluster + def _compute_mean_dist(self,cluster,all_index): + dist_all = [] + for i in all_index: + dist_all.append(self._sbd(self.ts[i:i+self.subsequence_length],cluster)[0]) + return np.mean(dist_all) + + def _running_mean(self,x,N): + return (np.cumsum(np.insert(x,0,0))[N:] - np.cumsum(np.insert(x,0,0))[:-N])/N + + def _ncc_c(self,x, y): + den = np.array(norm(x) * norm(y)) + den[den == 0] = np.inf + + x_len = len(x) + fft_size = 1 << (2*x_len-1).bit_length() + cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size))) + cc = np.concatenate((cc[-(x_len-1):], cc[:x_len])) + return np.real(cc) / den + + def _zscore(self,a, axis=0, ddof=0): + a = np.asanyarray(a) + mns = a.mean(axis=axis) + sstd = a.std(axis=axis, ddof=ddof) + if axis and mns.ndim < a.ndim: + res = ((a - np.expand_dims(mns, axis=axis)) / + np.expand_dims(sstd, axis=axis)) + else: + res = (a - mns) / sstd + return np.nan_to_num(res) \ No newline at end of file diff --git a/models/SR.py b/models/SR.py new file mode 100644 index 0000000000000000000000000000000000000000..001c5f3090e2ee6803ffcacc305a0d8f7a180f9b --- /dev/null 
+++ b/models/SR.py @@ -0,0 +1,34 @@ +"""Spectral Residual +""" +# Author: Andreas Mueller +import numpy as np + +def SR(X, window_size): + X = (X - X.min()) / (X.max() - X.min()) + X = X.ravel() + fft = np.fft.fft(X) + + amp = np.abs(fft) + log_amp = np.log(amp) + phase = np.angle(fft) + # split spectrum into bias term and symmetric frequencies + bias, sym_freq = log_amp[:1], log_amp[1:] + # select just the first half of the sym_freq + freq = sym_freq[:(len(sym_freq) + 1) // 2] + window_amp = 100 + + pad_left = (window_amp - 1) // 2 + padded_freq = np.concatenate([np.tile(X[0], pad_left), freq, np.tile(X[-1], window_amp - pad_left - 1)]) + conv_amp = np.ones(window_amp) / window_amp + ma_freq = np.convolve(padded_freq, conv_amp, 'valid') + # construct moving average log amplitude spectrum + ma_log_amp = np.concatenate([ + bias, + ma_freq, + (ma_freq[:-1] if len(sym_freq) % 2 == 1 else ma_freq)[::-1] + ]) + assert ma_log_amp.shape[0] == log_amp.shape[0], "`ma_log_amp` size does not match `log_amp` size." + # compute residual spectrum and transform back to time domain + res_amp = log_amp - ma_log_amp + sr = np.abs(np.fft.ifft(np.exp(res_amp + 1j * phase))) + return sr \ No newline at end of file diff --git a/models/Series2Graph.txt b/models/Series2Graph.txt new file mode 100644 index 0000000000000000000000000000000000000000..48c92ddfce07ed801af0232745a52dbe60cda477 --- /dev/null +++ b/models/Series2Graph.txt @@ -0,0 +1,6 @@ +# Algorithms protected by patent. Code protected by copyright and provided +# as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from +# the academia may use this code only for academic research purposes, +# provided that the authors are properly acknowledged using the citations +# below. Users from the industry may test and evaluate this code by +# requesting a license. diff --git a/models/TAClip.py b/models/TAClip.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/TSPulse.py b/models/TSPulse.py new file mode 100644 index 0000000000000000000000000000000000000000..462dfc92fb8f28eb9f8145c8f0be358cc378d6fb --- /dev/null +++ b/models/TSPulse.py @@ -0,0 +1,376 @@ +""" +TSPulse Anomaly Detection Implementation +TSPulse is a foundation model for time series anomaly detection using reconstruction-based approach. +Based on IBM's Granite Time Series TSPulse model. +""" + +import numpy as np +import pandas as pd +import torch +import warnings +from sklearn.preprocessing import MinMaxScaler +from sklearn.utils import check_array + +# TSPulse imports +# try: + # Try direct import first +from .granite_tsfm.tsfm_public.models.tspulse.modeling_tspulse import TSPulseForReconstruction +from .granite_tsfm.tsfm_public.toolkit.ad_helpers import AnomalyScoreMethods +from .granite_tsfm.tsfm_public.toolkit.time_series_anomaly_detection_pipeline import TimeSeriesAnomalyDetectionPipeline + + +class TSPulse: + """ + TSPulse Anomaly Detection Model + + TSPulse is a foundation model that uses reconstruction-based anomaly detection. 
+ It supports multiple prediction modes: + - TIME_RECONSTRUCTION: Reconstruction in time domain + - FREQUENCY_RECONSTRUCTION: Reconstruction in frequency domain + - PREDICTIVE: Predictive approach + + Parameters + ---------- + num_input_channels : int, default=1 + Number of input channels (features) in the time series + model_path : str, default="ibm-granite/granite-timeseries-tspulse-r1" + Path to the pretrained TSPulse model + prediction_mode : list, default=["time_reconstruction", "frequency_reconstruction"] + List of prediction modes to use for anomaly detection + aggregation_length : int, default=64 + Length for aggregation of scores + aggr_function : str, default="max" + Aggregation function ("max", "mean", "median") + smoothing_length : int, default=8 + Length for smoothing the anomaly scores + least_significant_scale : float, default=0.01 + Minimum scale for significance + least_significant_score : float, default=0.1 + Minimum score for significance + batch_size : int, default=256 + Batch size for processing + device : str, default=None + Device to use ("cuda" or "cpu"). Auto-detected if None. + """ + + def __init__(self, + num_input_channels=1, + model_path="ibm-granite/granite-timeseries-tspulse-r1", + prediction_mode=None, + aggregation_length=64, + aggr_function="max", + smoothing_length=8, + least_significant_scale=0.01, + least_significant_score=0.1, + batch_size=256, + device=None): + + self.num_input_channels = num_input_channels + self.model_path = model_path + self.aggregation_length = aggregation_length + self.aggr_function = aggr_function + self.smoothing_length = smoothing_length + self.least_significant_scale = least_significant_scale + self.least_significant_score = least_significant_score + self.batch_size = batch_size + + # Set default prediction modes + if prediction_mode is None: + self.prediction_mode = [ + AnomalyScoreMethods.TIME_RECONSTRUCTION.value, + AnomalyScoreMethods.FREQUENCY_RECONSTRUCTION.value, + ] + else: + self.prediction_mode = prediction_mode + + # Set device + if device is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = torch.device(device) + + # Initialize model and pipeline + self._load_model() + self._setup_pipeline() + + def _load_model(self): + """Load the pretrained TSPulse model""" + try: + self.model = TSPulseForReconstruction.from_pretrained( + self.model_path, + num_input_channels=self.num_input_channels, + revision="main", + mask_type="user", + ) + print(f"TSPulse model loaded successfully on {self.device}") + except Exception as e: + raise RuntimeError(f"Failed to load TSPulse model: {str(e)}") + + def _setup_pipeline(self): + """Setup the anomaly detection pipeline""" + self.pipeline = TimeSeriesAnomalyDetectionPipeline( + self.model, + timestamp_column="timestamp", + target_columns=None, # Will be set dynamically + prediction_mode=self.prediction_mode, + aggregation_length=self.aggregation_length, + aggr_function=self.aggr_function, + smoothing_length=self.smoothing_length, + least_significant_scale=self.least_significant_scale, + least_significant_score=self.least_significant_score, + ) + + def _prepare_data(self, X): + """ + Prepare data for TSPulse pipeline + + Parameters + ---------- + X : numpy.ndarray + Input time series data of shape (n_samples, n_features) + + Returns + ------- + pd.DataFrame + DataFrame with timestamp and feature columns + """ + X = check_array(X) + n_samples, n_features = X.shape + + # Create DataFrame with timestamp + df = pd.DataFrame() + + # 
Add timestamp column + df['timestamp'] = pd.date_range( + start='2022-01-01', + periods=n_samples, + freq='s' + ) + + # Add feature columns + if n_features == 1: + df['value'] = X.ravel() + target_columns = ['value'] + else: + for i in range(n_features): + df[f'feature_{i}'] = X[:, i] + target_columns = [f'feature_{i}' for i in range(n_features)] + + return df, target_columns + + def fit(self, X, y=None): + """ + Fit the TSPulse model (TSPulse is zero-shot, so this just validates input) + + Parameters + ---------- + X : numpy.ndarray + Training data of shape (n_samples, n_features) + y : array-like, optional + Target values (ignored, for compatibility) + + Returns + ------- + self : object + Returns self + """ + X = check_array(X) + self.n_features_in_ = X.shape[1] + + # Update model for correct number of channels + if self.n_features_in_ != self.num_input_channels: + self.num_input_channels = self.n_features_in_ + print(f"Updating TSPulse model for {self.num_input_channels} input channels") + self._load_model() + self._setup_pipeline() + + return self + + def decision_function(self, X): + """ + Compute anomaly scores for input data + + Parameters + ---------- + X : numpy.ndarray + Input data of shape (n_samples, n_features) + + Returns + ------- + numpy.ndarray + Anomaly scores of shape (n_samples,) + """ + X = check_array(X) + + # Prepare data for pipeline + df, target_columns = self._prepare_data(X) + + # Update pipeline target columns + self.pipeline.target_columns = target_columns + + try: + # Run anomaly detection pipeline + result = self.pipeline( + df, + batch_size=self.batch_size, + predictive_score_smoothing=False + ) + + # Extract anomaly scores + anomaly_scores = result['anomaly_score'].values + + # Ensure scores are same length as input + if len(anomaly_scores) != len(X): + # Handle length mismatch by padding or truncating + if len(anomaly_scores) < len(X): + # Pad with mean score + mean_score = np.mean(anomaly_scores) + padding = np.full(len(X) - len(anomaly_scores), mean_score) + anomaly_scores = np.concatenate([anomaly_scores, padding]) + else: + # Truncate to match input length + anomaly_scores = anomaly_scores[:len(X)] + + return anomaly_scores + + except Exception as e: + print(f"Warning: TSPulse pipeline failed: {str(e)}") + # Return default scores on failure + return np.random.random(len(X)) * 0.1 + + def predict(self, X, threshold=0.5): + """ + Predict anomalies using threshold + + Parameters + ---------- + X : numpy.ndarray + Input data of shape (n_samples, n_features) + threshold : float, default=0.5 + Threshold for anomaly detection + + Returns + ------- + numpy.ndarray + Binary predictions (1 for anomaly, 0 for normal) + """ + scores = self.decision_function(X) + return (scores > threshold).astype(int) + + def fit_predict(self, X, y=None): + """ + Fit and predict in one step + + Parameters + ---------- + X : numpy.ndarray + Input data + y : array-like, optional + Target values (ignored) + + Returns + ------- + numpy.ndarray + Anomaly scores + """ + return self.fit(X).decision_function(X) + +# Legacy compatibility functions +def run_TSPulse_univariate(data, **kwargs): + """ + Run TSPulse for univariate time series anomaly detection + + Parameters + ---------- + data : numpy.ndarray + Univariate time series data + **kwargs : dict + Additional parameters for TSPulse model + + Returns + ------- + numpy.ndarray + Anomaly scores + """ + try: + # Extract parameters + win_size = kwargs.get('win_size', 256) + batch_size = kwargs.get('batch_size', 64) + + # Initialize 
TSPulse for univariate data + model = TSPulse( + num_input_channels=1, + batch_size=batch_size, + **{k: v for k, v in kwargs.items() if k not in ['win_size', 'batch_size']} + ) + + # Ensure data is 2D + if data.ndim == 1: + data = data.reshape(-1, 1) + + # Fit and predict + scores = model.fit_predict(data) + return scores + + except Exception as e: + print(f"Error in TSPulse univariate: {str(e)}") + return np.random.random(len(data)) * 0.1 + +def run_TSPulse_multivariate(data, **kwargs): + """ + Run TSPulse for multivariate time series anomaly detection + + Parameters + ---------- + data : numpy.ndarray + Multivariate time series data of shape (n_samples, n_features) + **kwargs : dict + Additional parameters for TSPulse model + + Returns + ------- + numpy.ndarray + Anomaly scores + """ + try: + # Extract parameters + win_size = kwargs.get('win_size', 256) + batch_size = kwargs.get('batch_size', 64) + + # Initialize TSPulse for multivariate data + model = TSPulse( + num_input_channels=data.shape[1] if data.ndim > 1 else 1, + batch_size=batch_size, + **{k: v for k, v in kwargs.items() if k not in ['win_size', 'batch_size']} + ) + + # Fit and predict + scores = model.fit_predict(data) + return scores + + except Exception as e: + print(f"Error in TSPulse multivariate: {str(e)}") + return np.random.random(len(data)) * 0.1 + +# Main function for compatibility with existing framework +def run_TSPulse(data, **kwargs): + """ + Main TSPulse runner that handles both univariate and multivariate data + + Parameters + ---------- + data : numpy.ndarray + Time series data + **kwargs : dict + Additional parameters + + Returns + ------- + numpy.ndarray + Anomaly scores + """ + if data.ndim == 1 or (data.ndim == 2 and data.shape[1] == 1): + return run_TSPulse_univariate(data, **kwargs) + else: + return run_TSPulse_multivariate(data, **kwargs) + diff --git a/models/TimeRCD.py b/models/TimeRCD.py new file mode 100644 index 0000000000000000000000000000000000000000..acf3191235a5463435e0cd80c49180a63eb8032b --- /dev/null +++ b/models/TimeRCD.py @@ -0,0 +1,575 @@ +import tqdm +import os +import textwrap +import torch +import torch.nn.functional as F +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from torch.utils.data import DataLoader +import warnings +from torch.utils.data import Dataset +from sklearn.preprocessing import StandardScaler + +warnings.filterwarnings("ignore") + +from .time_rcd.dataset import ChatTSTimeRCDPretrainDataset +from .time_rcd.TimeRCD_pretrain_multi import TimeSeriesPretrainModel, create_random_mask, collate_fn, test_collate_fn +from .time_rcd.time_rcd_config import TimeRCDConfig, default_config +from utils.dataset import TimeRCDDataset + +class TimeRCDPretrainTester: + """Tester class for visualizing pretrained model results.""" + + def __init__(self, checkpoint_path: str, config: TimeRCDConfig): + self.config = config + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.win_size = config.win_size + self.batch_size = config.batch_size + # Load model + self.model = TimeSeriesPretrainModel(config).to(self.device) + self.load_checkpoint(checkpoint_path) + self.model.eval() + + print(f"Model loaded on device: {self.device}") + + def load_checkpoint(self, checkpoint_path: str): + """Load model from checkpoint.""" + if not os.path.exists(checkpoint_path): + raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}") + + checkpoint = torch.load(checkpoint_path, map_location=self.device) + + # Handle different checkpoint formats 
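Editorial note on the TSPulse wrapper that models/TSPulse.py (above) defines: it exposes a scikit-learn-style fit / decision_function / predict interface, and run_TSPulse dispatches to the univariate or multivariate path based on input shape. The sketch below is not part of the repository; it assumes the repo is importable as a package, that the granite_tsfm dependency and the pretrained ibm-granite/granite-timeseries-tspulse-r1 checkpoint are available, and the quantile threshold is purely illustrative.

import numpy as np
from models.TSPulse import TSPulse, run_TSPulse  # wrapper defined above

# Synthetic univariate series with an injected level shift.
rng = np.random.default_rng(0)
ts = np.sin(np.linspace(0, 60 * np.pi, 3000)) + 0.1 * rng.normal(size=3000)
ts[1500:1520] += 3.0

# Dispatcher picks the univariate path for 1-D input and returns per-point scores.
scores = run_TSPulse(ts, batch_size=64)

# Equivalent explicit use of the class-based API on 2-D input.
model = TSPulse(num_input_channels=1, batch_size=64)
scores2 = model.fit(ts.reshape(-1, 1)).decision_function(ts.reshape(-1, 1))

# Turn scores into binary flags with an illustrative 99th-percentile threshold.
flags = (scores > np.quantile(scores, 0.99)).astype(int)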
+        if 'model_state_dict' in checkpoint:
+            state_dict = checkpoint['model_state_dict']
+        else:
+            state_dict = checkpoint
+
+        # Remove 'module.' prefix if present (from DDP training)
+        new_state_dict = {}
+        for key, value in state_dict.items():
+            if key.startswith('module.'):
+                new_key = key[7:]  # Remove 'module.' prefix
+            else:
+                new_key = key
+            new_state_dict[new_key] = value
+
+        self.model.load_state_dict(new_state_dict)
+        print(f"Successfully loaded checkpoint from {checkpoint_path}")
+
+    def predict(self, batch):
+        """Run inference on a batch."""
+        with torch.no_grad():
+            # Move data to device
+            time_series = batch['time_series'].to(self.device)
+            normal_time_series = batch['normal_time_series'].to(self.device)
+            masked_time_series = batch['masked_time_series'].to(self.device)
+            attribute = batch['attribute']
+            batch_size, seq_len, num_features = time_series.shape
+
+            # Standardize the time series along the time dimension
+            time_series = (time_series - time_series.mean(dim=1, keepdim=True)) / (time_series.std(dim=1, keepdim=True) + 1e-8)
+            masked_time_series = (masked_time_series - masked_time_series.mean(dim=1, keepdim=True)) / (masked_time_series.std(dim=1, keepdim=True) + 1e-8)
+
+            mask = batch['mask'].to(self.device)
+            labels = batch['labels'].to(self.device)
+            attention_mask = batch['attention_mask'].to(self.device)
+
+            # Get embeddings
+            local_embeddings = self.model(
+                time_series=time_series,
+                mask=attention_mask)
+
+            # Get reconstruction
+            reconstructed = self.model.reconstruction_head(local_embeddings)
+            reconstructed = reconstructed.view(batch_size, seq_len, num_features)  # (B, seq_len, num_features)
+
+            # Get anomaly predictions
+            anomaly_logits = self.model.anomaly_head(local_embeddings)
+            anomaly_logits = torch.mean(anomaly_logits, dim=-2)  # (B, seq_len, 2)
+            anomaly_probs = F.softmax(anomaly_logits, dim=-1)[..., 1]  # Probability of anomaly (B, seq_len)
+
+            return {
+                'original': time_series.cpu(),
+                'normal': normal_time_series.cpu(),
+                'masked': masked_time_series.cpu(),
+                'reconstructed': reconstructed.cpu(),
+                'mask': mask.cpu(),
+                'anomaly_probs': anomaly_probs.cpu(),
+                'true_labels': labels.cpu(),
+                'attention_mask': attention_mask.cpu(),
+                'attribute': attribute
+            }
+
+    def visualize_single_sample(self, results, sample_idx=0, save_path=None):
+        """Visualize results for a single time series sample."""
+        # Extract data for the specified sample
+        original = results['original'][sample_idx].squeeze(-1).numpy()  # (seq_len, num_features) / (seq_len,)
+        normal = results['normal'][sample_idx].squeeze(-1).numpy()
+        masked = results['masked'][sample_idx].squeeze(-1).numpy()
+        reconstructed = results['reconstructed'][sample_idx].squeeze(-1).numpy()
+        mask = results['mask'][sample_idx].numpy().astype(bool)
+        anomaly_probs = results['anomaly_probs'][sample_idx].numpy()  # (seq_len,)
+        true_labels = results['true_labels'][sample_idx].numpy()  # (seq_len,)
+        attention_mask = results['attention_mask'][sample_idx].numpy().astype(bool)
+        attribute = results['attribute'][sample_idx]
+
+        # Only consider valid sequence length
+        valid_length = attention_mask.sum()
+        original = original[:valid_length]
+        normal = normal[:valid_length]
+        masked = masked[:valid_length]
+        reconstructed = reconstructed[:valid_length]
+        mask = mask[:valid_length]
+        anomaly_probs = anomaly_probs[:valid_length]
+        true_labels = true_labels[:valid_length]
+
+        # Create time axis
+        time_axis = np.arange(len(original))
+
+        assert original.ndim == normal.ndim == reconstructed.ndim == masked.ndim, "Original, normal, reconstructed, and masked time series must have 
the same dimensions." + if original.ndim == 1: + # Create subplots + fig, axes = plt.subplots(3, 1, figsize=(15, 12)) + + # 1. Reconstruction visualization + ax1 = axes[0] + ax1.plot(time_axis, original, 'b-', label='Original', linewidth=2, alpha=0.8) + ax1.plot(time_axis, masked, 'g--', label='Masked Input', linewidth=1.5, alpha=0.7) + ax1.plot(time_axis[mask], reconstructed[mask], 'ro', + label='Reconstructed', markersize=4, alpha=0.8) + + # Highlight masked regions + mask_regions = [] + in_mask = False + start_idx = 0 + + for i, is_masked in enumerate(mask): + if is_masked and not in_mask: + start_idx = i + in_mask = True + elif not is_masked and in_mask: + mask_regions.append((start_idx, i - 1)) + in_mask = False + + if in_mask: # Handle case where mask continues to the end + mask_regions.append((start_idx, len(mask) - 1)) + + for start, end in mask_regions: + ax1.axvspan(start, end, alpha=0.2, color='red', + label='Masked Region' if start == mask_regions[0][0] else "") + + ax1.set_title('Time Series Reconstruction', fontsize=14, fontweight='bold') + ax1.set_xlabel('Time Steps') + ax1.set_ylabel('Value') + ax1.legend() + ax1.grid(True, alpha=0.3) + + # 2. Anomaly detection visualization + ax2 = axes[1] + ax2.plot(time_axis, normal, 'g-', label='Normal Time Series', linewidth=1, alpha=0.6) + ax2.plot(time_axis, original, 'b-', label='Anomalous Time Series', linewidth=1, alpha=0.6) + + # Color background based on true anomaly labels + anomaly_regions = [] + in_anomaly = False + start_idx = 0 + + for i, is_anomaly in enumerate(true_labels > 0.5): + if is_anomaly and not in_anomaly: + start_idx = i + in_anomaly = True + elif not is_anomaly and in_anomaly: + anomaly_regions.append((start_idx, i - 1)) + in_anomaly = False + + if in_anomaly: + anomaly_regions.append((start_idx, len(true_labels) - 1)) + + for start, end in anomaly_regions: + ax2.axvspan(start, end, alpha=0.3, color='red', + label='True Anomaly' if start == anomaly_regions[0][0] else "") + + # Plot predicted anomaly probabilities + ax2_twin = ax2.twinx() + ax2_twin.plot(time_axis, anomaly_probs, 'r-', label='Anomaly Probability', + linewidth=2, alpha=0.8) + ax2_twin.axhline(y=0.5, color='orange', linestyle='--', alpha=0.7, + label='Threshold (0.5)') + ax2_twin.set_ylabel('Anomaly Probability', color='red') + ax2_twin.set_ylim(0, 1) + + ax2.set_title('Anomaly Detection Results', fontsize=14, fontweight='bold') + ax2.set_xlabel('Time Steps') + ax2.set_ylabel('Time Series Value', color='blue') + + # Combine legends + lines1, labels1 = ax2.get_legend_handles_labels() + lines2, labels2 = ax2_twin.get_legend_handles_labels() + ax2.legend(lines1 + lines2, labels1 + labels2, loc='upper right') + ax2.grid(True, alpha=0.3) + + # 3. 
Performance metrics visualization + ax3 = axes[2] + + # Calculate reconstruction error for masked regions + if mask.sum() > 0: + recon_error = np.abs(original[mask] - reconstructed[mask]) + ax3.bar(np.arange(len(recon_error)), recon_error, + alpha=0.7, color='orange', label='Reconstruction Error') + ax3.set_title('Reconstruction Error (Masked Regions Only)', + fontsize=14, fontweight='bold') + ax3.set_xlabel('Masked Time Step Index') + ax3.set_ylabel('Absolute Error') + ax3.legend() + ax3.grid(True, alpha=0.3) + else: + ax3.text(0.5, 0.5, 'No masked regions in this sample', + ha='center', va='center', transform=ax3.transAxes, fontsize=12) + ax3.set_title('Reconstruction Error', fontsize=14, fontweight='bold') + + plt.tight_layout() + + if save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight') + + plt.show() + + elif original.ndim == 2: + _, num_features = original.shape + + fig_height = 4 * num_features + 2 + fig, axes = plt.subplots(num_features, 1, figsize=(16, fig_height)) + plt.subplots_adjust(top=0.85, hspace=0.2, left=0.08, right=0.92, bottom=0.08) + + anomaly_regions = [] + in_anomaly = False + start_idx = 0 + for i, is_anomaly in enumerate(true_labels > 0.5): + if is_anomaly and not in_anomaly: + start_idx = i + in_anomaly = True + elif not is_anomaly and in_anomaly: + anomaly_regions.append((start_idx, i - 1)) + in_anomaly = False + if in_anomaly: + anomaly_regions.append((start_idx, len(true_labels) - 1)) + + for feature_idx in range(num_features): + ax = axes[feature_idx] + ax.plot(time_axis, original[:, feature_idx], 'b-', + linewidth=1, label=f'Anomalous Time Series', alpha=0.8) + ax.plot(time_axis, normal[:, feature_idx], 'g-', + linewidth=1, label='Normal Time Series', alpha=0.8) + y_min, y_max = ax.get_ylim() + shift = y_max - y_min + ax.set_ylim(y_min - shift, y_max) + + for start, end in anomaly_regions: + if start == end: + ax.axvspan(start - 0.5, start + 0.5, alpha=0.3, color='grey', + label='True Anomaly Region' if start == anomaly_regions[0][ + 0] and feature_idx == 0 else "") + else: + ax.axvspan(start, end, alpha=0.3, color='grey', + label='True Anomaly Region' if start == anomaly_regions[0][ + 0] and feature_idx == 0 else "") + + ax2 = ax.twinx() + ax2.plot(time_axis, anomaly_probs, 'r-', linewidth=1, + label='Anomaly Score', alpha=0.9) + ax2.set_ylim(0, 1.5) + ax2.set_ylabel('Anomaly Score', fontsize=12) + ax.set_ylabel(f'Value', fontsize=12) + if feature_idx == num_features - 1: + ax.set_xlabel('Time Steps', fontsize=12) + else: + ax.set_xticklabels([]) + + ax.set_title(f'Feature {feature_idx} - Time Series & Anomaly Score', + fontsize=16, pad=10) + ax.grid(True, alpha=0.3) + + if feature_idx == 0: + lines1, labels1 = ax.get_legend_handles_labels() + lines2, labels2 = ax2.get_legend_handles_labels() + ax.legend(lines1 + lines2, labels1 + labels2, + loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=14) + + anomalies = [] + isendo = attribute['is_endogenous'] + edges = attribute['dag'] + for idx, item in enumerate(attribute['attribute_list']): + for k, v in item['anomalies'].items(): + anomalies.append((f"feature_{idx}_{k[2:]}", v)) + anomalies_str = ', '.join([f"{k}: {v}" for k, v in anomalies]) + wrap_width = 100 + wrapped_anomalies = textwrap.fill(f"Anomalies: {anomalies_str}", width=wrap_width) + wrapped_edges = textwrap.fill(f"Edges: {str(edges)}", width=wrap_width) + title = f"Multivariate Time Series Visualization\n{isendo}_{wrapped_anomalies}\n{wrapped_edges}" + fig.suptitle(title, fontsize=22, y=0.95, ha='center', va='top') + + if 
save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white') + plt.show() + + + else: + raise ValueError("Unsupported original data shape: {}".format(original.shape)) + + def test_model(self, data_path: str, filename: str, num_samples: int = 5, save_dir: str = None, + max_test_data: int = 100): + """Test the model on a dataset and visualize results.""" + # Load test dataset + full_test_dataset = ChatTSTimeRCDPretrainDataset(data_path, filename, split="test", train_ratio=0) + print(f'Length of dataset: {len(full_test_dataset)}') + + # Limit to max_test_data samples + if len(full_test_dataset) > max_test_data: + indices = torch.randperm(len(full_test_dataset))[:max_test_data].tolist() + test_dataset = torch.utils.data.Subset(full_test_dataset, indices) + print("random") + else: + test_dataset = full_test_dataset + + # Create visualization loader for detailed visualization (one by one) + vis_loader = DataLoader( + test_dataset, + batch_size=1, # Process one sample at a time for visualization + shuffle=False, + collate_fn=collate_fn, + num_workers=0 + ) + + # Visualize individual samples (one by one) + num_visualize = min(num_samples, len(test_dataset)) + + vis_iter = iter(vis_loader) + + for i in range(num_visualize): + try: + vis_batch = next(vis_iter) + + # Run inference for this single sample + vis_results = self.predict(vis_batch) + + save_path = None + if save_dir: + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, f"sample_{i + 1}_results.png") + + self.visualize_single_sample(vis_results, sample_idx=0, save_path=save_path) + + except StopIteration: + break + + def zero_shot(self, data): + """Run zero-shot inference on the provided data.""" + if len(data) <= self.win_size: + self.win_size = len(data) + + test_loader = DataLoader( + dataset=TimeRCDDataset(data, window_size=self.win_size, stride=self.win_size, normalize=True), + batch_size=self.batch_size, + collate_fn=test_collate_fn, + num_workers=0, + shuffle=False,) + + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + scores = [] + logits = [] + with torch.no_grad(): + for i, batch in loop: + # Move data to device + time_series = batch['time_series'].to(self.device) + # print("Here is the time series shape: ", time_series.shape) + # print(f"Here are a sample of dataset after normalization: {time_series[:10, :]}") + batch_size, seq_len, num_features = time_series.shape + # 对时间序列标准化 + attention_mask = batch['attention_mask'].to(self.device) + # print("Here is the attention mask shape: ", attention_mask.shape) + # print("Here is the attention mask: ", attention_mask) + # Get embeddings + local_embeddings = self.model( + time_series=time_series, + mask=attention_mask) + + # Get anomaly predictions + anomaly_logits = self.model.anomaly_head(local_embeddings) + anomaly_logits = torch.mean(anomaly_logits, dim=-2) # (B, seq_len, 2) + anomaly_probs = F.softmax(anomaly_logits, dim=-1)[..., 1] # Probability of anomaly (B, seq_len) + scores.append(anomaly_probs.cpu().numpy()) + logit = anomaly_logits[..., 1] - anomaly_logits[..., 0] # Anomaly logits (B, seq_len) + logits.append(logit.cpu().numpy()) + return scores, logits + + def evaluate(self, time_series, mask): + with torch.no_grad(): + time_series = time_series.to(self.device) + mask = mask.to(self.device) + local_embeddings = self.model(time_series = time_series, mask = mask) + + reconstructed = self.model.reconstruction_head(local_embeddings) # (B, seq_len, num_features, 1) + reconstructed = 
reconstructed.squeeze(-1) + + mask_expand = mask.unsqueeze(-1).expand(-1, -1, reconstructed.shape[-1]) + + anomaly_probs = ((reconstructed - time_series) ** 2)[mask_expand] + return anomaly_probs, reconstructed + + + def zero_shot_reconstruct(self, data, visualize=True, data_index=None): + """Run zero-shot inference on the provided data.""" + if len(data) <= self.win_size: + self.win_size = len(data) + + test_loader = DataLoader( + dataset=Dataset_UCR(data, window_size=self.win_size), + batch_size=self.batch_size, + # collate_fn=collate_fn, + num_workers=0, + shuffle=False, ) + + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + scores = [] + with torch.no_grad(): + for i, (x, mask) in loop: + # Move data to device + print("Here is the batch type: ", type(x)) + print("Shape: ", np.array(x).shape) + time_series = torch.tensor(x, dtype=torch.float32).to(self.device) # (B, seq_len, num_features) + mask_tensor = torch.tensor(mask, dtype=torch.bool).to(self.device) + # print("Here is the time series shape: ", time_series.shape) + # 对时间序列标准化 + # attention_mask = batch['attention_mask'].to(self.device) + score, reconstructed = self.evaluate(time_series, mask_tensor) + + scores.append(score) + + # Visualize the first batch if requested + if visualize: + self.visualize_reconstruction(original=time_series[0].cpu().numpy(), + reconstructed=reconstructed.cpu().numpy(), + mask=mask_tensor[0].cpu().numpy(), + scores=score.cpu().numpy(), + save_path=f"/home/lihaoyang/Huawei/TSB-AD/Synthetic/random_mask_anomaly_head_Time_RCD_Reconstruction_5000/plot/", + index=data_index) + + return scores + + def visualize_reconstruction(self, original, reconstructed, mask, scores, index, save_path=None): + """Visualize reconstruction results for a single sample.""" + import matplotlib.pyplot as plt + + seq_len = len(original) + time_axis = np.arange(seq_len) + + # Squeeze singleton dimensions + original = original.squeeze() + reconstructed = reconstructed.squeeze(0).squeeze(-1) + scores = scores.squeeze() + + fig, axes = plt.subplots(2, 1, figsize=(15, 10)) + + # 1. Reconstruction plot + ax1 = axes[0] + ax1.plot(time_axis, original, 'b-', label='Original', linewidth=2, alpha=0.8) + ax1.plot(time_axis, reconstructed, 'r--', label='Reconstructed', linewidth=2, alpha=0.8) + + # Highlight masked regions + mask_regions = [] + in_mask = False + start_idx = 0 + for i, is_masked in enumerate(mask): + if is_masked and not in_mask: + start_idx = i + in_mask = True + elif not is_masked and in_mask: + mask_regions.append((start_idx, i - 1)) + in_mask = False + if in_mask: + mask_regions.append((start_idx, len(mask) - 1)) + + for start, end in mask_regions: + ax1.axvspan(start, end, alpha=0.2, color='red', + label='Masked Region' if start == mask_regions[0][0] else "") + + ax1.set_title('Time Series Reconstruction', fontsize=14, fontweight='bold') + ax1.set_xlabel('Time Steps') + ax1.set_ylabel('Value') + ax1.legend() + ax1.grid(True, alpha=0.3) + + # 2. 
Reconstruction error plot
+        ax2 = axes[1]
+        recon_error = np.abs(original - reconstructed)
+        ax2.plot(time_axis, recon_error, 'g-', label='Reconstruction Error', linewidth=2, alpha=0.8)
+
+        # Plot scores if available (mapped to time steps)
+        if len(scores) == mask.sum():
+            # Scores are only for masked points, map back to full sequence
+            full_scores = np.zeros(seq_len)
+            full_scores[mask] = scores
+            ax2_twin = ax2.twinx()
+            ax2_twin.plot(time_axis, full_scores, 'orange', label='Anomaly Scores', linewidth=1.5, alpha=0.7)
+            ax2_twin.set_ylabel('Anomaly Score', color='orange')
+            ax2_twin.legend(loc='upper right')
+
+        ax2.set_title('Reconstruction Error', fontsize=14, fontweight='bold')
+        ax2.set_xlabel('Time Steps')
+        ax2.set_ylabel('Absolute Error')
+        ax2.legend()
+        ax2.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+
+        if save_path:
+            if not os.path.exists(save_path):
+                os.makedirs(save_path, exist_ok=True)
+            save_path = os.path.join(save_path, f"reconstruction_sample_{index}_results.png")
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+            print("Visualization saved to: ", save_path)
+
+        # plt.show()
+
+
+class Dataset_UCR(Dataset):
+    def __init__(self, data, window_size: int = 1000):
+        super().__init__()
+        self.data = data.reshape(-1, 1) if len(data.shape) == 1 else data
+        self.window_size = window_size
+        self._load_data()
+        self._process_windows()
+
+    def _load_data(self):
+        # train_data = np.load(train_path, allow_pickle=True) # (seq_len, num_features)
+        # test_data = np.load(test_path, allow_pickle=True) # (seq_len, num_features)
+        # test_labels = np.load(label_path, allow_pickle=True) # (seq_len, )
+        train_data = self.data
+        scaler = StandardScaler()
+        train_data = scaler.fit_transform(train_data)
+        self.raw_test = scaler.transform(self.data)
+
+    def _process_windows(self):
+        if len(self.raw_test) <= self.window_size:
+            self.test = np.expand_dims(self.raw_test, axis=0)
+            # self.test_labels = np.expand_dims(self.raw_labels, axis=0)
+            self.mask = np.expand_dims(np.ones(len(self.raw_test), dtype=bool), axis=0)
+        else:
+            self.raw_masks = np.ones(len(self.raw_test), dtype=bool)
+            padding = self.window_size - (len(self.raw_test) % self.window_size)
+            if padding < self.window_size:
+                self.raw_test = np.pad(self.raw_test, ((0, padding), (0, 0)), mode='constant')
+                # self.raw_labels = np.pad(self.raw_labels, (0, padding), mode='constant')
+                self.raw_masks = np.pad(self.raw_masks, (0, padding), mode='constant')
+            self.test = self.raw_test.reshape(-1, self.window_size, self.raw_test.shape[1])
+            # self.test_labels = self.raw_labels.reshape(-1, self.window_size)
+            self.mask = self.raw_masks.reshape(-1, self.window_size)
+        assert self.test.shape[0] == self.mask.shape[0], "Inconsistent window sizes"  # labels are not loaded in this zero-shot dataset
+
+    def __len__(self):
+        return len(self.test)
+
+    def __getitem__(self, index):
+        return np.float32(self.test[index]), self.mask[index]
\ No newline at end of file
diff --git a/models/TimesFM.py b/models/TimesFM.py
new file mode 100644
index 0000000000000000000000000000000000000000..f003cdd4c2931746c99bc357f9ebca263a968cc7
--- /dev/null
+++ b/models/TimesFM.py
@@ -0,0 +1,87 @@
+"""
+This function is adapted from [timesfm] by [siriuz42 et al.] 
+Original source: [https://github.com/google-research/timesfm] +""" + +import timesfm +import numpy as np + +class TimesFM(): + def __init__(self, + win_size=96, + prediction_length=1, + input_c=1): + + self.model_name = 'TimesFM' + self.win_size = win_size + self.prediction_length = prediction_length + self.input_c = input_c + self.score_list = [] + + def fit(self, data): + + for channel in range(self.input_c): + + data_channel = data[:, channel].reshape(-1, 1) + data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) + # print('data_win: ', data_win.shape) # (2330, 100) + # print('data_target: ', data_target.shape) # (2330, 1) + + # tfm = timesfm.TimesFm( + # context_len=self.win_size, + # horizon_len=self.prediction_length, + # input_patch_len=32, + # output_patch_len=128, + # num_layers=20, + # model_dims=1280, + # backend="gpu") + # tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m") + + tfm = timesfm.TimesFm( + hparams=timesfm.TimesFmHparams( + backend="gpu", + per_core_batch_size=32, + horizon_len=self.prediction_length, + ), + checkpoint=timesfm.TimesFmCheckpoint( + huggingface_repo_id="google/timesfm-1.0-200m-pytorch"), + ) + + forecast_input = [data_win[i, :] for i in range(data_win.shape[0])] + point_forecast, _ = tfm.forecast(forecast_input) + + print('predictions: ', point_forecast.shape) + + ### using mse as the anomaly score + scores = (data_target.squeeze() - point_forecast.squeeze()) ** 2 + # scores = np.mean(scores, axis=1) + self.score_list.append(scores) + + scores_merge = np.mean(np.array(self.score_list), axis=0) + # print('scores_merge: ', scores_merge.shape) + + padded_decision_scores = np.zeros(len(data)) + padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] + padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge + + self.decision_scores_ = padded_decision_scores + + + def decision_function(self, X): + """ + Not used, present for API consistency by convention. 
+ """ + pass + + def create_dataset(self, X, slidingWindow, predict_time_steps=1): + Xs, ys = [], [] + for i in range(len(X) - slidingWindow - predict_time_steps+1): + + tmp = X[i : i + slidingWindow + predict_time_steps].ravel() + # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() + + x = tmp[:slidingWindow] + y = tmp[slidingWindow:] + Xs.append(x) + ys.append(y) + return np.array(Xs), np.array(ys) \ No newline at end of file diff --git a/models/TimesNet.py b/models/TimesNet.py new file mode 100644 index 0000000000000000000000000000000000000000..ab4d5ab27d29bbd24e21461cb0163ee0c3742e96 --- /dev/null +++ b/models/TimesNet.py @@ -0,0 +1,345 @@ +''' +TimesNet from "TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis" (ICLR 2023) +Code partially from https://github.com/thuml/Time-Series-Library/ + +Copyright (c) 2021 THUML @ Tsinghua University +''' + +from typing import Dict +import numpy as np +import torchinfo +import torch +from torch import nn, optim +from torch.utils.data import DataLoader +import torch.nn.functional as F +import torch.fft +from torch.nn.utils import weight_norm +import math +import tqdm +import os + +from ..utils.torch_utility import EarlyStoppingTorch, DataEmbedding, adjust_learning_rate, get_gpu +from ..utils.dataset import ReconstructDataset + +class Inception_Block_V1(nn.Module): + def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True): + super(Inception_Block_V1, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.num_kernels = num_kernels + kernels = [] + for i in range(self.num_kernels): + kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i)) + self.kernels = nn.ModuleList(kernels) + if init_weight: + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + res_list = [] + for i in range(self.num_kernels): + res_list.append(self.kernels[i](x)) + res = torch.stack(res_list, dim=-1).mean(-1) + return res + + +def FFT_for_Period(x, k=2): + # [B, T, C] + xf = torch.fft.rfft(x, dim=1) + # find period by amplitudes + frequency_list = abs(xf).mean(0).mean(-1) + frequency_list[0] = 0 + _, top_list = torch.topk(frequency_list, k) + top_list = top_list.detach().cpu().numpy() + period = x.shape[1] // top_list + return period, abs(xf).mean(-1)[:, top_list] + + +class TimesBlock(nn.Module): + def __init__(self, + seq_len=96, + pred_len=0, + top_k=3, + d_model=8, + d_ff=16, + num_kernels=6 + ): + super(TimesBlock, self).__init__() + self.seq_len = seq_len + self.pred_len = pred_len + self.k = top_k + # parameter-efficient design + self.conv = nn.Sequential( + Inception_Block_V1(d_model, d_ff, + num_kernels=num_kernels), + nn.GELU(), + Inception_Block_V1(d_ff, d_model, + num_kernels=num_kernels) + ) + + def forward(self, x): + B, T, N = x.size() + period_list, period_weight = FFT_for_Period(x, self.k) + + res = [] + for i in range(self.k): + period = period_list[i] + # padding + if (self.seq_len + self.pred_len) % period != 0: + length = ( + ((self.seq_len + self.pred_len) // period) + 1) * period + padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device) + out = torch.cat([x, padding], dim=1) + else: + length = (self.seq_len + self.pred_len) + out = x + # reshape + 
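+            # e.g. with seq_len + pred_len = 96 and period = 24, the window is folded into
+            # 4 cycles of length 24: (B, 96, N) -> (B, 4, 24, N) -> permute -> (B, N, 4, 24),
+            # so the Inception blocks below see intra-period variation along one axis and
+            # inter-period variation along the other.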
out = out.reshape(B, length // period, period, + N).permute(0, 3, 1, 2).contiguous() + # 2D conv: from 1d Variation to 2d Variation + out = self.conv(out) + # reshape back + out = out.permute(0, 2, 3, 1).reshape(B, -1, N) + res.append(out[:, :(self.seq_len + self.pred_len), :]) + res = torch.stack(res, dim=-1) + # adaptive aggregation + period_weight = F.softmax(period_weight, dim=1) + period_weight = period_weight.unsqueeze( + 1).unsqueeze(1).repeat(1, T, N, 1) + res = torch.sum(res * period_weight, -1) + # residual connection + res = res + x + return res + + +class Model(nn.Module): + """ + Paper link: https://openreview.net/pdf?id=ju_Uqw384Oq + """ + + def __init__(self, + seq_len=96, + pred_len=0, + d_model=8, + enc_in=1, + c_out=1, + e_layers=1, + dropout=0.1, + embed='timeF', + freq="t" + ): + super(Model, self).__init__() + self.seq_len = seq_len + self.pred_len = pred_len + self.model = nn.ModuleList([TimesBlock(seq_len=self.seq_len) + for _ in range(e_layers)]) + self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, dropout) + self.layer = e_layers + self.layer_norm = nn.LayerNorm(d_model) + self.projection = nn.Linear(d_model, c_out, bias=True) + + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # embedding + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + # TimesNet + for i in range(self.layer): + enc_out = self.layer_norm(self.model[i](enc_out)) + # porject back + dec_out = self.projection(enc_out) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1)) + return dec_out + + def forward(self, x_enc): + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + +class TimesNet(): + def __init__(self, + win_size=96, + enc_in=1, + epochs=10, + batch_size=128, + lr=1e-4, + patience=3, + features="M", + lradj="type1", + validation_size=0.2): + super().__init__() + + self.win_size = win_size + self.enc_in = enc_in + self.batch_size = batch_size + self.lr = lr + self.patience = patience + self.epochs = epochs + self.features = features + self.lradj = lradj + self.validation_size = validation_size + + self.__anomaly_score = None + + cuda = True + self.y_hats = None + + self.cuda = cuda + self.device = get_gpu(self.cuda) + + self.model = Model(seq_len=self.win_size, enc_in=self.enc_in, c_out=self.enc_in).float().to(self.device) + self.model_optim = optim.Adam(self.model.parameters(), lr=self.lr) + self.criterion = nn.MSELoss() + + self.early_stopping = EarlyStoppingTorch(None, patience=self.patience) + + self.input_shape = (self.batch_size, self.win_size, self.enc_in) + + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + train_steps = len(train_loader) + for epoch in range(1, self.epochs + 1): + ## Training + train_loss = 0 + self.model.train() + + loop = 
tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) + for i, (batch_x, _) in loop: + self.model_optim.zero_grad() + + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x) + loss = self.criterion(outputs, batch_x) + + loss.backward() + self.model_optim.step() + + train_loss += loss.cpu().item() + + loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') + loop.set_postfix(loss=loss.item(), avg_loss=train_loss/(i+1)) + + ## Validation + self.model.eval() + total_loss = [] + + loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) + with torch.no_grad(): + for i, (batch_x, _) in loop: + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x) + + f_dim = -1 if self.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + pred = outputs.detach().cpu() + true = batch_x.detach().cpu() + + loss = self.criterion(pred, true) + total_loss.append(loss) + loop.set_description(f'Valid Epoch [{epoch}/{self.epochs}]') + + valid_loss = np.average(total_loss) + loop.set_postfix(loss=loss.item(), valid_loss=valid_loss) + self.early_stopping(valid_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + adjust_learning_rate(self.model_optim, epoch + 1, self.lradj, self.lr) + + def decision_function(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + attens_energy = [] + y_hats = [] + self.anomaly_criterion = nn.MSELoss(reduce=False) + + loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) + with torch.no_grad(): + for i, (batch_x, _) in loop: + batch_x = batch_x.float().to(self.device) + # reconstruction + outputs = self.model(batch_x) + # criterion + score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) + y_hat = torch.squeeze(outputs, -1) + + score = score.detach().cpu().numpy()[:, -1] + y_hat = y_hat.detach().cpu().numpy()[:, -1] + + attens_energy.append(score) + y_hats.append(y_hat) + loop.set_description(f'Testing Phase: ') + + attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) + scores = np.array(attens_energy) + + y_hats = np.concatenate(y_hats, axis=0).reshape(-1) + y_hats = np.array(y_hats) + + assert scores.ndim == 1 + + import shutil + self.save_path = None + if self.save_path and os.path.exists(self.save_path): + shutil.rmtree(self.save_path) + + self.__anomaly_score = scores + self.y_hats = y_hats + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def get_y_hat(self) -> np.ndarray: + return self.y_hats + + def param_statistic(self, save_file): + model_stats = torchinfo.summary(self.model, self.input_shape, verbose=0) + with open(save_file, 'w') as f: + f.write(str(model_stats)) \ No newline at end of file diff --git a/models/TranAD.py b/models/TranAD.py new file mode 100644 index 0000000000000000000000000000000000000000..59245f3e642084d7da188b523b67f1e52ceac975 --- /dev/null +++ b/models/TranAD.py @@ -0,0 +1,282 @@ +""" +This function is adapted from [TranAD] by [imperial-qore] +Original source: [https://github.com/imperial-qore/TranAD] +""" + +from __future__ import division +from __future__ import 
print_function + +import numpy as np +import math +import torch +import torch.nn.functional as F +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from torch import nn +from torch.nn import TransformerEncoder +from torch.nn import TransformerDecoder +from torch.utils.data import DataLoader +from sklearn.preprocessing import MinMaxScaler +import tqdm + +from .base import BaseDetector +from ..utils.dataset import ReconstructDataset +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu + +class PositionalEncoding(nn.Module): + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, d_model).float() * (-math.log(10000.0) / d_model) + ) + pe += torch.sin(position * div_term) + pe += torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer("pe", pe) + + def forward(self, x, pos=0): + x = x + self.pe[pos : pos + x.size(0), :] + return self.dropout(x) + +class TransformerEncoderLayer(nn.Module): + def __init__(self, d_model, nhead, dim_feedforward=16, dropout=0): + super(TransformerEncoderLayer, self).__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.activation = nn.LeakyReLU(True) + + def forward(self, src, *args, **kwargs): + src2 = self.self_attn(src, src, src)[0] + src = src + self.dropout1(src2) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = src + self.dropout2(src2) + return src + +class TransformerDecoderLayer(nn.Module): + def __init__(self, d_model, nhead, dim_feedforward=16, dropout=0): + super(TransformerDecoderLayer, self).__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = nn.LeakyReLU(True) + + def forward(self, tgt, memory, *args, **kwargs): + tgt2 = self.self_attn(tgt, tgt, tgt)[0] + tgt = tgt + self.dropout1(tgt2) + tgt2 = self.multihead_attn(tgt, memory, memory)[0] + tgt = tgt + self.dropout2(tgt2) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout3(tgt2) + return tgt + +class TranADModel(nn.Module): + def __init__(self, batch_size, feats, win_size): + super(TranADModel, self).__init__() + self.name = "TranAD" + self.batch = batch_size + self.n_feats = feats + self.n_window = win_size + self.n = self.n_feats * self.n_window + self.pos_encoder = PositionalEncoding(2 * feats, 0.1, self.n_window) + encoder_layers = TransformerEncoderLayer( + d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 + ) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers1 = TransformerDecoderLayer( + d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 + ) + self.transformer_decoder1 = 
TransformerDecoder(decoder_layers1, 1) + decoder_layers2 = TransformerDecoderLayer( + d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 + ) + self.transformer_decoder2 = TransformerDecoder(decoder_layers2, 1) + self.fcn = nn.Sequential(nn.Linear(2 * feats, feats), nn.Sigmoid()) + + def encode(self, src, c, tgt): + src = torch.cat((src, c), dim=2) + src = src * math.sqrt(self.n_feats) + src = self.pos_encoder(src) + memory = self.transformer_encoder(src) + tgt = tgt.repeat(1, 1, 2) + return tgt, memory + + def forward(self, src, tgt): + # Phase 1 - Without anomaly scores + c = torch.zeros_like(src) + x1 = self.fcn(self.transformer_decoder1(*self.encode(src, c, tgt))) + # Phase 2 - With anomaly scores + c = (x1 - src) ** 2 + x2 = self.fcn(self.transformer_decoder2(*self.encode(src, c, tgt))) + return x1, x2 + + +class TranAD(BaseDetector): + def __init__(self, + win_size = 100, + feats = 1, + batch_size = 128, + epochs = 50, + patience = 3, + lr = 1e-4, + validation_size=0.2 + ): + super().__init__() + + self.__anomaly_score = None + + self.cuda = True + self.device = get_gpu(self.cuda) + + self.win_size = win_size + self.batch_size = batch_size + self.epochs = epochs + self.feats = feats + self.validation_size = validation_size + + self.model = TranADModel(batch_size=self.batch_size, feats=self.feats, win_size=self.win_size).to(self.device) + self.optimizer = torch.optim.AdamW( + self.model.parameters(), lr=lr, weight_decay=1e-5 + ) + self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) + self.criterion = nn.MSELoss() + + self.early_stopping = EarlyStoppingTorch(None, patience=patience) + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + avg_loss = 0 + loop = tqdm.tqdm( + enumerate(train_loader), total=len(train_loader), leave=True + ) + for idx, (x, _) in loop: + if torch.isnan(x).any() or torch.isinf(x).any(): + print("Input data contains nan or inf") + x = torch.nan_to_num(x) + + x = x.to(self.device) + bs = x.shape[0] + x = x.permute(1, 0, 2) + elem = x[-1, :, :].view(1, bs, self.feats) + + self.optimizer.zero_grad() + z = self.model(x, elem) + loss = (1 / epoch) * self.criterion(z[0], elem) + (1 - 1 / epoch) * self.criterion(z[1], elem) + loss.backward(retain_graph=True) + + self.optimizer.step() + avg_loss += loss.cpu().item() + loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) + + if torch.isnan(loss): + print(f"Loss is nan at epoch {epoch}") + break + + if len(valid_loader) > 0: + self.model.eval() + avg_loss_val = 0 + loop = tqdm.tqdm( + enumerate(valid_loader), total=len(valid_loader), leave=True + ) + with torch.no_grad(): + for idx, (x, _) in loop: + + if torch.isnan(x).any() or torch.isinf(x).any(): + print("Input data contains nan or inf") + x = torch.nan_to_num(x) + + x = x.to(self.device) + # x = x.unsqueeze(-1) + bs = x.shape[0] + x = x.permute(1, 0, 2) + elem = x[-1, :, :].view(1, bs, self.feats) + + self.optimizer.zero_grad() + z = self.model(x, elem) + loss = (1 / epoch) * self.criterion(z[0], elem) + ( + 
1 - 1 / epoch + ) * self.criterion(z[1], elem) + + avg_loss_val += loss.cpu().item() + loop.set_description(f"Validation Epoch [{epoch}/{self.epochs}]") + loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) + + self.scheduler.step() + if len(valid_loader) > 0: + avg_loss = avg_loss_val / len(valid_loader) + else: + avg_loss = avg_loss / len(train_loader) + self.early_stopping(avg_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + with torch.no_grad(): + for idx, (x, _) in loop: + x = x.to(self.device) + bs = x.shape[0] + x = x.permute(1, 0, 2) + elem = x[-1, :, :].view(1, bs, self.feats) + # breakpoint() + _, z = self.model(x, elem) + + loss = torch.mean(F.mse_loss(z, elem, reduction="none")[0], axis=-1) + scores.append(loss.cpu()) + + scores = torch.cat(scores, dim=0) + scores = scores.numpy() + + self.__anomaly_score = scores + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def param_statistic(self, save_file): + pass diff --git a/models/USAD.py b/models/USAD.py new file mode 100644 index 0000000000000000000000000000000000000000..0052fcfcf70887dc801a6b304c1655fa64b1bdc6 --- /dev/null +++ b/models/USAD.py @@ -0,0 +1,223 @@ +""" +This function is adapted from [usad] by [manigalati] +Original source: [https://github.com/manigalati/usad] +""" + +from __future__ import division +from __future__ import print_function + +import numpy as np +import math +import torch +import torch.nn.functional as F +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted +from torch import nn +from torch.utils.data import DataLoader +from sklearn.preprocessing import MinMaxScaler +import tqdm + +from .base import BaseDetector +from ..utils.dataset import ReconstructDataset +from ..utils.torch_utility import EarlyStoppingTorch, get_gpu + +class USADModel(nn.Module): + def __init__(self, feats, n_window=5): + super(USADModel, self).__init__() + self.name = 'USAD' + self.lr = 0.0001 + self.n_feats = feats + self.n_hidden = 16 + self.n_latent = 5 + self.n_window = n_window # USAD w_size = 5 + self.n = self.n_feats * self.n_window + self.encoder = nn.Sequential( + nn.Flatten(), + nn.Linear(self.n, self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n_latent), nn.ReLU(True), + ) + self.decoder1 = nn.Sequential( + nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), + ) + self.decoder2 = nn.Sequential( + nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), + nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), + ) + + def forward(self, g): + bs = g.shape[0] + ## Encode + # z = self.encoder(g.view(1,-1)) + z = self.encoder(g.view(bs, self.n)) + ## Decoders (Phase 1) + ae1 = self.decoder1(z) + ae2 = self.decoder2(z) + ## 
Encode-Decode (Phase 2) + ae2ae1 = self.decoder2(self.encoder(ae1)) + # return ae1.view(-1), ae2.view(-1), ae2ae1.view(-1) + return ae1.view(bs, self.n), ae2.view(bs, self.n), ae2ae1.view(bs, self.n) + + +class USAD(BaseDetector): + def __init__(self, + win_size = 5, + feats = 1, + batch_size = 128, + epochs = 10, + patience = 3, + lr = 1e-4, + validation_size=0.2 + ): + super().__init__() + + self.__anomaly_score = None + + self.cuda = True + self.device = get_gpu(self.cuda) + + self.win_size = win_size + self.batch_size = batch_size + self.epochs = epochs + self.feats = feats + self.validation_size = validation_size + + self.model = USADModel(feats=self.feats, n_window=self.win_size).to(self.device) + self.optimizer = torch.optim.AdamW( + self.model.parameters(), lr=lr, weight_decay=1e-5 + ) + self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) + self.criterion = nn.MSELoss(reduction = 'none') + + self.early_stopping = EarlyStoppingTorch(None, patience=patience) + + def fit(self, data): + tsTrain = data[:int((1-self.validation_size)*len(data))] + tsValid = data[int((1-self.validation_size)*len(data)):] + + train_loader = DataLoader( + dataset=ReconstructDataset(tsTrain, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=True + ) + + valid_loader = DataLoader( + dataset=ReconstructDataset(tsValid, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + l1s, l2s = [], [] + for epoch in range(1, self.epochs + 1): + self.model.train(mode=True) + n = epoch + 1 + avg_loss = 0 + loop = tqdm.tqdm( + enumerate(train_loader), total=len(train_loader), leave=True + ) + for idx, (d, _) in loop: + d = d.to(self.device) # (bs, win, feat) + # print('d: ', d.shape) + + ae1s, ae2s, ae2ae1s = self.model(d) + # print('ae2ae1s: ', ae2ae1s.shape) + + d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) + + l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) + l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) + # print('l1: ', l1.shape) + + l1s.append(torch.mean(l1).item()) + l2s.append(torch.mean(l2).item()) + loss = torch.mean(l1 + l2) + + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + avg_loss += loss.cpu().item() + loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") + loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) + + if len(valid_loader) > 0: + self.model.eval() + avg_loss_val = 0 + loop = tqdm.tqdm( + enumerate(valid_loader), total=len(valid_loader), leave=True + ) + with torch.no_grad(): + for idx, (d, _) in loop: + d = d.to(self.device) + ae1s, ae2s, ae2ae1s = self.model(d) + d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) + + l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) + l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) + + l1s.append(torch.mean(l1).item()) + l2s.append(torch.mean(l2).item()) + loss = torch.mean(l1 + l2) + avg_loss_val += loss.cpu().item() + loop.set_description( + f"Validation Epoch [{epoch}/{self.epochs}]" + ) + loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) + + self.scheduler.step() + if len(valid_loader) > 0: + avg_loss = avg_loss_val / len(valid_loader) + else: + avg_loss = avg_loss / len(train_loader) + self.early_stopping(avg_loss, self.model) + if self.early_stopping.early_stop: + print(" Early stopping<<<") + break + + def decision_function(self, data): + test_loader = DataLoader( + 
dataset=ReconstructDataset(data, window_size=self.win_size), + batch_size=self.batch_size, + shuffle=False + ) + + self.model.eval() + scores = [] + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + + with torch.no_grad(): + for idx, (d, _) in loop: + d = d.to(self.device) + # print('d: ', d.shape) + + ae1, ae2, ae2ae1 = self.model(d) + d = d.view(ae2ae1.shape[0], self.feats*self.win_size) + + # print('ae2ae1: ', ae2ae1.shape) + # print('d: ', d.shape) + + loss = 0.1 * self.criterion(ae1, d) + 0.9 * self.criterion(ae2ae1, d) + # print('loss: ', loss.shape) + loss = torch.mean(loss, axis=-1) + + scores.append(loss.cpu()) + + scores = torch.cat(scores, dim=0) + scores = scores.numpy() + + self.__anomaly_score = scores + + if self.__anomaly_score.shape[0] < len(data): + self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + + list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) + + return self.__anomaly_score + + def anomaly_score(self) -> np.ndarray: + return self.__anomaly_score + + def param_statistic(self, save_file): + pass diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/__pycache__/__init__.cpython-310.pyc b/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3bab17cdefea4d4acfa202bd47e6212cf7fa145 Binary files /dev/null and b/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/models/base.py b/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..792290aaa86435976169503fa24c0dde7e2e7ff5 --- /dev/null +++ b/models/base.py @@ -0,0 +1,628 @@ +from __future__ import division +from __future__ import print_function + +import abc +import warnings +from collections import defaultdict +from inspect import signature +import os +import numpy as np +from numpy import percentile +from scipy.special import erf +from scipy.stats import binom +from sklearn.metrics import roc_auc_score +from sklearn.preprocessing import MinMaxScaler +from sklearn.utils import deprecated +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted +from sklearn.metrics import precision_score +from sklearn.utils import column_or_1d + + +def precision_n_scores(y, y_pred, n=None): + """Utility function to calculate precision @ rank n.""" + # turn raw prediction decision scores into binary labels + y_pred = get_label_n(y, y_pred, n) + + # enforce formats of y and labels_ + y = column_or_1d(y) + y_pred = column_or_1d(y_pred) + + return precision_score(y, y_pred) + + +def get_label_n(y, y_pred, n=None): + """Function to turn raw outlier scores into binary labels by assign 1 + to top n outlier scores.""" + if n is not None: + threshold = np.percentile(y_pred, 100 * (1 - n / len(y_pred))) + y_pred_binary = (y_pred > threshold).astype('int') + else: + # if n is not defined, use the number of outliers in ground truth + n = np.sum(y) + threshold = np.percentile(y_pred, 100 * (1 - n / len(y_pred))) + y_pred_binary = (y_pred > threshold).astype('int') + + return y_pred_binary + + +def _pprint(params, offset=0, printer=repr): + """Pretty print the dictionary 'params'""" + # Do a multi-line justified repr: + options = np.get_printoptions() + np.set_printoptions(precision=5, threshold=64, edgeitems=2) + params_list = list() + this_line_length = offset 
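+    # separator between printed parameters: a comma, a newline, and (1 + offset // 2)
+    # spaces of indentation, so continuation lines sit roughly under the estimator name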
+ line_sep = ',\n' + (1 + offset // 2) * ' ' + for i, (k, v) in enumerate(sorted(params.items())): + if type(v) is float: + # use str for representing floating point numbers + this_repr = '%s=%s' % (k, str(v)) + else: + # use repr of the rest + this_repr = '%s=%s' % (k, printer(v)) + params_list.append(this_repr) + this_line_length += len(this_repr) + + lines = [line_sep.join(params_list)] + # reset numpy print options + np.set_printoptions(**options) + return '\n'.join(lines) + + +class BaseDetector(metaclass=abc.ABCMeta): + """Abstract class for all outlier detection algorithms. + + + Parameters + ---------- + contamination : float in (0., 0.5), optional (default=0.1) + The amount of contamination of the data set, + i.e. the proportion of outliers in the data set. Used when fitting to + define the threshold on the decision function. + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is fitted. + + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. + """ + + @abc.abstractmethod + def __init__(self, contamination=0.1): + + if (isinstance(contamination, (float, int))): + + if not (0. < contamination <= 0.5): + raise ValueError("contamination must be in (0, 0.5], " + "got: %f" % contamination) + + # allow arbitrary input such as PyThreshld object + self.contamination = contamination + + # noinspection PyIncorrectDocstring + @abc.abstractmethod + def fit(self, X, y=None): + """Fit detector. y is ignored in unsupervised methods. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator. + """ + pass + + @abc.abstractmethod + def decision_function(self, X): + """Predict raw anomaly scores of X using the fitted detector. + + The anomaly score of an input sample is computed based on the fitted + detector. For consistency, outliers are assigned with + higher anomaly scores. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. Sparse matrices are accepted only + if they are supported by the base estimator. + + Returns + ------- + anomaly_scores : numpy array of shape (n_samples,) + The anomaly score of the input samples. + """ + pass + + @deprecated() + def fit_predict(self, X, y=None): + """Fit detector first and then predict whether a particular sample + is an outlier or not. y is ignored in unsupervised models. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether + it should be considered as an outlier according to the + fitted model. 0 stands for inliers and 1 for outliers. + + .. 
deprecated:: 0.6.9 + `fit_predict` will be removed in pyod 0.8.0.; it will be + replaced by calling `fit` function first and then accessing + `labels_` attribute for consistency. + """ + + self.fit(X, y) + return self.labels_ + + def predict(self, X, return_confidence=False): + """Predict if a particular sample is an outlier or not. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + return_confidence : boolean, optional(default=False) + If True, also return the confidence of prediction. + + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether + it should be considered as an outlier according to the + fitted model. 0 stands for inliers and 1 for outliers. + confidence : numpy array of shape (n_samples,). + Only if return_confidence is set to True. + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + pred_score = self.decision_function(X) + + if isinstance(self.contamination, (float, int)): + prediction = (pred_score > self.threshold_).astype('int').ravel() + + # if this is a PyThresh object + else: + prediction = self.contamination.eval(pred_score) + + if return_confidence: + confidence = self.predict_confidence(X) + return prediction, confidence + + return prediction + + def predict_proba(self, X, method='linear', return_confidence=False): + """Predict the probability of a sample being outlier. Two approaches + are possible: + + 1. simply use Min-max conversion to linearly transform the outlier + scores into the range of [0,1]. The model must be + fitted first. + 2. use unifying scores, see :cite:`kriegel2011interpreting`. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + method : str, optional (default='linear') + probability conversion method. It must be one of + 'linear' or 'unify'. + + return_confidence : boolean, optional(default=False) + If True, also return the confidence of prediction. + + + Returns + ------- + outlier_probability : numpy array of shape (n_samples, n_classes) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. Return the outlier probability, ranging + in [0,1]. Note it depends on the number of classes, which is by + default 2 classes ([proba of normal, proba of outliers]). + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + train_scores = self.decision_scores_ + + test_scores = self.decision_function(X) + + probs = np.zeros([X.shape[0], int(self._classes)]) + if method == 'linear': + scaler = MinMaxScaler().fit(train_scores.reshape(-1, 1)) + probs[:, 1] = scaler.transform( + test_scores.reshape(-1, 1)).ravel().clip(0, 1) + probs[:, 0] = 1 - probs[:, 1] + + if return_confidence: + confidence = self.predict_confidence(X) + return probs, confidence + + return probs + + elif method == 'unify': + # turn output into probability + pre_erf_score = (test_scores - self._mu) / ( + self._sigma * np.sqrt(2)) + erf_score = erf(pre_erf_score) + probs[:, 1] = erf_score.clip(0, 1).ravel() + probs[:, 0] = 1 - probs[:, 1] + + if return_confidence: + confidence = self.predict_confidence(X) + return probs, confidence + + return probs + else: + raise ValueError(method, + 'is not a valid probability conversion method') + + def predict_confidence(self, X): + """Predict the model's confidence in making the same prediction + under slightly different training sets. + See :cite:`perini2020quantifying`. 
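+
+        Concretely (as implemented below): for a test score ``s`` and ``n``
+        training scores, the posterior outlier probability is estimated as
+        ``p = (1 + #{training scores <= s}) / (2 + n)``, and the confidence is
+        ``1 - BinomCDF(n - int(n * contamination), n, p)``; for samples
+        predicted as inliers this value is flipped to ``1 - confidence``.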
+ + Parameters + ------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + Returns + ------- + confidence : numpy array of shape (n_samples,) + For each observation, tells how consistently the model would + make the same prediction if the training set was perturbed. + Return a probability, ranging in [0,1]. + + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + n = len(self.decision_scores_) + + # todo: this has an optimization opportunity since the scores may + # already be available + test_scores = self.decision_function(X) + + count_instances = np.vectorize( + lambda x: np.count_nonzero(self.decision_scores_ <= x)) + n_instances = count_instances(test_scores) + + # Derive the outlier probability using Bayesian approach + posterior_prob = np.vectorize(lambda x: (1 + x) / (2 + n))(n_instances) + + if not isinstance(self.contamination, (float, int)): + contam = np.sum(self.labels_) / n + # if this is a PyThresh object + else: + contam = self.contamination + + # Transform the outlier probability into a confidence value + confidence = np.vectorize( + lambda p: 1 - binom.cdf(n - int(n * contam), n, p))( + posterior_prob) + + if isinstance(self.contamination, (float, int)): + prediction = (test_scores > self.threshold_).astype('int').ravel() + # if this is a PyThresh object + else: + prediction = self.contamination.eval(test_scores) + np.place(confidence, prediction == 0, 1 - confidence[prediction == 0]) + + return confidence + + def _predict_rank(self, X, normalized=False): + """Predict the outlyingness rank of a sample by a fitted model. The + method is for outlier detector score combination. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + normalized : bool, optional (default=False) + If set to True, all ranks are normalized to [0,1]. + + Returns + ------- + ranks : array, shape (n_samples,) + Outlying rank of a sample according to the training data. + + """ + + check_is_fitted(self, ['decision_scores_']) + + test_scores = self.decision_function(X) + train_scores = self.decision_scores_ + + sorted_train_scores = np.sort(train_scores) + ranks = np.searchsorted(sorted_train_scores, test_scores) + + if normalized: + # return normalized ranks + ranks = ranks / ranks.max() + return ranks + + @deprecated() + def fit_predict_score(self, X, y, scoring='roc_auc_score'): + """Fit the detector, predict on samples, and evaluate the model by + predefined metrics, e.g., ROC. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + y : Ignored + Not used, present for API consistency by convention. + + scoring : str, optional (default='roc_auc_score') + Evaluation metric: + + - 'roc_auc_score': ROC score + - 'prc_n_score': Precision @ rank n score + + Returns + ------- + score : float + + .. deprecated:: 0.6.9 + `fit_predict_score` will be removed in pyod 0.8.0.; it will be + replaced by calling `fit` function first and then accessing + `labels_` attribute for consistency. Scoring could be done by + calling an evaluation method, e.g., AUC ROC. 
+ """ + + self.fit(X) + + if scoring == 'roc_auc_score': + score = roc_auc_score(y, self.decision_scores_) + elif scoring == 'prc_n_score': + score = precision_n_scores(y, self.decision_scores_) + else: + raise NotImplementedError('PyOD built-in scoring only supports ' + 'ROC and Precision @ rank n') + + print("{metric}: {score}".format(metric=scoring, score=score)) + + return score + + # def score(self, X, y, scoring='roc_auc_score'): + # """Returns the evaluation resulted on the given test data and labels. + # ROC is chosen as the default evaluation metric + # + # :param X: The input samples + # :type X: numpy array of shape (n_samples, n_features) + # + # :param y: Outlier labels of the input samples + # :type y: array, shape (n_samples,) + # + # :param scoring: Evaluation metric + # + # -' roc_auc_score': ROC score + # - 'prc_n_score': Precision @ rank n score + # :type scoring: str, optional (default='roc_auc_score') + # + # :return: Evaluation score + # :rtype: float + # """ + # check_is_fitted(self, ['decision_scores_']) + # if scoring == 'roc_auc_score': + # score = roc_auc_score(y, self.decision_function(X)) + # elif scoring == 'prc_n_score': + # score = precision_n_scores(y, self.decision_function(X)) + # else: + # raise NotImplementedError('PyOD built-in scoring only supports ' + # 'ROC and Precision @ rank n') + # + # print("{metric}: {score}".format(metric=scoring, score=score)) + # + # return score + + def _set_n_classes(self, y): + """Set the number of classes if `y` is presented, which is not + expected. It could be useful for multi-class outlier detection. + + Parameters + ---------- + y : numpy array of shape (n_samples,) + Ground truth. + + Returns + ------- + self + """ + + self._classes = 2 # default as binary classification + if y is not None: + check_classification_targets(y) + self._classes = len(np.unique(y)) + warnings.warn( + "y should not be presented in unsupervised learning.") + return self + + def _process_decision_scores(self): + """Internal function to calculate key attributes: + + - threshold_: used to decide the binary label + - labels_: binary labels of training data + + Returns + ------- + self + """ + + if isinstance(self.contamination, (float, int)): + self.threshold_ = percentile(self.decision_scores_, + 100 * (1 - self.contamination)) + self.labels_ = (self.decision_scores_ > self.threshold_).astype( + 'int').ravel() + + # if this is a PyThresh object + else: + self.labels_ = self.contamination.eval(self.decision_scores_) + self.threshold_ = self.contamination.thresh_ + if not self.threshold_: + self.threshold_ = np.sum(self.labels_) / len(self.labels_) + + # calculate for predict_proba() + + self._mu = np.mean(self.decision_scores_) + self._sigma = np.std(self.decision_scores_) + + return self + + # noinspection PyMethodParameters + def _get_param_names(cls): + # noinspection PyPep8 + """Get parameter names for the estimator + + See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html + and sklearn/base.py for more information. 
+ """ + + # fetch the constructor or the original constructor before + # deprecation wrapping if any + init = getattr(cls.__init__, 'deprecated_original', cls.__init__) + if init is object.__init__: + # No explicit constructor to introspect + return [] + + # introspect the constructor arguments to find the model parameters + # to represent + init_signature = signature(init) + # Consider the constructor parameters excluding 'self' + parameters = [p for p in init_signature.parameters.values() + if p.name != 'self' and p.kind != p.VAR_KEYWORD] + for p in parameters: + if p.kind == p.VAR_POSITIONAL: + raise RuntimeError("scikit-learn estimators should always " + "specify their parameters in the signature" + " of their __init__ (no varargs)." + " %s with constructor %s doesn't " + " follow this convention." + % (cls, init_signature)) + # Extract and sort argument names excluding 'self' + return sorted([p.name for p in parameters]) + + # noinspection PyPep8 + def get_params(self, deep=True): + """Get parameters for this estimator. + + See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html + and sklearn/base.py for more information. + + Parameters + ---------- + deep : bool, optional (default=True) + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + params : mapping of string to any + Parameter names mapped to their values. + """ + + out = dict() + for key in self._get_param_names(): + # We need deprecation warnings to always be on in order to + # catch deprecated param values. + # This is set in utils/__init__.py but it gets overwritten + # when running under python3 somehow. + warnings.simplefilter("always", DeprecationWarning) + try: + with warnings.catch_warnings(record=True) as w: + value = getattr(self, key, None) + if len(w) and w[0].category == DeprecationWarning: + # if the parameter is deprecated, don't show it + continue + finally: + warnings.filters.pop(0) + + # XXX: should we rather test if instance of estimator? + if deep and hasattr(value, 'get_params'): + deep_items = value.get_params().items() + out.update((key + '__' + k, val) for k, val in deep_items) + out[key] = value + return out + + def set_params(self, **params): + # noinspection PyPep8 + """Set the parameters of this estimator. + The method works on simple estimators as well as on nested objects + (such as pipelines). The latter have parameters of the form + ``__`` so that it's possible to update each + component of a nested object. + + See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html + and sklearn/base.py for more information. + + Returns + ------- + self : object + """ + + if not params: + # Simple optimization to gain speed (inspect is slow) + return self + valid_params = self.get_params(deep=True) + + nested_params = defaultdict(dict) # grouped by prefix + for key, value in params.items(): + key, delim, sub_key = key.partition('__') + if key not in valid_params: + raise ValueError('Invalid parameter %s for estimator %s. ' + 'Check the list of available parameters ' + 'with `estimator.get_params().keys()`.' 
% + (key, self)) + + if delim: + nested_params[key][sub_key] = value + else: + setattr(self, key, value) + + for key, sub_params in nested_params.items(): + valid_params[key].set_params(**sub_params) + + return self + + def __repr__(self): + # noinspection PyPep8 + """ + See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html + and sklearn/base.py for more information. + """ + + class_name = self.__class__.__name__ + return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False), + offset=len(class_name), ),) \ No newline at end of file diff --git a/models/distance.py b/models/distance.py new file mode 100644 index 0000000000000000000000000000000000000000..e96efd797a6dbfc0ed5e7fbcbdf38f50f6db1890 --- /dev/null +++ b/models/distance.py @@ -0,0 +1,835 @@ +"""Classes of distance measure for model type A +""" +# Author: Yinchen Wu + +import numpy as np +from arch import arch_model +import math + + +class Euclidean: + """ The function class for Lp euclidean norm + ---------- + Power : int, optional (default=1) + The power of the lp norm. For power = k, the measure is calculagted by |x - y|_k + neighborhood : int, optional (default=max (100, 10*window size)) + The length of neighborhood to derivete the normalizing constant D which is based on + the difference of maximum and minimum in the neighborhood minus window. + window: int, optional (default = length of input data) + The length of the subsequence to be compaired + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, power = 1, neighborhood = 100, window = 20, norm = False): + self.power = power + self.window = window + self.neighborhood = neighborhood + self.detector = None + self.decision_scores_ = [] + self.norm = norm + self.X_train = 2 + def measure(self, X, Y, index): + """Derive the decision score based on the given distance measure + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The real input samples subsequence. + Y : numpy array of shape (n_samples, ) + The estimated input samples subsequence. + Index : int + the index of the starting point in the subsequence + Returns + ------- + score : float + dissimiarity score between the two subsquence + """ + X_train = self.X_train + X_train = self.detector.X_train_ + power = self.power + + window = self.window + neighborhood = self.neighborhood + norm = self.norm + data = X_train + if norm == False: + if X.shape[0] == 0: + score = 0 + else: + score = np.linalg.norm(X-Y, power)/(X.shape[0]) + self.decision_scores_.append((index, score)) + return score + elif type(X_train) == int: + print('Error! 
Detector is not fed to the object and X_train is not known') + elif neighborhood != 'all': + neighbor = int(self.neighborhood/2) + + if index + neighbor < self.n_train_ and index - neighbor > 0: + region = np.concatenate((data[index - neighbor: index], data[index + window: index + neighbor] )) + D = np.max(region) - np.min(region) + elif index + neighbor >= self.n_train_ and index + window < self.n_train_: + region = np.concatenate((data[self.n_train_ - neighborhood: index], data[index + window: self.n_train_] )) + D = np.max(region) - np.min(region) + elif index + window >= self.n_train_: + region = data[self.n_train_ - neighborhood: index] + D = np.max(region) - np.min(region) + else: + region = np.concatenate((data[0: index], data[index + window: index + neighborhood] )) + D = np.max(region) - np.min(region) + + score = np.linalg.norm(X-Y, power)/D/(X.shape[0]**power) + self.decision_scores_.append((index, score)) + return score + def set_param(self): + if self.detector != None: + self.window = self.detector.window + self.neighborhood = self.detector.neighborhood + self.n_train_ = self.detector.n_train_ + self.X_train = self.detector.X_train_ + else: + print('Error! Detector is not fed to the object and X_train is not known') + return self + + +class Mahalanobis: + """ The function class for Mahalanobis measure + ---------- + Probability : boolean, optional (default=False) + Whether to derive the anomoly score by the probability that such point occurs + neighborhood : int, optional (default=max (100, 10*window size)) + The length of neighborhood to derivete the normalizing constant D which is based on + the difference of maximum and minimum in the neighborhood minus window. + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, probability = False): + self.probability = probability + self.detector = None + self.decision_scores_ = [] + self.mu = 0 + + def set_param(self): + '''update the parameters with the detector that is used + ''' + + self.n_initial_ = self.detector.n_initial_ + self.estimation = self.detector.estimation + self.X_train = self.detector.X_train_ + self.window = self.detector.window + window = self.window + resid = self.X_train - self.estimation + number = max(100, self.window) + self.residual = np.zeros((window, number)) + for i in range(number): + self.residual[:, i] = resid[self.n_initial_+i:self.n_initial_+i+window] + self.mu = np.zeros(number) + self.cov = np.cov(self.residual, rowvar=1) + if self.window == 1: + self.cov = (np.sum(np.square(self.residual))/(number - 1))**0.5 + return self + def norm_pdf_multivariate(self, x): + '''multivarite normal density function + ''' + try: + mu = self.mu + except: + mu = np.zeros(x.shape[0]) + sigma = self.cov + size = x.shape[0] + if size == len(mu) and (size, size) == sigma.shape: + det = np.linalg.det(sigma) + if det == 0: + raise NameError("The covariance matrix can't be singular") + + norm_const = 1.0/ ( math.pow((2*math.pi),float(size)/2) * math.pow(det,1.0/2) ) + x_mu = np.matrix(x - mu) + inv = np.linalg.inv(sigma) + result = math.pow(math.e, -0.5 * (x_mu * inv * x_mu.T)) + return norm_const * result + else: + raise NameError("The dimensions of the input don't match") + def normpdf(self, x): + '''univariate normal + ''' + mean = 0 + sd = np.asscalar(self.cov) + var = float(sd)**2 + denom = (2*math.pi*var)**.5 + num = math.exp(-(float(x)-float(mean))**2/(2*var)) + return num/denom + + def measure(self, X, Y, index): + """Derive the decision score based on the given distance measure + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The real input samples subsequence. + Y : numpy array of shape (n_samples, ) + The estimated input samples subsequence. + Index : int + the index of the starting point in the subsequence + Returns + ------- + score : float + dissimiarity score between the two subsquence + """ + mu = np.zeros(self.detector.window) + cov = self.cov + if self.probability == False: + + if X.shape[0] == mu.shape[0]: + score = np.matmul(np.matmul((X-Y-mu).T, cov), (X-Y-mu))/(X.shape[0]) + self.decision_scores_.append((index, score)) + return score + else: + return (X-Y).T.dot(X-Y) + + else: + if len(X) > 1: + prob = self.norm_pdf_multivariate(X-Y) + elif len(X) == 1: + X = np.asscalar(X) + Y = np.asscalar(Y) + prob = self.normpdf(X-Y) + else: + prob = 1 + score = 1 - prob + score = max(score, 0) + self.decision_scores_.append((index, score)) + return score + + +class Garch: + """ The function class for garch measure + ---------- + p, q : int, optional (default=1, 1) + The order of the garch model to be fitted on the residual + mean : string, optional (default='zero' ) + The forecast conditional mean. + vol: string, optional (default = 'garch') + he forecast conditional variance. + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, p = 1, q = 1, mean = 'zero', vol = 'garch'): + self.p = p + self.q = q + self.vol = vol + self.mean = mean + self.decision_scores_ = [] + + def set_param(self): + '''update the parameters with the detector that is used + ''' + q = self.q + p=self.p + mean = self.mean + vol = self.vol + if self.detector != None: + self.n_initial_ = self.detector.n_initial_ + self.estimation = self.detector.estimation + self.X_train = self.detector.X_train_ + self.window = self.detector.window + resid = 10 * (self.X_train - self.estimation) + model = arch_model(resid, mean=mean, vol=vol, p=p, q=q) + model_fit = model.fit(disp='off') + self.votility = model_fit.conditional_volatility/10 + else: + print('Error! Detector not fed to the measure') + return self + + def measure(self, X, Y, index): + """Derive the decision score based on the given distance measure + Parameters + ---------- + X : numpy array of shape (n_samples, ) + The real input samples subsequence. + Y : numpy array of shape (n_samples, ) + The estimated input samples subsequence. + Index : int + the index of the starting point in the subsequence + Returns + ------- + score : float + dissimiarity score between the two subsquences + """ + X = np.array(X) + Y = np.array(Y) + length = len(X) + score = 0 + if length != 0: + for i in range(length): + sigma = self.votility[index + i] + if sigma != 0: + score += abs(X[i]-Y[i])/sigma + + score = score/length + return score + + +class SSA_DISTANCE: + """ The function class for SSA measure + good for contextual anomolies + ---------- + method : string, optional (default='linear' ) + The method to fit the line and derives the SSA score + e: float, optional (default = 1) + The upper bound to start new line search for linear method + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, method ='linear', e = 1): + self.method = method + self.decision_scores_ = [] + self.e = e + def Linearization(self, X2): + """Obtain the linearized curve. + Parameters + ---------- + X2 : numpy array of shape (n, ) + the time series curve to be fitted + e: float, integer, or numpy array + weights to obtain the + Returns + ------- + fit: parameters for the fitted linear curve + """ + e = self.e + i = 0 + fit = {} + fit['index'] = [] + fit['rep'] = [] + while i < len(X2): + fit['index'].append(i) + try: + fit['Y'+str(i)]= X2[i] + except: + print(X2.shape, X2) + fit['rep'].append(np.array([i, X2[i]])) + if i+1 >= len(X2): + break + k = X2[i+1]-X2[i] + b = -i*(X2[i+1]-X2[i])+X2[i] + fit['reg' +str(i)]= np.array([k, b]) + i += 2 + if i >= len(X2): + break + d = np.abs(X2[i]- (k * i+b)) + while d < e: + i +=1 + if i >= len(X2): + break + d = np.abs(X2[i]- (k * i+b)) + return fit + def set_param(self): + '''update the parameters with the detector that is used. + Since the SSA measure doens't need the attributes of detector + or characteristics of X_train, the process is omitted. + ''' + + return self + + def measure(self, X2, X3, start_index): + """Obtain the SSA similarity score. 
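+        Both series are first piecewise-linearized (see ``Linearization``); the
+        score is the mean absolute vertical gap between the two fitted polylines,
+        evaluated at the union ``Index`` of their breakpoints:
+        ``score = sum_i |f_X2(i) - f_X3(i)| / len(Index)``.
+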
+ Parameters + ---------- + X2 : numpy array of shape (n, ) + the reference timeseries + X3 : numpy array of shape (n, ) + the tested timeseries + e: float, integer, or numpy array + weights to obtain the + Returns + ------- + score: float, the higher the more dissimilar are the two curves + """ + #linearization of data X2 and X3 + X2 = np.array(X2) + X3 = np.array(X3) + + fit = self.Linearization(X2) + fit2 = self.Linearization(X3) + + #line alinement + Index = [] + test_list = fit['index'] + fit2['index'] + [Index.append(x) for x in test_list if x not in Index] + Y = 0 + + #Similarity Computation + for i in Index: + if i in fit['index'] and i in fit2['index']: + Y += abs(fit['Y'+str(i)]-fit2['Y'+str(i)]) + + elif i in fit['index']: + J = np.max(np.where(np.array(fit2['index']) < i )) + index = fit2['index'][J] + k = fit2['reg'+str(index)][0] + b = fit2['reg'+str(index)][1] + value = abs(k * i + b - fit['Y'+str(i)]) + Y += value + elif i in fit2['index']: + J = np.max(np.where(np.array(fit['index']) < i )) + index = fit['index'][J] + k = fit['reg'+str(index)][0] + b = fit['reg'+str(index)][1] + value = abs(k * i + b - fit2['Y'+str(i)]) + Y += value + if len(Index) != 0: + score = Y/len(Index) + else: + score = 0 + self.decision_scores_.append((start_index, score)) + if len(X2) == 1: + print('Error! SSA measure doesn\'t apply to singleton' ) + else: + return score + + +class Fourier: + """ The function class for Fourier measure + good for contextual anomolies + ---------- + power: int, optional (default = 2) + Lp norm for dissimiarlity measure considered + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. + detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, power = 2): + self.decision_scores_ = [] + self.power = power + def set_param(self): + '''update the parameters with the detector that is used + since the FFT measure doens't need the attributes of detector + or characteristics of X_train, the process is omitted. + ''' + + return self + + def measure(self, X2, X3, start_index): + """Obtain the SSA similarity score. + Parameters + ---------- + X2 : numpy array of shape (n, ) + the reference timeseries + X3 : numpy array of shape (n, ) + the tested timeseries + index: int, + current index for the subseqeuence that is being measured + Returns + ------- + score: float, the higher the more dissimilar are the two curves + """ + + power = self.power + X2 = np.array(X2) + X3 = np.array(X3) + if len(X2) == 0: + score = 0 + else: + X2 = np.fft.fft(X2) + X3 = np.fft.fft(X3) + score = np.linalg.norm(X2 - X3, ord = power)/len(X3) + self.decision_scores_.append((start_index, score)) + return score + + +class DTW: + """ The function class for dynamic time warping measure + + ---------- + method : string, optional (default='L2' ) + The distance measure to derive DTW. + Avaliable "L2", "L1", and custom + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, method = 'L2'): + self.decision_scores_ = [] + if type(method) == str: + if method == 'L1': + distance = lambda x, y: abs(x-y) + elif method == 'L2': + distance = lambda x, y: (x-y)**2 + else: + distance = method + self.distance = distance + def set_param(self): + '''update the parameters with the detector that is used + since the FFT measure doens't need the attributes of detector + or characteristics of X_train, the process is omitted. + ''' + + return self + + def measure(self, X1, X2, start_index): + """Obtain the SSA similarity score. + Parameters + ---------- + X1 : numpy array of shape (n, ) + the reference timeseries + X2 : numpy array of shape (n, ) + the tested timeseries + index: int, + current index for the subseqeuence that is being measured + Returns + ------- + score: float, the higher the more dissimilar are the two curves + """ + distance = self.distance + X1 = np.array(X1) + X2 = np.array(X2) + + value = 1 + if len(X1)==0: + value =0 + X1= np.zeros(5) + X2 = X1 + M = np.zeros((len(X1), len(X2))) + for index_i in range(len(X1)): + for index_j in range(len(X1) - index_i): + L = [] + i = index_i + j = index_i + index_j + D = distance(X1[i], X2[j]) + try: + L.append(M[i-1, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i-1, j]) + except: + L.append(np.inf) + D += min(L) + M[i,j] = D + if i !=j: + L = [] + j = index_i + i = index_i + index_j + D = distance(X1[i], X2[j]) + try: + L.append(M[i-1, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i-1, j]) + except: + L.append(np.inf) + D += min(L) + M[i,j] = D + + score = M[len(X1)-1, len(X1)-1]/len(X1) + if value == 0: + score = 0 + self.decision_scores_.append((start_index, score)) + return score + + +class EDRS: + """ The function class for edit distance on real sequences + + ---------- + method : string, optional (default='L2' ) + The distance measure to derive DTW. + Avaliable "L2", "L1", and custom + ep: float, optiona (default = 0.1) + the threshold value to decide Di_j + vot : boolean, optional (default = False) + whether to adapt a chaging votilities estimaed by garch + for ep at different windows. + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, method = 'L1', ep = False, vol = False): + self.decision_scores_ = [] + if type(method) == str: + if method == 'L1': + distance = lambda x, y: abs(x-y) + else: + distance = method + self.distance = distance + self.ep = ep + self.vot = vol + def set_param(self): + '''update the ep based on the votalitiy of the model + ''' + estimation = np.array(self.detector.estimation ) + initial = self.detector.n_initial_ + X = np.array(self.detector.X_train_) + self.initial = initial + residual = estimation[initial:] - X[initial:] + # number = len(residual) + # var = (np.sum(np.square(residual))/(number - 1))**0.5 + vot = self.vot + if vot == False: + var = np.var(residual) + else: + model = arch_model(10 * residual, mean='Constant', vol='garch', p=1, q=1) + model_fit = model.fit(disp='off') + var = model_fit.conditional_volatility/10 + + if self.ep == False: + self.ep = 3 * (np.sum(np.square(residual))/(len(residual) - 1))**0.5 + else: + self.ep = self.ep + + return self + + def measure(self, X1, X2, start_index): + """Obtain the SSA similarity score. + Parameters + ---------- + X1 : numpy array of shape (n, ) + the reference timeseries + X2 : numpy array of shape (n, ) + the tested timeseries + index: int, + current index for the subseqeuence that is being measured + Returns + ------- + score: float, the higher the more dissimilar are the two curves + """ + distance = self.distance + X1 = np.array(X1) + X2 = np.array(X2) + vot = self.vot + + if vot == False: + ep = self.ep + else: + try: + ep = self.ep[start_index - self.initial] + except: + #sometime start_index is the length of the number + ep = 0 + value = 1 + if len(X1)==0: + value =0 + X1= np.zeros(5) + X2 = X1 + M = np.zeros((len(X1), len(X2))) + M[:, 0] = np.arange(len(X1)) + M[0, :] = np.arange(len(X1)) + for index_i in range(1, len(X1)): + for index_j in range(len(X1) - index_i): + + L = [] + i = index_i + j = index_i + index_j + D = distance(X1[i], X2[j]) + if D < ep: + M[i, j]= M[i-1, j-1] + else: + try: + L.append(M[i-1, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i-1, j]) + except: + L.append(np.inf) + M[i,j] = 1 + min(L) + if i !=j: + L = [] + j = index_i + i = index_i + index_j + D = distance(X1[i], X2[j]) + if D < ep: + M[i, j]= M[i-1, j-1] + else: + try: + L.append(M[i-1, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i, j-1]) + except: + L.append(np.inf) + try: + L.append(M[i-1, j]) + except: + L.append(np.inf) + M[i,j] = 1 + min(L) + + score = M[len(X1)-1, len(X1)-1]/len(X1) + if value == 0: + score = 0 + self.decision_scores_.append((start_index, score)) + return score + +class TWED: + """ Function class for Time-warped edit distance(TWED) measure + + ---------- + method : string, optional (default='L2' ) + The distance measure to derive DTW. + Avaliable "L2", "L1", and custom + gamma: float, optiona (default = 0.1) + mismatch penalty + v : float, optional (default = False) + stifness parameter + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ detector: Object classifier + the anomaly detector that is used + """ + def __init__(self, gamma = 0.1, v = 0.1): + self.decision_scores_ = [] + + self.gamma = gamma + self.v = v + def set_param(self): + '''No need''' + return self + + def measure(self, A, B, start_index): + """Obtain the SSA similarity score. + Parameters + ---------- + X1 : numpy array of shape (n, ) + the reference timeseries + X2 : numpy array of shape (n, ) + the tested timeseries + index: int, + current index for the subseqeuence that is being measured + Returns + ------- + score: float, the higher the more dissimilar are the two curves + """ + #code modifed from wikipedia + Dlp = lambda x,y: abs(x-y) + timeSB = np.arange(1,len(B)+1) + timeSA = np.arange(1,len(A)+1) + nu = self.v + _lambda = self.gamma + # Reference : + # Marteau, P.; F. (2009). "Time Warp Edit Distance with Stiffness Adjustment for Time Series Matching". + # IEEE Transactions on Pattern Analysis and Machine Intelligence. 31 (2): 306–318. arXiv:cs/0703033 + # http://people.irisa.fr/Pierre-Francois.Marteau/ + + # Check if input arguments + if len(A) != len(timeSA): + print("The length of A is not equal length of timeSA") + return None, None + + if len(B) != len(timeSB): + print("The length of B is not equal length of timeSB") + return None, None + + if nu < 0: + print("nu is negative") + return None, None + + # Add padding + A = np.array([0] + list(A)) + timeSA = np.array([0] + list(timeSA)) + B = np.array([0] + list(B)) + timeSB = np.array([0] + list(timeSB)) + + n = len(A) + m = len(B) + # Dynamical programming + DP = np.zeros((n, m)) + + # Initialize DP Matrix and set first row and column to infinity + DP[0, :] = np.inf + DP[:, 0] = np.inf + DP[0, 0] = 0 + + # Compute minimal cost + for i in range(1, n): + for j in range(1, m): + # Calculate and save cost of various operations + C = np.ones((3, 1)) * np.inf + # Deletion in A + C[0] = ( + DP[i - 1, j] + + Dlp(A[i - 1], A[i]) + + nu * (timeSA[i] - timeSA[i - 1]) + + _lambda + ) + # Deletion in B + C[1] = ( + DP[i, j - 1] + + Dlp(B[j - 1], B[j]) + + nu * (timeSB[j] - timeSB[j - 1]) + + _lambda + ) + # Keep data points in both time series + C[2] = ( + DP[i - 1, j - 1] + + Dlp(A[i], B[j]) + + Dlp(A[i - 1], B[j - 1]) + + nu * (abs(timeSA[i] - timeSB[j]) + abs(timeSA[i - 1] - timeSB[j - 1])) + ) + # Choose the operation with the minimal cost and update DP Matrix + DP[i, j] = np.min(C) + distance = DP[n - 1, m - 1] + self.M = DP + self.decision_scores_.append((start_index, distance)) + return distance \ No newline at end of file diff --git a/models/feature.py b/models/feature.py new file mode 100644 index 0000000000000000000000000000000000000000..597f18fcaa45d959bc8bf67373970574ac975433 --- /dev/null +++ b/models/feature.py @@ -0,0 +1,326 @@ +import numpy as np +import pandas as pd +import math +from statsmodels.tsa.seasonal import seasonal_decompose +import warnings +from builtins import range +from numpy.linalg import LinAlgError +from numpy.lib.stride_tricks import sliding_window_view + +with warnings.catch_warnings(): + # Ignore warnings of the patsy package + warnings.simplefilter("ignore", DeprecationWarning) + + from statsmodels.tsa.ar_model import AR + +from hurst import compute_Hc + +class Window: + """ The class for rolling window feature mapping. + Converts the original time series X into a matrix of sliding windows. 
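+
+    Illustrative example (a 1-D input array is assumed)::
+
+        >>> import numpy as np
+        >>> Window(window=3, stride=1).convert(np.arange(6)).shape
+        (4, 3)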
+ """ + def __init__(self, window=100, stride=1): + self.window = window + self.stride = stride + + def convert(self, X): + shape = (X.shape[0] - (self.window - 1), -1) + windows = sliding_window_view(X, window_shape=self.window, axis=0).reshape(shape)[::self.stride, :] + return windows + +class tf_Stat: + '''statisitc feature extraction using the tf_feature package. + It calculates 763 features in total so it might be over complicated for some models. + Recommend to use for methods like Isolation Forest which randomly picks a feature + and then perform the classification. To use for other distance-based model like KNN, + LOF, CBLOF, etc, first train to pass a function that give weights to individual features so that + inconsequential features won't cloud the important ones (mean, variance, kurtosis, etc). + + ''' + def __init__(self, window = 100, step = 25): + self.window = window + self.step = step + self.detector = None + def convert(self, X): + window = self.window + step = self.step + pos = math.ceil(window/2) + #step <= window + + length = X.shape[0] + + Xd = pd.DataFrame(X) + Xd.columns = pd.Index(['x'], dtype='object') + Xd['id'] = 1 + Xd['time'] = Xd.index + + from tsfresh import extract_features + test = np.array(extract_features(Xd.iloc[0+pos-math.ceil(window/2):0+pos + math.floor(window/2)], column_id="id", column_sort="time", column_kind=None, column_value=None).fillna(0)) + M = np.zeros((length - window, test.shape[1]+1 )) + + + i = 0 + while i + window <= M.shape[0]: + M[i:i+step, 0]= X[pos + i: pos + i + step] + vector = np.array(extract_features(Xd.iloc[i+pos-math.ceil(window/2):i+pos + math.floor(window/2)], column_id="id", column_sort="time", column_kind=None, column_value=None).fillna(0)) + + M[i:i+step, 1:] = vector + i+= step + num = M.shape[0] + if i < num: + M[i: num, 0]= X[pos + i: pos + num] + M[i: num, 1:] = np.array(extract_features(Xd.iloc[i+pos-math.ceil(window/2):], column_id="id", column_sort="time", column_kind=None, column_value=None).fillna(0)) + return M + +class Stat: + '''statisitc feature extraction. 
+ Features include [mean, variance, skewness, kurtosis, autocorrelation, maximum, + minimum, entropy, seasonality, hurst component, AR coef] + + ''' + def __init__(self, window = 100, data_step = 10, param = [{"coeff": 0, "k": 5}], lag = 1, freq = 720): + self.window = window + self.data_step = data_step + self.detector = None + self.param = param + self.lag = lag + self.freq =freq + if data_step > int(window/2): + raise ValueError('value step shoudm\'t be greater than half of the window') + + + def convert(self, X): + freq = self.freq + n = self.window + data_step = self.data_step + X = pd.Series(X) + L = [] + if n == 0: + df = X + raise ValueError('window lenght is set to zero') + else: + for i in range(n): + L.append(X.shift(i)) + df = pd.concat(L, axis = 1) + df = df.iloc[n:] + df2 = pd.concat(L[:data_step], axis = 1) + + + + df = df.reset_index() + #value + x0 = df2[math.ceil(n/2) : - math.floor(n/2)].reset_index() + #mean + x1 = (df.mean(axis=1)) + #variance + x2 = df.var(axis=1) + #AR-coef + self.ar_function = lambda x: self.ar_coefficient(x) + x3 = df.apply(self.ar_function, axis =1, result_type='expand' ) + #autocorrelation + self.auto_function = lambda x: self.autocorrelation(x) + x4 = df.apply(self.auto_function, axis =1, result_type='expand' ) + #kurtosis + x5 = (df.kurtosis(axis=1)) + #skewness + x6 = (df.skew(axis=1)) + #maximum + x7 = (df.max(axis=1)) + #minimum + x8 = (df.min(axis=1)) + #entropy + self.entropy_function = lambda x: self.sample_entropy(x) + x9 = df.apply(self.entropy_function, axis =1, result_type='expand') + + #seasonality + result = seasonal_decompose(X, model='additive', freq = freq, extrapolate_trend='freq') + #seasonal + x10 = pd.Series(np.array(result.seasonal[math.ceil(n/2) : - math.floor(n/2)])) + #trend + x11 = pd.Series(np.array(result.trend[math.ceil(n/2) : - math.floor(n/2)])) + #resid + x12 = pd.Series(np.array(result.resid[math.ceil(n/2) : - math.floor(n/2)])) + + #Hurst component + self.hurst_function = lambda x: self.hurst_f(x) + x13 = df.apply(self.hurst_function, axis =1, result_type='expand') + + L = [x0, x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12, x13] + M = pd.concat(L, axis = 1) + M = M.drop(columns=['index']) + + return M + def ar_coefficient(self, x): + """ + This feature calculator fits the unconditional maximum likelihood + of an autoregressive AR(k) process. + The k parameter is the maximum lag of the process + + .. math:: + + X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t} + + For the configurations from param which should contain the maxlag "k" and such an AR process is calculated. Then + the coefficients :math:`\\varphi_{i}` whose index :math:`i` contained from "coeff" are returned. 
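+
+        For example, the default ``param=[{"coeff": 0, "k": 5}]`` fits one AR(5)
+        model per window and keeps only the constant term :math:`\\varphi_{0}`,
+        reported under the column name ``coeff_0__k_5``.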
+ + :param x: the time series to calculate the feature of + :type x: numpy.ndarray + :param param: contains dictionaries {"coeff": x, "k": y} with x,y int + :type param: list + :return x: the different feature values + :return type: pandas.Series + """ + calculated_ar_params = {} + param = self.param + x_as_list = list(x) + + res = {} + + for parameter_combination in param: + k = parameter_combination["k"] + p = parameter_combination["coeff"] + + column_name = "coeff_{}__k_{}".format(p, k) + + if k not in calculated_ar_params: + try: + calculated_AR = AR(x_as_list) + calculated_ar_params[k] = calculated_AR.fit(maxlag=k, solver="mle").params + except (LinAlgError, ValueError): + calculated_ar_params[k] = [np.NaN] * k + + mod = calculated_ar_params[k] + + if p <= k: + try: + res[column_name] = mod[p] + except IndexError: + res[column_name] = 0 + else: + res[column_name] = np.NaN + + L = [(key, value) for key, value in res.items()] + L0 = [] + for item in L: + L0.append(item[1]) + return L0 + + def autocorrelation(self, x): + """ + Calculates the autocorrelation of the specified lag, according to the formula [1] + + .. math:: + + \\frac{1}{(n-l)\\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu )(X_{t+l}-\\mu) + + where :math:`n` is the length of the time series :math:`X_i`, :math:`\\sigma^2` its variance and :math:`\\mu` its + mean. `l` denotes the lag. + + .. rubric:: References + + [1] https://en.wikipedia.org/wiki/Autocorrelation#Estimation + + :param x: the time series to calculate the feature of + :type x: numpy.ndarray + :param lag: the lag + :type lag: int + :return: the value of this feature + :return type: float + """ + lag = self.lag + # This is important: If a series is passed, the product below is calculated + # based on the index, which corresponds to squaring the series. + if isinstance(x, pd.Series): + x = x.values + if len(x) < lag: + return np.nan + # Slice the relevant subseries based on the lag + y1 = x[:(len(x) - lag)] + y2 = x[lag:] + # Subtract the mean of the whole series x + x_mean = np.mean(x) + # The result is sometimes referred to as "covariation" + sum_product = np.sum((y1 - x_mean) * (y2 - x_mean)) + # Return the normalized unbiased covariance + v = np.var(x) + if np.isclose(v, 0): + return np.NaN + else: + return sum_product / ((len(x) - lag) * v) + def _into_subchunks(self, x, subchunk_length, every_n=1): + """ + Split the time series x into subwindows of length "subchunk_length", starting every "every_n". + + For example, the input data if [0, 1, 2, 3, 4, 5, 6] will be turned into a matrix + + 0 2 4 + 1 3 5 + 2 4 6 + + with the settings subchunk_length = 3 and every_n = 2 + """ + len_x = len(x) + + assert subchunk_length > 1 + assert every_n > 0 + + # how often can we shift a window of size subchunk_length over the input? + num_shifts = (len_x - subchunk_length) // every_n + 1 + shift_starts = every_n * np.arange(num_shifts) + indices = np.arange(subchunk_length) + + indexer = np.expand_dims(indices, axis=0) + np.expand_dims(shift_starts, axis=1) + return np.asarray(x)[indexer] + def sample_entropy(self, x): + """ + Calculate and return sample entropy of x. + + .. 
rubric:: References + + | [1] http://en.wikipedia.org/wiki/Sample_Entropy + | [2] https://www.ncbi.nlm.nih.gov/pubmed/10843903?dopt=Abstract + + :param x: the time series to calculate the feature of + :type x: numpy.ndarray + + :return: the value of this feature + :return type: float + """ + x = np.array(x) + + # if one of the values is NaN, we can not compute anything meaningful + if np.isnan(x).any(): + return np.nan + + m = 2 # common value for m, according to wikipedia... + tolerance = 0.2 * np.std(x) # 0.2 is a common value for r, according to wikipedia... + + # Split time series and save all templates of length m + # Basically we turn [1, 2, 3, 4] into [1, 2], [2, 3], [3, 4] + xm = self._into_subchunks(x, m) + + # Now calculate the maximum distance between each of those pairs + # np.abs(xmi - xm).max(axis=1) + # and check how many are below the tolerance. + # For speed reasons, we are not doing this in a nested for loop, + # but with numpy magic. + # Example: + # if x = [1, 2, 3] + # then xm = [[1, 2], [2, 3]] + # so we will substract xm from [1, 2] => [[0, 0], [-1, -1]] + # and from [2, 3] => [[1, 1], [0, 0]] + # taking the abs and max gives us: + # [0, 1] and [1, 0] + # as the diagonal elements are always 0, we substract 1. + B = np.sum([np.sum(np.abs(xmi - xm).max(axis=1) <= tolerance) - 1 for xmi in xm]) + + # Similar for computing A + xmp1 = self._into_subchunks(x, m + 1) + + A = np.sum([np.sum(np.abs(xmi - xmp1).max(axis=1) <= tolerance) - 1 for xmi in xmp1]) + + # Return SampEn + return -np.log(A / B) + def hurst_f(self, x): + H,c, M = compute_Hc(x) + return [H, c] \ No newline at end of file diff --git a/models/test_metrics.py b/models/test_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..8fbdf1b128ff536617fa714550a53d16cb6f03d2 --- /dev/null +++ b/models/test_metrics.py @@ -0,0 +1,30 @@ +# Test the implementation +import time +from .evaluation.metrics import get_metrics_optimized, get_metrics +import numpy as np +# Generate test data +np.random.seed(42) +score = np.random.rand(10000) +labels = np.random.randint(0, 2, 10000) + +# Compare performance +print("Testing original implementation...") +start = time.time() +metrics_original = get_metrics(score, labels) +time_original = time.time() - start +print(f"Original time: {time_original:.2f}s\n") + +print("Testing optimized implementation...") +start = time.time() +metrics_optimized = get_metrics_optimized(score, labels) +time_optimized = time.time() - start +print(f"Optimized time: {time_optimized:.2f}s\n") + +print(f"Speedup: {time_original/time_optimized:.2f}x") + +# Verify results are similar +for key in metrics_original: + orig_val = metrics_original[key] + opt_val = metrics_optimized[key] + if abs(orig_val - opt_val) > 1e-5: + print(f"Warning: {key} differs: {orig_val} vs {opt_val}") diff --git a/models/text_encoder.py b/models/text_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..3da8d22fe432da6782a5059484513060193ca440 --- /dev/null +++ b/models/text_encoder.py @@ -0,0 +1,107 @@ +import torch +import torch.nn as nn +from transformers import LlamaConfig, LlamaModel, GPT2Config, GPT2Model + + +class TextEncoder(nn.Module): + """ + 文本编码器,基于LLaMA或GPT-2模型生成固定大小的嵌入表示。 + + Args: + model_name (str): 模型名称,支持 'llama' 或 'gpt2',默认 'llama'。 + d_proj (int): 输出嵌入维度,默认 512。 + num_layers (int): Transformer层数,默认 6。 + device (str, optional): 计算设备,默认自动选择 ('cuda' 或 'cpu')。 + """ + + def __init__(self, model_name='llama', d_proj=512, num_layers=6, device=None): + 
super().__init__()
+        # Automatically pick the device
+        if device is None:
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = device
+        self.model_name = model_name.lower()
+
+        # Model configuration and loading
+        if self.model_name == 'llama':
+            self.model_type = 'llama'
+            config = LlamaConfig.from_pretrained('huggyllama/llama-7b')
+            config.num_hidden_layers = num_layers
+            self.model = LlamaModel.from_pretrained("huggyllama/llama-7b", config=config)
+            d_model = config.hidden_size  # 4096 for LLaMA-7B
+        elif self.model_name == 'gpt2':
+            self.model_type = 'gpt2'
+            config = GPT2Config.from_pretrained('openai-community/gpt2')
+            config.n_layer = num_layers
+            self.model = GPT2Model.from_pretrained('openai-community/gpt2', config=config)
+            d_model = config.n_embd  # 768 for GPT-2
+        else:
+            raise ValueError("Unsupported model_name. Choose 'llama' or 'gpt2'.")
+
+        # Projection layer
+        self.projection = nn.Linear(d_model, d_proj)
+
+        # Move the model to the selected device
+        self.model.to(self.device)
+        self.projection.to(self.device)
+
+        # Parameter initialization
+        self._init_parameters()
+
+    def _init_parameters(self):
+        """Initialize model parameters."""
+        # Xavier initialization for the projection weights
+        nn.init.xavier_uniform_(self.projection.weight)
+        nn.init.constant_(self.projection.bias, 0.0)
+
+    def forward(self, input_ids, attention_mask):
+        """
+        Forward pass that produces text embeddings.
+
+        Args:
+            input_ids (torch.Tensor): Input token IDs of shape (batch_size, seq_len).
+            attention_mask (torch.Tensor): Attention mask of shape (batch_size, seq_len); 1 marks real tokens, 0 marks padding.
+
+        Returns:
+            torch.Tensor: Embeddings of shape (batch_size, d_proj).
+        """
+        # Input validation
+        assert input_ids.size(0) == attention_mask.size(0), "Batch size mismatch between input_ids and attention_mask"
+        assert input_ids.size(1) == attention_mask.size(
+            1), "Sequence length mismatch between input_ids and attention_mask"
+
+        # Move inputs to the selected device
+        input_ids = input_ids.to(self.device)
+        attention_mask = attention_mask.to(self.device)
+
+        # Disable gradient computation at inference time
+        with torch.no_grad():
+            if self.model_type == 'llama':
+                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+                hidden_states = outputs.last_hidden_state
+
+            # Mean-pool the hidden states, ignoring padded positions
+            masked_hidden = hidden_states * attention_mask.unsqueeze(-1)  # (batch_size, seq_len, d_model)
+            sum_hidden = masked_hidden.sum(dim=1)  # (batch_size, d_model)
+            valid_counts = attention_mask.sum(dim=1, keepdim=True)  # (batch_size, 1)
+            mean_hidden = sum_hidden / valid_counts.clamp(min=1e-9)  # (batch_size, d_model)
+
+            # Project down to d_proj dimensions
+            embedding = self.projection(mean_hidden)  # (batch_size, d_proj)
+            return embedding
+
+
+# Usage example
+if __name__ == "__main__":
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Simulated inputs
+    batch_size, seq_len = 2, 10
+    input_ids = torch.randint(0, 1000, (batch_size, seq_len)).to(device)
+    attention_mask = torch.ones(batch_size, seq_len).to(device)
+    attention_mask[1, 8:] = 0  # simulate padding
+
+    # Initialize the model
+    encoder = TextEncoder(model_name='gpt2', d_proj=256, num_layers=4).to(device)
+    embedding = encoder(input_ids, attention_mask)
+    print(f"Embedding shape: {embedding.shape}")  # (2, 256)
diff --git a/models/time_moe.py b/models/time_moe.py
new file mode 100644
index 0000000000000000000000000000000000000000..aff430a200db845d0c4b6a3ccc3c5a3ba440f91c
--- /dev/null
+++ b/models/time_moe.py
@@ -0,0 +1,162 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+import math
+import tqdm
+import os
+from transformers import AutoTokenizer
+from typing import Optional, Tuple
+
+# Add debugging prints to understand the import issue
+import sys
+
+# 
print(f"Python path: {sys.path}") +# print(f"Current working directory: {os.getcwd()}") +# print(f"Current file location: {__file__}") +# print(f"Current file directory: {os.path.dirname(__file__)}") +# +# # Check if the utils directory exists +# utils_path = os.path.join(os.path.basename(os.path.dirname(__file__)), "utils") +# print(f"Utils path: {utils_path}") +# print(f"Utils directory exists: {os.path.exists(utils_path)}") +# print(f"Utils directory contents: {os.listdir(utils_path) if os.path.exists(utils_path) else 'Directory not found'}") +# +# # Check if dataset.py exists +# dataset_path = os.path.join(utils_path, "dataset.py") +# print(f"Dataset file path: {dataset_path}") +# print(f"Dataset file exists: {os.path.exists(dataset_path)}") + +# Try different import approaches + +os.chdir("/home/lihaoyang/Huawei/TSB-AD/TSB_AD") + +try: + from utils.dataset import ReconstructDataset + + print("Relative import successful") +except ImportError as e: + print(f"Relative import failed: {e}") + + # Try absolute import + try: + from TSB_AD.utils.dataset import ReconstructDataset + + print("Absolute import successful") + except ImportError as e2: + print(f"Absolute import failed: {e2}") + + # Try adding parent directory to path + try: + parent_dir = os.path.dirname(os.path.dirname(__file__)) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + from utils.dataset import ReconstructDataset + + print("Import with modified path successful") + except ImportError as e3: + print(f"Import with modified path failed: {e3}") + +from .base import BaseDetector + + +# ...existing code... + +class Time_MOE(BaseDetector): + def __init__(self, device, args=None, win_size=64, batch_size=32): + self.win_size = win_size + self.batch_size = batch_size + self.device = torch.device(f'cuda:{device}' if torch.cuda.is_available() else 'cpu') + self.model = self._build_model().to(self.device) + + def _build_model(self): + from transformers import AutoModelForCausalLM + model = AutoModelForCausalLM.from_pretrained( + "Maple728/TimeMoE-50M", device_map=self.device, trust_remote_code=True, + ) + return model + + # def _acquire_device(self): + # if True: + # os.environ["CUDA_VISIBLE_DEVICES"] = str( + # self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + # device = torch.device('cuda:{}'.format(self.args.gpu)) + # print('Use GPU: cuda:{}'.format(self.args.gpu)) + # else: + # device = torch.device('cpu') + # print('Use CPU') + # return device + + def decision_function(self, x: torch.Tensor) -> torch.Tensor: + pass + + def fit(self, data: torch.Tensor, labels: Optional[torch.Tensor] = None) -> None: + pass + + def zero_shot(self, data): + test_loader = DataLoader( + dataset=ReconstructDataset(data, window_size=self.win_size, stride=self.win_size, normalize=True), + batch_size=self.batch_size, + shuffle=False) + + loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) + + test_scores = [] + test_labels = [] + self.model.eval() + self.model.to(self.device) + + with torch.no_grad(): + for i, (batch_x, batch_y) in loop: + batch_x = batch_x.float().to(self.device) + # print(f"Batch {i} - batch_x shape: {batch_x.shape}, batch_y shape: {batch_y.shape}") + # print("Here is the batch_x:", batch_x[:10]) + # Reshape batch_x to match model expectations + # TimeMoE expects 2D input: (batch_size, sequence_length) + if batch_x.dim() == 3: + # If input is (batch_size, sequence_length, features), flatten features + batch_x = batch_x.reshape(batch_x.shape[0], -1) + # print(f"Batch {i} - 
reshaped batch_x to 2D: {batch_x.shape}") + elif batch_x.dim() > 3: + # If more dimensions, flatten to 2D + batch_x = batch_x.reshape(batch_x.shape[0], -1) + # print(f"Batch {i} - reshaped batch_x to 2D: {batch_x.shape}") + + # Ensure batch_x is 2D and convert to long tensor for token generation + if batch_x.dim() == 1: + batch_x = batch_x.unsqueeze(0) + # print(f"Batch {i} - batch_x was 1D, reshaped to 2D: {batch_x.shape}") + + # Convert to integer tokens if needed (TimeMoE might expect discrete tokens) + # For time series, we might need to discretize or use the model differently + try: + # Try direct generation first + score = self.model.generate(batch_x.long(), max_new_tokens=self.win_size) + score = score[:, -self.win_size:] + except Exception as e: + print(f"Generation failed with long tensor, trying with float: {e}") + try: + # If that fails, try with original float tensor but ensure 2D + score = self.model.generate(batch_x, max_new_tokens=self.win_size) + score = score[:, -self.win_size:] + except Exception as e2: + print(f"Generation failed: {e2}") + # Fallback: use the model's forward pass instead of generate + outputs = self.model(batch_x) + score = outputs.logits if hasattr(outputs, 'logits') else outputs + # Take last win_size tokens + score = score[:, -self.win_size:] if score.shape[1] >= self.win_size else score + + score = score.detach().cpu().numpy() + test_scores.append(score) + test_labels.append(batch_y) + + test_scores = np.concatenate(test_scores, axis=0).reshape(-1, 1) + test_labels = np.concatenate(test_labels, axis=0).reshape(-1, 1) + + print("Test scores shape:", test_scores.shape) + print("Test labels shape:", test_labels.shape) + + return test_scores.reshape(-1) \ No newline at end of file diff --git a/models/time_rcd/TimeRCD_pretrain_multi.py b/models/time_rcd/TimeRCD_pretrain_multi.py new file mode 100644 index 0000000000000000000000000000000000000000..acd9f134afed09ea9963658181487b4b638910b7 --- /dev/null +++ b/models/time_rcd/TimeRCD_pretrain_multi.py @@ -0,0 +1,236 @@ +import datetime +import itertools +import os +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader, DistributedSampler +import torch.nn.functional as F +import torch.distributed as dist +import torch.multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel as DDP +import random +import numpy as np +from typing import Tuple, List, Dict, Any, Union, Optional +from dataclasses import dataclass + +from .dataset import ChatTSTimeRCDPretrainDataset +from .ts_encoder_bi_bias import TimeSeriesEncoder +from .time_rcd_config import TimeRCDConfig, default_config + +import warnings + +warnings.filterwarnings("ignore") + +@dataclass +class PretrainBatch: + """Batch structure for pretraining tasks.""" + time_series: torch.Tensor + labels: torch.Tensor + masked_time_series: torch.Tensor + mask_indices: torch.Tensor + + +class TimeSeriesPretrainModel(nn.Module): + """Model for time series pretraining with masked reconstruction and anomaly detection.""" + + def __init__(self, config: TimeRCDConfig): + super().__init__() + self.config = config + + # Extract TimeSeriesEncoder parameters from config + ts_config = config.ts_config + self.ts_encoder = TimeSeriesEncoder( + d_model=ts_config.d_model, + d_proj=ts_config.d_proj, + patch_size=ts_config.patch_size, + num_layers=ts_config.num_layers, + num_heads=ts_config.num_heads, + d_ff_dropout=ts_config.d_ff_dropout, + use_rope=ts_config.use_rope, + num_features=ts_config.num_features, + 
activation=ts_config.activation + ) + + # Masked reconstruction head + self.reconstruction_head = nn.Sequential( + nn.Linear(config.ts_config.d_proj, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, 1) # (B, seq_len, num_features, 1) + ) + + # Anomaly detection head + self.anomaly_head = nn.Sequential( + nn.Linear(config.ts_config.d_proj, config.ts_config.d_proj // 2), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj // 2, 2) # (B, seq_len, num_features, 2) for binary classification + ) + + def forward(self, time_series: torch.Tensor, mask: Optional[torch.Tensor] = None): + """Forward pass through the encoder.""" + local_embeddings = self.ts_encoder(time_series, mask) + return local_embeddings + + def masked_reconstruction_loss(self, + local_embeddings: torch.Tensor, # (B, seq_len, num_features, d_proj) + original_time_series: torch.Tensor, # (B, seq_len, num_features), + mask: torch.Tensor # (B, seq_len) + ) -> torch.Tensor: + """Compute masked reconstruction loss.""" + batch_size, seq_len, num_features = original_time_series.shape + patch_size = self.config.ts_config.patch_size + + mask = mask.bool() + + # local_embeddings: [B, seq_len, num_features, d_proj] + reconstructed = self.reconstruction_head(local_embeddings) # (B, seq_len, num_features, 1) + reconstructed = reconstructed.view(batch_size, seq_len, num_features) + + mask_expanded = mask.unsqueeze(-1).expand(-1, -1, num_features) # (B, seq_len, num_features) + reconstruction_loss = F.mse_loss( + reconstructed[mask_expanded], + original_time_series[mask_expanded] + ) + return reconstruction_loss + + def anomaly_detection_loss(self, + local_embeddings: torch.Tensor, # (B, seq_len, num_features, d_proj) + labels: torch.Tensor) -> torch.Tensor: # (B, seq_len) + """Compute anomaly detection loss for each timestep.""" + # Project local embeddings to anomaly scores + logits = self.anomaly_head(local_embeddings) # (B, seq_len, num_features, 2) + logits = torch.mean(logits, dim=-2) # Average over num_features to get (B, seq_len, 2) + + # Reshape for loss computation + batch_size, seq_len, _ = logits.shape + logits = logits.view(-1, 2) # (B*seq_len, 2) + labels = labels.view(-1) # (B*seq_len) + labels = (labels > 0.5).long() + # Create mask for valid labels (not padding) + valid_mask = (labels != -1) + + # Compute loss only on valid timesteps + if valid_mask.sum() > 0: + anomaly_loss = F.cross_entropy( + logits[valid_mask], + labels[valid_mask] + ) + else: + anomaly_loss = torch.tensor(0.0, device=logits.device) + + return anomaly_loss + + +def create_random_mask(time_series: torch.Tensor, # (B, max_seq_len, num_features) + attention_mask: torch.Tensor, # (B, max_seq_len) + mask_ratio: float = 0.15) -> Tuple[torch.Tensor, torch.Tensor]: + """Create random mask for time series patches, only masking valid sequence parts.""" + batch_size, seq_len, num_features = time_series.shape + patch_size = default_config.ts_config.patch_size + + mask = torch.zeros(batch_size, seq_len) # (B, max_seq_len) + + for i in range(batch_size): + # Get valid sequence length for this sample + valid_length = attention_mask[i].sum().item() + + # Calculate number of patches in valid sequence + num_valid_patches = (valid_length - 1) // patch_size + 1 + num_masked = int(num_valid_patches * mask_ratio) + + if num_masked > 0: + # Only select patches from valid 
sequence + masked_patches = torch.randperm(num_valid_patches)[:num_masked] + for j in masked_patches: + start_idx = j * patch_size + end_idx = min((j + 1) * patch_size, valid_length) # Don't exceed valid length + mask[i, start_idx:end_idx] = 1 + + # Create masked time series - only mask valid parts + masked_time_series = time_series.clone() + mask_indices = mask.bool() & attention_mask # Only mask where both mask and attention_mask are True + mask_expanded = mask_indices.unsqueeze(-1).expand(-1, -1, num_features) # (B, max_seq_len, num_features) + masked_time_series[mask_expanded] = torch.randn_like(masked_time_series[mask_expanded]) * 0.1 + + # Update mask to only include valid parts + mask = mask * attention_mask.float() + + return masked_time_series, mask # (B, max_seq_len, num_features), (B, max_seq_len) + + +def collate_fn(batch): + """Collate function for pretraining dataset.""" + time_series_list, normal_time_series_list, labels_list, attribute_list = zip(*batch) + + # Convert to tensors and pad sequences + if time_series_list[0].ndim == 1: + time_series_tensors = [ts.unsqueeze(-1) for ts in time_series_list] # Add feature dimension + normal_time_series_tensors = [nts.unsqueeze(-1) for nts in normal_time_series_list] + else: + time_series_tensors = [ts for ts in time_series_list] + normal_time_series_tensors = [nts for nts in normal_time_series_list] + + # standardize time series + concatenated = torch.cat(time_series_tensors, dim=0) # (total_length, num_features) + mean = concatenated.mean(dim=0, keepdim=True) # (1, num_features) + std = concatenated.std(dim=0, keepdim=True) # (1, num_features) + std = std + 1e-4 + time_series_tensors_std = [(ts - mean) / std for ts in time_series_tensors] + normal_time_series_tensors_std = [(nts - mean) / std for nts in normal_time_series_tensors] + time_series_tensors = time_series_tensors_std + normal_time_series_tensors = normal_time_series_tensors_std + + # labels_tensor = torch.stack(labels_list) + labels = [label for label in labels_list] + # Pad time series to same length + padded_time_series = torch.nn.utils.rnn.pad_sequence( + time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_normal_time_series = torch.nn.utils.rnn.pad_sequence( + normal_time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_labels = torch.nn.utils.rnn.pad_sequence( + labels, batch_first=True, padding_value=-1 + ) # (B, max_seq_len) + + sequence_lengths = [ts.size(0) for ts in time_series_tensors] + B, max_seq_len, num_features = padded_time_series.shape + attention_mask = torch.zeros(B, max_seq_len, dtype=torch.bool) # (B, max_seq_len) + for i, length in enumerate(sequence_lengths): + attention_mask[i, :length] = True + + # Create random masks for reconstruction task - only mask valid sequence parts + masked_time_series, mask = create_random_mask(padded_time_series, attention_mask) + + return { + 'time_series': padded_time_series, + 'normal_time_series': padded_normal_time_series, + 'masked_time_series': masked_time_series, + 'mask': mask, # for reconstruction task + 'labels': padded_labels, + 'attention_mask': attention_mask, # for padding + 'attribute': attribute_list + } + +def test_collate_fn(batch): + """Collate function for pretraining dataset.""" + # Unpack the batch correctly - batch is a list of (time_series, mask) tuples + time_series_list, mask_list = zip(*batch) + + # Stack into batch format instead of concatenating + # This maintains the batch dimension: (B, 
seq_len, num_features) + batched_time_series = torch.stack(time_series_list, dim=0) + print(f"batched_time_series shape: {batched_time_series.shape}") + # Stack masks into batch format: (B, seq_len) + batched_mask = torch.stack(mask_list, dim=0) + print(f"batched_mask shape: {batched_mask.shape}") + + return { + 'time_series': batched_time_series, + 'attention_mask': batched_mask, # for padding + } \ No newline at end of file diff --git a/models/time_rcd/__init__.py b/models/time_rcd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-310.pyc b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd2408a3b8976be323e6a9cc6b5e51354c7571d0 Binary files /dev/null and b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-311.pyc b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03d04f128ea03a8149aba696a5d61276d624d24e Binary files /dev/null and b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-311.pyc differ diff --git a/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-38.pyc b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6372a97bde21a2e4d29d4392e90a5d0cfa7b72c0 Binary files /dev/null and b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-38.pyc differ diff --git a/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-39.pyc b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13b5ee026877b7eb6da2efa8901e4daf116b35ae Binary files /dev/null and b/models/time_rcd/__pycache__/AnomalyLlava_pretrain_multi.cpython-39.pyc differ diff --git a/models/time_rcd/__pycache__/__init__.cpython-310.pyc b/models/time_rcd/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da4f2d78c6db2bdec529cc7e9b0336fcb08d55f0 Binary files /dev/null and b/models/time_rcd/__pycache__/__init__.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/__init__.cpython-311.pyc b/models/time_rcd/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09355910bffd46e8c588ca7c85e8add337b5743b Binary files /dev/null and b/models/time_rcd/__pycache__/__init__.cpython-311.pyc differ diff --git a/models/time_rcd/__pycache__/__init__.cpython-38.pyc b/models/time_rcd/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59090314381edff0fed237e0cdf0248d1258b7b0 Binary files /dev/null and b/models/time_rcd/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/time_rcd/__pycache__/__init__.cpython-39.pyc b/models/time_rcd/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55af45be79a720fc6555e3cfada6af5b77b97459 Binary files /dev/null and b/models/time_rcd/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/time_rcd/__pycache__/anomaly_llava_config.cpython-310.pyc b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-310.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..e71fc93e9fed63406c4ea5a311d1f5a2c83d5718 Binary files /dev/null and b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/anomaly_llava_config.cpython-311.pyc b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b73d46cdf15718772315853e483a415ddb6487fc Binary files /dev/null and b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-311.pyc differ diff --git a/models/time_rcd/__pycache__/anomaly_llava_config.cpython-38.pyc b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..227f31d4e57d7c7b37efc81233811c67a92d7442 Binary files /dev/null and b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-38.pyc differ diff --git a/models/time_rcd/__pycache__/anomaly_llava_config.cpython-39.pyc b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09fb2b6409d08c38d208c6fd032d7785d6c654ab Binary files /dev/null and b/models/time_rcd/__pycache__/anomaly_llava_config.cpython-39.pyc differ diff --git a/models/time_rcd/__pycache__/dataset.cpython-310.pyc b/models/time_rcd/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33ee9805d72783b59e834e587056148f22643e56 Binary files /dev/null and b/models/time_rcd/__pycache__/dataset.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/dataset.cpython-311.pyc b/models/time_rcd/__pycache__/dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6176ad4a279281dacbf60350a915dd17433e9f0 Binary files /dev/null and b/models/time_rcd/__pycache__/dataset.cpython-311.pyc differ diff --git a/models/time_rcd/__pycache__/dataset.cpython-38.pyc b/models/time_rcd/__pycache__/dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..132778bab55b4a715413b381f46200f1354007aa Binary files /dev/null and b/models/time_rcd/__pycache__/dataset.cpython-38.pyc differ diff --git a/models/time_rcd/__pycache__/dataset.cpython-39.pyc b/models/time_rcd/__pycache__/dataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45a94c30e84007cdb16e1344e085c4093ffd678c Binary files /dev/null and b/models/time_rcd/__pycache__/dataset.cpython-39.pyc differ diff --git a/models/time_rcd/__pycache__/time_rcd_config.cpython-310.pyc b/models/time_rcd/__pycache__/time_rcd_config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb00d0edec4fb2a5c2a3cecb2a08c8b638b99cdb Binary files /dev/null and b/models/time_rcd/__pycache__/time_rcd_config.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-310.pyc b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ac5951f2fecbc64040ce388cb9f7392b00dfd36 Binary files /dev/null and b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-310.pyc differ diff --git a/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-311.pyc b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4595bdba794324681b349f85d5d01315afeb58f6 Binary files /dev/null and b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-311.pyc differ 
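Aside (not part of the diff): a minimal sketch of how the pretraining pieces defined in models/time_rcd/TimeRCD_pretrain_multi.py above fit together: collate_fn pads and standardizes a batch and draws the random patch mask, and TimeSeriesPretrainModel turns that batch into patch embeddings plus the two pretraining losses. The import paths and the two dummy samples are assumptions (the repository root is taken to be on PYTHONPATH so `models.time_rcd` resolves as a package, and the default patch size of 4 is used); this is illustrative only, not the project's training script.

# Illustrative sketch only -- the dummy samples stand in for items yielded by ChatTSTimeRCDPretrainDataset.
import torch
from models.time_rcd.TimeRCD_pretrain_multi import TimeSeriesPretrainModel, collate_fn
from models.time_rcd.time_rcd_config import default_config

# Each sample: (time_series, normal_time_series, per-timestep labels, attribute dict);
# lengths are kept multiples of the default patch_size (4) for simplicity.
samples = [
    (torch.randn(96), torch.randn(96), torch.zeros(96, dtype=torch.long), {"id": 0}),
    (torch.randn(64), torch.randn(64), torch.zeros(64, dtype=torch.long), {"id": 1}),
]
batch = collate_fn(samples)  # pads, standardizes, and builds the random patch mask

model = TimeSeriesPretrainModel(default_config)
embeddings = model(batch["masked_time_series"], batch["attention_mask"])  # (B, seq_len, num_features, d_proj)
rec_loss = model.masked_reconstruction_loss(embeddings, batch["time_series"], batch["mask"])
ano_loss = model.anomaly_detection_loss(embeddings, batch["labels"])
print(f"reconstruction loss: {rec_loss.item():.4f}, anomaly loss: {ano_loss.item():.4f}")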
diff --git a/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-38.pyc b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a24396d8ea3f5c8f08cdaac9ca0d0c6d9f9dbfa Binary files /dev/null and b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-38.pyc differ diff --git a/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-39.pyc b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59ab3e4fc81f4de88d250d7cc7dcc8aa479ba043 Binary files /dev/null and b/models/time_rcd/__pycache__/ts_encoder_bi_bias.cpython-39.pyc differ diff --git a/models/time_rcd/dataset.py b/models/time_rcd/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9293b8047cf7ef989f7e233fccccdb2d80e33eb8 --- /dev/null +++ b/models/time_rcd/dataset.py @@ -0,0 +1,151 @@ +import json +import numpy as np +import torch +from torch.utils.data import Dataset +import random +import os +import pickle +from typing import Dict, List, Union, Optional, Tuple +from pathlib import Path + + +class ChatTSTimeRCDPretrainDataset(Dataset): + def __init__(self, + dataset_dir: str, + filename: str, + split: str = 'train', + train_ratio: float = 0.95, + seed: int = 42): + file_path = os.path.join(dataset_dir, filename) + with open(file_path, 'rb') as f: + dataset = pickle.load(f) + random.seed(seed) + indices = list(range(len(dataset))) + random.shuffle(indices) + num_train = int(len(dataset) * train_ratio) + if split == 'train': + selected_indices = indices[:num_train] + elif split == 'test': + selected_indices = indices[num_train:] + else: + raise ValueError("split must be 'train' or 'test'") + self.data = [dataset[i] for i in selected_indices] + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + sample = self.data[idx] + time_series = torch.tensor(sample['time_series'], dtype=torch.float32) + normal_time_series = torch.tensor(sample['normal_time_series'], dtype=torch.float32) + labels = torch.tensor(sample['labels'], dtype=torch.long) + attribute = sample['attribute'] + return time_series, normal_time_series, labels, attribute + + +class ChatTSTimeRCDQADataset(Dataset): + """Dataset class for time series anomaly detection with QA pairs. + + This dataset loads time series data and corresponding question-answer pairs + for anomaly detection tasks. It supports train/val split and efficient loading + of series data from the time_rcd_datasets format. + + Attributes: + split (str): Dataset split, either 'train' or 'val' + series_dir (Path): Directory containing series JSON files + metadata (Dict): Dataset metadata loaded from metadata.json + series_files (List[str]): List of series file paths + window_size_range (Tuple[int, int]): Range of window sizes used in the dataset + """ + + def __init__( + self, + dataset_dir: str, + split: str = 'train', + train_ratio: float = 0.95, + seed: int = 42, + cache_size: int = 1000 + ) -> None: + """Initialize the dataset. 
+ + Args: + dataset_dir: Path to the dataset directory containing metadata.json and series/ + split: Dataset split, either 'train' or 'val' + train_ratio: Ratio of training samples (default: 0.95) + seed: Random seed for reproducibility (default: 42) + cache_size: Number of series files to keep in memory (default: 1000) + """ + self.split = split + self.series_dir = Path(dataset_dir) / 'series' + + # Get all series files and shuffle them + self.series_files = sorted(self.series_dir.glob('series_*.json')) + random.seed(seed) + random.shuffle(self.series_files) + + # Split into train/val + split_idx = int(len(self.series_files) * train_ratio) + self.series_files = self.series_files[:split_idx] if split == 'train' else self.series_files[split_idx:] + + # Initialize LRU cache for series data + self._cache = {} + self._cache_size = cache_size + self._cache_order = [] + + def _load_series(self, file_path: Path) -> Dict: + """Load a series file with caching. + + Args: + file_path: Path to the series JSON file + + Returns: + Dictionary containing the series data + """ + if file_path in self._cache: + # Update cache order + self._cache_order.remove(file_path) + self._cache_order.append(file_path) + return self._cache[file_path] + + # Load new file + with open(file_path, 'r') as f: + data = json.load(f) + + # Update cache + if len(self._cache) >= self._cache_size: + # Remove oldest item + oldest = self._cache_order.pop(0) + del self._cache[oldest] + + self._cache[file_path] = data + self._cache_order.append(file_path) + return data + + def __len__(self) -> int: + """Return the number of samples in the dataset.""" + return len(self.series_files) + + def __getitem__(self, idx: int) -> Dict[str, Union[torch.Tensor, List[Dict]]]: + """Get a sample from the dataset. 
+ + Args: + idx: Index of the sample to retrieve + + Returns: + Dictionary containing: + - time_series: Time series data as torch.Tensor + - windows: List of window data containing QA pairs + - sample_id: Unique identifier for the sample + """ + file_path = self.series_files[idx] + data = self._load_series(file_path) + + # Convert time series to tensor + time_series = np.array(data['original_data']['time_series']) + time_series_tensor = torch.FloatTensor(time_series) + + return { + 'time_series': time_series_tensor, + 'analysis_data': data['windows'] + } + diff --git a/models/time_rcd/full_reconstruction.py b/models/time_rcd/full_reconstruction.py new file mode 100644 index 0000000000000000000000000000000000000000..2c8d8766e7fef0d0720bb89661999b94b1bbe72c --- /dev/null +++ b/models/time_rcd/full_reconstruction.py @@ -0,0 +1,214 @@ +import datetime +import itertools +import os +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader, DistributedSampler +import torch.nn.functional as F +import torch.distributed as dist +import torch.multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel as DDP +import random +import numpy as np +from typing import Tuple, List, Dict, Any, Union, Optional +from dataclasses import dataclass + +from .dataset import ChatTSTimeRCDPretrainDataset +from .ts_encoder_bi_bias import TimeSeriesEncoder +from .time_rcd_config import TimeRCDConfig, default_config + +import warnings +warnings.filterwarnings("ignore") + +@dataclass +class PretrainBatch: + """Batch structure for pretraining tasks.""" + time_series: torch.Tensor + labels: torch.Tensor + masked_time_series: torch.Tensor + mask_indices: torch.Tensor + + +class TimeSeriesPretrainModel(nn.Module): + """Model for time series pretraining with masked reconstruction and anomaly detection.""" + + def __init__(self, config: TimeRCDConfig): + super().__init__() + self.config = config + + # Extract TimeSeriesEncoder parameters from config + ts_config = config.ts_config + self.ts_encoder = TimeSeriesEncoder( + d_model=ts_config.d_model, + d_proj=ts_config.d_proj, + patch_size=ts_config.patch_size, + num_layers=ts_config.num_layers, + num_heads=ts_config.num_heads, + d_ff_dropout=ts_config.d_ff_dropout, + use_rope=ts_config.use_rope, + num_features=ts_config.num_features, + activation=ts_config.activation + ) + + # Masked reconstruction head + self.reconstruction_head = nn.Sequential( + nn.Linear(config.ts_config.d_proj, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, 1) # (B, seq_len, num_features, 1) + ) + self.reconstruction_head.apply(self._init_weights) + + def _init_weights(self, module): + if isinstance(module, nn.Linear): + nn.init.xavier_normal_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + + def forward(self, time_series: torch.Tensor, mask: Optional[torch.Tensor] = None): + """Forward pass through the encoder.""" + local_embeddings = self.ts_encoder(time_series, mask) + return local_embeddings + + def masked_reconstruction_loss(self, + local_embeddings: torch.Tensor, # (B, seq_len, num_features, d_proj) + original_time_series: torch.Tensor, # (B, seq_len, num_features), + mask: torch.Tensor # (B, seq_len) + ) -> torch.Tensor: + """Compute masked reconstruction loss.""" + batch_size, seq_len, num_features = original_time_series.shape + 
patch_size = self.config.ts_config.patch_size + + mask = mask.bool() + + # local_embeddings: [B, seq_len, num_features, d_proj] + reconstructed = self.reconstruction_head(local_embeddings) # (B, seq_len, num_features, 1) + reconstructed = reconstructed.view(batch_size, seq_len, num_features) + + mask_expanded = mask.unsqueeze(-1).expand(-1, -1, num_features) # (B, seq_len, num_features) + reconstruction_loss = F.mse_loss( + reconstructed[mask_expanded], + original_time_series[mask_expanded] + ) + return reconstruction_loss + +def create_random_mask(time_series: torch.Tensor, #(B, max_seq_len, num_features) + attention_mask: torch.Tensor, # (B, max_seq_len) + mask_ratio: float = 0.15) -> Tuple[torch.Tensor, torch.Tensor]: + """Create random mask for time series patches, only masking valid sequence parts.""" + batch_size, seq_len, num_features = time_series.shape + patch_size = default_config.ts_config.patch_size + + mask = torch.zeros(batch_size, seq_len) # (B, max_seq_len) + + for i in range(batch_size): + # Get valid sequence length for this sample + valid_length = attention_mask[i].sum().item() + + # Calculate number of patches in valid sequence + num_valid_patches = (valid_length - 1) // patch_size + 1 + num_masked = int(num_valid_patches * mask_ratio) + + if num_masked > 0: + # Only select patches from valid sequence + masked_patches = torch.randperm(num_valid_patches)[:num_masked] + for j in masked_patches: + start_idx = j * patch_size + end_idx = min((j + 1) * patch_size, valid_length) # Don't exceed valid length + mask[i, start_idx:end_idx] = 1 + + # Create masked time series - only mask valid parts + masked_time_series = time_series.clone() + mask_indices = mask.bool() & attention_mask # Only mask where both mask and attention_mask are True + mask_expanded = mask_indices.unsqueeze(-1).expand(-1, -1, num_features) # (B, max_seq_len, num_features) + masked_time_series[mask_expanded] = torch.randn_like(masked_time_series[mask_expanded]) * 0.1 + + # Update mask to only include valid parts + mask = mask * attention_mask.float() + + return masked_time_series, mask # (B, max_seq_len, num_features), (B, max_seq_len) + + +def collate_fn(batch): + """Collate function for pretraining dataset.""" + time_series_list, normal_time_series_list, labels_list, attribute_list = zip(*batch) + + # Convert to tensors and pad sequences + if time_series_list[0].ndim == 1: + time_series_tensors = [ts.unsqueeze(-1) for ts in time_series_list] # Add feature dimension + normal_time_series_tensors = [nts.unsqueeze(-1) for nts in normal_time_series_list] + else: + time_series_tensors = [ts for ts in time_series_list] + normal_time_series_tensors = [nts for nts in normal_time_series_list] + + # standardize time series + # concatenated = torch.cat(time_series_tensors, dim=0) # (total_length, num_features) + # mean = concatenated.mean(dim=0, keepdim=True) # (1, num_features) + # std = concatenated.std(dim=0, keepdim=True) # (1, num_features) + # std = std + 1e-4 + # time_series_tensors_std = [(ts - mean) / std for ts in time_series_tensors] + # normal_time_series_tensors_std = [(nts - mean) / std for nts in normal_time_series_tensors] + # time_series_tensors = time_series_tensors_std + # normal_time_series_tensors = normal_time_series_tensors_std + + means = [] + stds = [] + for i in range(len(time_series_tensors)): + ts = time_series_tensors[i] + mean = ts.mean(dim=0, keepdim=True) + std = ts.std(dim=0, keepdim=True) + 1e-4 + means.append(mean) + stds.append(std) + time_series_tensors[i] = (ts - mean) / std + for 
i in range(len(normal_time_series_tensors)): + nts = normal_time_series_tensors[i] + mean = means[i] + std = stds[i] + normal_time_series_tensors[i] = (nts - mean) / std + + # labels_tensor = torch.stack(labels_list) + labels = [label for label in labels_list] + # Pad time series to same length + padded_time_series = torch.nn.utils.rnn.pad_sequence( + time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_normal_time_series = torch.nn.utils.rnn.pad_sequence( + normal_time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_labels = torch.nn.utils.rnn.pad_sequence( + labels, batch_first=True, padding_value=-1 + ) # (B, max_seq_len) + + sequence_lengths = [ts.size(0) for ts in time_series_tensors] + B, max_seq_len, num_features = padded_time_series.shape + attention_mask = torch.zeros(B, max_seq_len, dtype=torch.bool) # (B, max_seq_len) + for i, length in enumerate(sequence_lengths): + attention_mask[i, :length] = True + + # Create random masks for reconstruction task - only mask valid sequence parts + masked_time_series, mask = create_random_mask(padded_time_series, attention_mask) + + return { + 'time_series': padded_time_series, + 'normal_time_series': padded_normal_time_series, + 'masked_time_series': masked_time_series, + 'mask': mask, # for reconstruction task + 'labels': padded_labels, + 'attention_mask': attention_mask, # for padding + 'attribute': attribute_list + } + + +def set_seed(seed: int) -> None: + """Set random seed for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False \ No newline at end of file diff --git a/models/time_rcd/time_rcd_config.py b/models/time_rcd/time_rcd_config.py new file mode 100644 index 0000000000000000000000000000000000000000..6b394aab496359d0a8d2c9c88beb9af300c6a6f0 --- /dev/null +++ b/models/time_rcd/time_rcd_config.py @@ -0,0 +1,93 @@ +from dataclasses import dataclass, field +from typing import Dict, Optional + + +@dataclass +class TimeSeriesConfig: + """Configuration for time series encoder. + + Attributes: + d_model: Dimension of model hidden states. + d_proj: Dimension of projection layer. + patch_size: Size of time series patches. + num_layers: Number of transformer layers. + num_heads: Number of attention heads. + d_ff_dropout: Dropout rate for feed-forward networks. + use_rope: Whether to use Rotary Position Embedding. + activation: Activation function name. + num_features: Number of input features. + """ + d_model: int = 512 + d_proj: int = 256 + patch_size: int = 4 + num_query_tokens: int = 1 + num_layers: int = 8 + num_heads: int = 8 + d_ff_dropout: float = 0.1 + use_rope: bool = True + activation: str = "gelu" + num_features: int = 1 + test_batch_limit: int = 20 + + +@dataclass +class TimeRCDConfig: + """Configuration class for Time_RCD model. + + This class contains all hyperparameters and settings for the Time_RCD model. + It is implemented as a dataclass for easy instantiation and modification. + + Attributes: + ts_config: Configuration for time series encoder. + batch_size: Training batch size. + learning_rate: Learning rate for optimization. + num_epochs: Number of training epochs. + max_seq_len: Maximum sequence length. + dropout: Dropout rate. + accumulation_steps: Gradient accumulation steps. + weight_decay: Weight decay for optimization. 
+ enable_ts_train: Whether to train the time series encoder. + seed: Random seed for reproducibility. + """ + + # Model configurations + ts_config: TimeSeriesConfig = field(default_factory=TimeSeriesConfig) + + # Training parameters + batch_size: int = 3 + learning_rate: float = 1e-4 + num_epochs: int = 1000 + max_seq_len: int = 512 + dropout: float = 0.1 + accumulation_steps: int = 1 + weight_decay: float = 1e-5 + enable_ts_train: bool = False + seed: int = 72 + log_freq: int = 100 + save_freq: int = 10 + save_step_freq: int = 100 + model_prefix: str = "time_rcd_qa_by_pretrain" + test_batch_limit: int = 20 + early_stopping_patience: int = 7 + seed: int = 72 + cuda_devices: str = "0, 1, 2, 3" + dist_port: str = "12355" # Port for distributed training communication + device: str = "cuda" + + def to_dict(self) -> Dict[str, any]: + return { + "ts_config": self.ts_config.__dict__, + "batch_size": self.batch_size, + "learning_rate": self.learning_rate, + "num_epochs": self.num_epochs, + "max_seq_len": self.max_seq_len, + "seed": self.seed, + "test_batch_limit": self.test_batch_limit, + "log_freq": self.log_freq, + "save_freq": self.save_freq, + "save_step_freq": self.save_step_freq, + "model_prefix": self.model_prefix, + "device": self.device, + } + +default_config = TimeRCDConfig() \ No newline at end of file diff --git a/models/time_rcd/ts_encoder_bi_bias.py b/models/time_rcd/ts_encoder_bi_bias.py new file mode 100644 index 0000000000000000000000000000000000000000..09fb016506f725cc3b1a95c6d991cd06e4fe2ce7 --- /dev/null +++ b/models/time_rcd/ts_encoder_bi_bias.py @@ -0,0 +1,376 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math +from jaxtyping import Float, Int +from einops import rearrange + + +class RMSNorm(nn.Module): + """Root Mean Square Normalization layer.""" + + def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None: + super().__init__() + self.scale = nn.Parameter(torch.ones(size)) + self.eps = eps + self.dim = dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + norm_x = x.to(torch.float32).pow(2).mean(dim=self.dim, keepdim=True) + x_normed = x * torch.rsqrt(norm_x + self.eps) + return (self.scale * x_normed).type_as(x) + + +class RotaryEmbedding(nn.Module): + """Rotary Positional Embedding for injecting positional information.""" + + def __init__(self, dim): + super().__init__() + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + self.register_buffer("inv_freq", inv_freq) + + def forward(self, seq_len): + t = torch.arange(seq_len, device=self.inv_freq.device).type_as(self.inv_freq) + freqs = torch.einsum("i,j->ij", t, self.inv_freq) + return freqs # Shape: (seq_len, dim // 2) + + +class BinaryAttentionBias(nn.Module): + """Binary Variate Attention for time series data.""" + + def __init__(self, + num_heads: Int): + super().__init__() + self.num_heads = num_heads + self.emd = nn.Embedding(2, num_heads) + + def forward(self, + query_id: Int[torch.Tensor, "batch_size q_len"], + kv_id: Int[torch.Tensor, "batch_size kv_len"], + ) -> Float[torch.Tensor, "batch_size num_heads q_len kv_len"]: + ind = torch.eq(query_id.unsqueeze(-1), kv_id.unsqueeze(-2)) + ind = ind.unsqueeze(1) # (batch_size, 1, q_len, kv_len) + weight = rearrange(self.emd.weight, "two num_heads -> two num_heads 1 1") # (2, num_heads, 1, 1) + bias = ~ind * weight[:1] + ind * weight[1:] # (batch_size, num_heads, q_len, kv_len) + return bias + + +class MultiheadAttentionWithRoPE(nn.Module): + """Multi-head Attention with Rotary Positional 
Encoding (RoPE), non-causal by default.""" + # Note: this attention layer applies BinaryAttentionBias when num_features > 1. + + def __init__(self, embed_dim, num_heads, num_features): + super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + self.head_dim = embed_dim // num_heads + self.num_features = num_features + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + # Linear projections for Q, K, V, and output + self.q_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=False) + + # Binary attention bias for time series + if num_features > 1: + self.binary_attention_bias = BinaryAttentionBias(num_heads) + + def apply_rope(self, x, freqs): + """Apply Rotary Positional Encoding to the input tensor.""" + B, seq_len, embed_dim = x.shape + assert embed_dim == self.embed_dim, "Embedding dimension mismatch" + assert freqs.shape == (seq_len, embed_dim // 2), "freqs shape mismatch" + + # Reshape for rotation: split embed_dim into pairs + x_ = x.view(B, seq_len, embed_dim // 2, 2) + cos = freqs.cos().unsqueeze(0) # (1, seq_len, embed_dim // 2) + sin = freqs.sin().unsqueeze(0) # (1, seq_len, embed_dim // 2) + + # Apply rotation to each pair + x_rot = torch.stack( + [ + x_[..., 0] * cos - x_[..., 1] * sin, + x_[..., 0] * sin + x_[..., 1] * cos, + ], + dim=-1 + ) + return x_rot.view(B, seq_len, embed_dim) + + def forward(self, query, key, value, freqs, query_id=None, kv_id=None, attn_mask=None): + """ + Forward pass for multi-head attention with RoPE. + + Args: + query (Tensor): Shape (B, T, C) + key (Tensor): Shape (B, T, C) + value (Tensor): Shape (B, T, C) + freqs (Tensor): RoPE frequencies, shape (T, embed_dim // 2) + query_id (Tensor, optional): Shape (B, q_len), feature IDs for query + kv_id (Tensor, optional): Shape (B, kv_len), feature IDs for key/value + attn_mask (Tensor, optional): Shape (B, T), True for valid positions, False for padding. 
+ + Returns: + Tensor: Attention output, shape (B, T, C) + """ + B, T, C = query.shape + assert key.shape == (B, T, C) and value.shape == (B, T, C), "query, key, value shapes must match" + + # Project inputs to Q, K, V + Q = self.q_proj(query) + K = self.k_proj(key) + V = self.v_proj(value) + + # Apply RoPE to Q and K + Q_rot = self.apply_rope(Q, freqs) + K_rot = self.apply_rope(K, freqs) + + # Reshape for multi-head attention + Q_rot = Q_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + K_rot = K_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + V = V.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + + # Prepare attention mask for padding + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(1).unsqueeze(2) # (B, 1, 1, T) + else: + attn_mask = None + + if query_id is not None and kv_id is not None: + # Add binary attention bias + attn_bias = self.binary_attention_bias(query_id, kv_id) # (B, num_heads, q_len, kv_len) + scores = torch.matmul(Q_rot, K_rot.transpose(-2, -1)) / math.sqrt( + self.head_dim) # (B, num_heads, q_len, kv_len) + scores += attn_bias + if attn_mask is not None: + scores = scores.masked_fill(~attn_mask, float('-inf')) + attn_weights = F.softmax(scores, dim=-1) # (B, num_heads, q_len, kv_len) + y = torch.matmul(attn_weights, V) # (B, num_heads, q_len, hs) + + else: + # Compute scaled dot-product attention (non-causal) without binary bias + # for param in self.binary_attention_bias.parameters(): + # param.requires_grad = False + y = F.scaled_dot_product_attention( + Q_rot, K_rot, V, + attn_mask=attn_mask, + is_causal=False # Non-causal attention for encoder + ) # (B, nh, T, hs) + + # Reshape and project output + y = y.transpose(1, 2).contiguous().view(B, T, C) + y = self.out_proj(y) + return y + + +class LlamaMLP(nn.Module): + def __init__(self, d_model, dim_feedforward=2048): + super().__init__() + self.hidden_size = d_model + self.intermediate_size = dim_feedforward + self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True) + self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True) + self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=True) + self.act_fn = F.gelu + + def forward(self, x): + down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return down_proj + + +class TransformerEncoderLayerWithRoPE(nn.Module): + """Transformer Encoder Layer with RoPE and RMSNorm.""" + + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu", num_features=1): + super().__init__() + self.self_attn = MultiheadAttentionWithRoPE(d_model, nhead, num_features) + self.dropout = nn.Dropout(dropout) + self.input_norm = RMSNorm(d_model) + self.output_norm = RMSNorm(d_model) + self.mlp = LlamaMLP(d_model, dim_feedforward) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, src, freqs, src_id=None, attn_mask=None): + residual = src + src = self.input_norm(src) + src = self.self_attn(src, src, src, freqs, src_id, src_id, attn_mask=attn_mask) + src = src + residual + residual = src + src = self.output_norm(src) + src = self.mlp(src) + src = residual + self.dropout2(src) + return src + + +class CustomTransformerEncoder(nn.Module): + """Stack of Transformer Encoder Layers.""" + + def __init__(self, d_model, nhead, dim_feedforward, dropout, activation, num_layers, 
num_features): + super().__init__() + self.layers = nn.ModuleList([ + TransformerEncoderLayerWithRoPE( + d_model=d_model, + nhead=nhead, + dim_feedforward=dim_feedforward, + dropout=dropout, + activation=activation, + num_features=num_features + ) for _ in range(num_layers) + ]) + + def forward(self, src, freqs, src_id=None, attn_mask=None): + output = src + for layer in self.layers: + output = layer(output, freqs, src_id, attn_mask=attn_mask) + return output + + +class TimeSeriesEncoder(nn.Module): + """ + Time Series Encoder with PatchTST-like patching, RoPE. + + Args: + d_model (int): Model dimension + d_proj (int): Projection dimension + patch_size (int): Size of each patch + num_layers (int): Number of encoder layers + num_heads (int): Number of attention heads + d_ff_dropout (float): Dropout rate + max_total_tokens (int): Maximum sequence length + use_rope (bool): Use RoPE if True + num_features (int): Number of features in the time series + activation (str): "relu" or "gelu" + + Inputs: + time_series (Tensor): Shape (batch_size, seq_len, num_features) + mask (Tensor): Shape (batch_size, seq_len) + + Outputs: + local_embeddings (Tensor): Shape (batch_size, seq_len, num_features, d_proj) + """ + + def __init__(self, d_model=2048, d_proj=512, patch_size=32, num_layers=6, num_heads=8, + d_ff_dropout=0.1, max_total_tokens=8192, use_rope=True, num_features=1, + activation="relu"): + super().__init__() + self.patch_size = patch_size + self.d_model = d_model + self.d_proj = d_proj + self.num_layers = num_layers + self.num_heads = num_heads + self.d_ff_dropout = d_ff_dropout + self.max_total_tokens = max_total_tokens + self.use_rope = use_rope + self.num_features = num_features + self.activation = activation + + # Patch embedding layer + self.embedding_layer = nn.Linear(patch_size, d_model) + + if use_rope: + # Initialize RoPE and custom encoder + self.rope_embedder = RotaryEmbedding(d_model) + self.transformer_encoder = CustomTransformerEncoder( + d_model=d_model, + nhead=num_heads, + dim_feedforward=d_model * 4, + dropout=d_ff_dropout, + activation=activation, + num_layers=num_layers, + num_features=num_features + ) + else: + # Standard encoder without RoPE + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=num_heads, + dim_feedforward=d_model * 4, + dropout=d_ff_dropout, + batch_first=True, + activation=activation + ) + self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers) + + # Output projection layers + self.projection_layer = nn.Linear(d_model, patch_size * d_proj) + self._init_parameters() + + def _init_parameters(self): + for name, param in self.named_parameters(): + if 'weight' in name and 'linear' in name: + if self.activation == "relu": + nn.init.kaiming_uniform_(param, nonlinearity='relu') + elif self.activation == "gelu": + nn.init.kaiming_uniform_(param, nonlinearity='gelu') + elif 'bias' in name: + nn.init.constant_(param, 0.0) + + def forward(self, time_series, mask): + """Forward pass to generate local embeddings.""" + if time_series.dim() == 2: + time_series = time_series.unsqueeze(-1) + device = time_series.device + B, seq_len, num_features = time_series.size() + assert num_features == self.num_features, f"Number of features mismatch with data: {num_features} vs param: {self.num_features}" + assert mask.size() == (B, seq_len), "Mask shape mismatch" + + # Pad sequence to be divisible by patch_size + padded_length = math.ceil(seq_len / self.patch_size) * self.patch_size + if padded_length > seq_len: + pad_amount = padded_length - 
seq_len + time_series = F.pad(time_series, (0, 0, 0, pad_amount), value=0) + mask = F.pad(mask, (0, pad_amount), value=0) + + # Convert to patches + num_patches = padded_length // self.patch_size + total_length = num_patches * num_features + patches = time_series.view(B, num_patches, self.patch_size, num_features) + patches = patches.permute(0, 3, 1, 2).contiguous() # (B, num_features, num_patches, patch_size) + patches = patches.view(B, num_features * num_patches, self.patch_size) # (B, L, patch_size) + # Create feature IDs for patches + feature_id = torch.arange(num_features, device=device).repeat_interleave( + num_patches) # (num_features * num_patches = L,) + feature_id = feature_id.unsqueeze(0).expand(B, -1) # (B, L) + + # Embed patches + embedded_patches = self.embedding_layer(patches) # (B, L, d_model) + + # Create patch-level mask + mask = mask.view(B, num_patches, self.patch_size) + patch_mask = mask.sum(dim=-1) > 0 # (B, num_patches) + full_mask = patch_mask.unsqueeze(1).expand(-1, num_features, -1) # (B, num_features, num_patches) + full_mask = full_mask.reshape(B, num_features * num_patches) # (B, L) + + # Generate RoPE frequencies if applicable + if self.use_rope: + freqs = self.rope_embedder(total_length).to(device) + else: + freqs = None + + # Encode sequence + if num_features > 1: + output = self.transformer_encoder( + embedded_patches, + freqs=freqs, + src_id=feature_id, + attn_mask=full_mask + ) + else: + output = self.transformer_encoder( + embedded_patches, + freqs=freqs, + attn_mask=full_mask + ) + + # Extract and project local embeddings + patch_embeddings = output # (B, L, d_model) + patch_proj = self.projection_layer(patch_embeddings) # (B, L, patch_size * d_proj) + local_embeddings = patch_proj.view(B, num_features, num_patches, self.patch_size, self.d_proj) + local_embeddings = local_embeddings.permute(0, 2, 3, 1, 4) # (B, num_patches, patch_size, num_features, d_proj) + local_embeddings = local_embeddings.view(B, -1, num_features, self.d_proj)[:, :seq_len, :, + :] # (B, seq_len, num_features, d_proj) + + return local_embeddings diff --git a/models/ts_encoder.py b/models/ts_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..3b9bd370178436248cb8f6d1df44c44dfb9c9590 --- /dev/null +++ b/models/ts_encoder.py @@ -0,0 +1,348 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class RMSNorm(nn.Module): + """Root Mean Square Normalization layer.""" + def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None: + super().__init__() + self.scale = nn.Parameter(torch.ones(size)) + self.eps = eps + self.dim = dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + norm_x = x.to(torch.float32).pow(2).mean(dim=self.dim, keepdim=True) + x_normed = x * torch.rsqrt(norm_x + self.eps) + return (self.scale * x_normed).type_as(x) + +class RotaryEmbedding(nn.Module): + """Rotary Positional Embedding for injecting positional information.""" + def __init__(self, dim): + super().__init__() + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + self.register_buffer("inv_freq", inv_freq) + + def forward(self, seq_len): + t = torch.arange(seq_len, device=self.inv_freq.device).type_as(self.inv_freq) + freqs = torch.einsum("i,j->ij", t, self.inv_freq) + return freqs # Shape: (seq_len, dim // 2) + +class MultiheadAttentionWithRoPE(nn.Module): + """Multi-head Attention with Rotary Positional Encoding (RoPE), non-causal by default.""" + def __init__(self, embed_dim, num_heads): + 
super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + # Linear projections for Q, K, V, and output + self.q_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias=False) + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=False) + + def apply_rope(self, x, freqs): + """Apply Rotary Positional Encoding to the input tensor.""" + B, seq_len, embed_dim = x.shape + assert embed_dim == self.embed_dim, "Embedding dimension mismatch" + assert freqs.shape == (seq_len, embed_dim // 2), "freqs shape mismatch" + + # Reshape for rotation: split embed_dim into pairs + x_ = x.view(B, seq_len, embed_dim // 2, 2) + cos = freqs.cos().unsqueeze(0) # (1, seq_len, embed_dim // 2, 1) + sin = freqs.sin().unsqueeze(0) # (1, seq_len, embed_dim // 2, 1) + + # Apply rotation to each pair + x_rot = torch.stack( + [ + x_[..., 0] * cos - x_[..., 1] * sin, + x_[..., 0] * sin + x_[..., 1] * cos, + ], + dim=-1 + ) + return x_rot.view(B, seq_len, embed_dim) + + def forward(self, query, key, value, freqs, attn_mask=None): + """ + Forward pass for multi-head attention with RoPE. + + Args: + query (Tensor): Shape (B, T, C) + key (Tensor): Shape (B, T, C) + value (Tensor): Shape (B, T, C) + freqs (Tensor): RoPE frequencies, shape (T, embed_dim // 2) + attn_mask (Tensor, optional): Shape (B, T) + + Returns: + Tensor: Attention output, shape (B, T, C) + """ + B, T, C = query.shape + assert key.shape == (B, T, C) and value.shape == (B, T, C), "query, key, value shapes must match" + + # Project inputs to Q, K, V + Q = self.q_proj(query) + K = self.k_proj(key) + V = self.v_proj(value) + + # Apply RoPE to Q and K + Q_rot = self.apply_rope(Q, freqs) + K_rot = self.apply_rope(K, freqs) + + # Reshape for multi-head attention + Q_rot = Q_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + K_rot = K_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + V = V.view(B, T, self.num_heads, self.head_dim).transpose(1, 2) # (B, nh, T, hs) + + # Prepare attention mask for padding + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(1).unsqueeze(2) # (B, 1, 1, T) + else: + attn_mask = None + + # Compute scaled dot-product attention (non-causal) + y = F.scaled_dot_product_attention( + Q_rot, K_rot, V, + attn_mask=attn_mask, + is_causal=False # Non-causal attention for encoder + ) # (B, nh, T, hs) + + # Reshape and project output + y = y.transpose(1, 2).contiguous().view(B, T, C) + y = self.out_proj(y) + return y + +class LlamaMLP(nn.Module): + def __init__(self, d_model, dim_feedforward=2048): + super().__init__() + self.hidden_size = d_model + self.intermediate_size = dim_feedforward + self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True) + self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True) + self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=True) + self.act_fn = F.gelu + + def forward(self, x): + down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return down_proj + +class TransformerEncoderLayerWithRoPE(nn.Module): + """Transformer Encoder Layer with RoPE and RMSNorm.""" + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"): + super().__init__() + self.self_attn = 
MultiheadAttentionWithRoPE(d_model, nhead) + self.dropout = nn.Dropout(dropout) + self.input_norm = RMSNorm(d_model) + self.output_norm = RMSNorm(d_model) + self.mlp = LlamaMLP(d_model, dim_feedforward) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, src, freqs, attn_mask=None): + residual = src + src = self.input_norm(src) + src = self.self_attn(src, src, src, freqs, attn_mask=attn_mask) + src = src + residual + residual = src + src = self.output_norm(src) + src = self.mlp(src) + src = residual + self.dropout2(src) + return src + +# class CustomTransformerEncoder(nn.Module): +# """Stack of Transformer Encoder Layers.""" +# def __init__(self, d_model, nhead, dim_feedforward, dropout, activation, num_layers): +# super().__init__() +# self.layers = nn.ModuleList([ +# TransformerEncoderLayerWithRoPE( +# d_model=d_model, +# nhead=nhead, +# dim_feedforward=dim_feedforward, +# dropout=dropout, +# activation=activation +# ) for _ in range(num_layers) +# ]) + + # def forward(self, src, freqs, attn_mask=None): + # output = src + # for layer in self.layers: + # output = layer(output, freqs, attn_mask=attn_mask) + # return output + +class CustomTransformerEncoder(nn.Module): + """Stack of Transformer Encoder Layers.""" + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = nn.ModuleList([encoder_layer for _ in range(num_layers)]) + + def forward(self, src, freqs, attn_mask=None): + output = src + for layer in self.layers: + output = layer(output, freqs, attn_mask=attn_mask) + return output + +class TimeSeriesEncoder(nn.Module): + """ + Time Series Encoder with PatchTST-like patching, RoPE, and [CLS] token. + + Args: + d_model (int): Model dimension + d_proj (int): Projection dimension + patch_size (int): Size of each patch + num_layers (int): Number of encoder layers + num_heads (int): Number of attention heads + d_ff_dropout (float): Dropout rate + num_features (int): Number of input features + max_total_tokens (int): Maximum sequence length + use_rope (bool): Use RoPE if True + activation (str): "relu" or "gelu" + + Inputs: + time_series (Tensor): Shape (batch_size, seq_len, num_features) + mask (Tensor): Shape (batch_size, seq_len) + + Outputs: + global_embedding (Tensor): Shape (batch_size, d_proj) + local_embeddings (Tensor): Shape (batch_size, seq_len, d_proj) + """ + def __init__(self, d_model=2048, d_proj=512, patch_size=32, num_layers=6, num_heads=8, + d_ff_dropout=0.1, num_features=None, max_total_tokens=8192, use_rope=True, + activation="relu"): + super().__init__() + if num_features is None: + raise ValueError("num_features must be specified") + self.patch_size = patch_size + self.d_model = d_model + self.d_proj = d_proj + self.num_features = num_features + self.num_layers = num_layers + self.num_heads = num_heads + self.d_ff_dropout = d_ff_dropout + self.max_total_tokens = max_total_tokens + self.use_rope = use_rope + self.activation = activation + + # Patch embedding layer + self.embedding_layer = nn.Linear(patch_size * num_features, d_model) + # Learnable [CLS] token + self.cls_token = nn.Parameter(torch.randn(1, 1, d_model)) + + # if use_rope: + # # Initialize RoPE and custom encoder + # self.rope_embedder = RotaryEmbedding(d_model) + # self.transformer_encoder = CustomTransformerEncoder( + # d_model=d_model, + # nhead=num_heads, + # dim_feedforward=d_model * 4, + # dropout=d_ff_dropout, + # activation=activation, + # num_layers=num_layers + # 
) + if use_rope: + # Initialize RoPE and custom encoder + self.rope_embedder = RotaryEmbedding(d_model) + encoder_layer = TransformerEncoderLayerWithRoPE( + d_model=d_model, + nhead=num_heads, + dim_feedforward=d_model * 4, + dropout=d_ff_dropout, + activation=activation + ) + self.transformer_encoder = CustomTransformerEncoder(encoder_layer, num_layers) + else: + # Standard encoder without RoPE + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=num_heads, + dim_feedforward=d_model * 4, + dropout=d_ff_dropout, + batch_first=True, + activation=activation + ) + self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers) + + # Output projection layers + self.projection_layer = nn.Linear(d_model, patch_size * d_proj) + self.cls_projection = nn.Linear(d_model, d_proj) + self._init_parameters() + + def _init_parameters(self): + for name, param in self.named_parameters(): + if 'weight' in name and 'linear' in name: + if self.activation == "relu": + nn.init.kaiming_uniform_(param, nonlinearity='relu') + elif self.activation == "gelu": + nn.init.kaiming_uniform_(param, nonlinearity='relu') + elif 'bias' in name: + nn.init.constant_(param, 0.0) + # [CLS] token 初始化 + nn.init.normal_(self.cls_token, mean=0.0, std=0.02) + # CLS projection 层初始化 + if self.activation == "relu": + nn.init.kaiming_uniform_(self.cls_projection.weight, nonlinearity='relu') + elif self.activation == "gelu": + nn.init.kaiming_uniform_(self.cls_projection.weight, nonlinearity='relu') + nn.init.constant_(self.cls_projection.bias, 0.0) + + def forward(self, time_series, mask): + """Forward pass to generate global and local embeddings.""" + print("Time Series Shape:", time_series.size()) + if time_series.dim() == 2: + time_series = time_series.unsqueeze(-1) + device = time_series.device + B, seq_len, num_features = time_series.size() + assert mask.size() == (B, seq_len), "Mask shape mismatch" + + # Pad sequence to be divisible by patch_size + padded_length = math.ceil(seq_len / self.patch_size) * self.patch_size + if padded_length > seq_len: + pad_amount = padded_length - seq_len + time_series = F.pad(time_series, (0, 0, 0, pad_amount), value=0) + mask = F.pad(mask, (0, pad_amount), value=0) + + # Convert to patches + num_patches = padded_length // self.patch_size + patches = time_series.view(B, num_patches, self.patch_size, num_features) + patches = patches.view(B, num_patches, self.patch_size * num_features) + # Embed patches + embedded_patches = self.embedding_layer(patches) # (B, num_patches, d_model) + + # Create patch-level mask + mask = mask.view(B, num_patches, self.patch_size) + patch_mask = mask.sum(dim=-1) > 0 # (B, num_patches) + + # Prepend [CLS] token + cls_token = self.cls_token.expand(B, -1, -1) + embedded_patches = torch.cat([cls_token, embedded_patches], dim=1) # (B, 1 + num_patches, d_model) + + # Full mask including [CLS] + cls_mask = torch.ones(B, 1, device=device, dtype=torch.bool) + full_mask = torch.cat([cls_mask, patch_mask], dim=1) # (B, 1 + num_patches) + + # Generate RoPE frequencies if applicable + if self.use_rope: + seq_len_with_cls = num_patches + 1 + freqs = self.rope_embedder(seq_len_with_cls).to(device) + else: + freqs = None + + # Encode sequence + output = self.transformer_encoder( + embedded_patches, + freqs=freqs, + attn_mask=full_mask + ) + + # Extract global embedding from [CLS] + cls_embedding = output[:, 0, :] + global_embedding = self.cls_projection(cls_embedding) + + # Extract and project local embeddings + patch_embeddings = output[:, 1:, :] + 
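# Shape walk-through for the projection below (illustrative numbers, assuming
# B=2, num_patches=4, patch_size=32, d_proj=512):
#   patch_embeddings        : (2, 4, d_model)
#   projection_layer(...)   : (2, 4, 32 * 512)
#   view + flatten          : (2, 4, 32, 512) -> (2, 128, 512)
#   [:, :seq_len, :]        : drops the timesteps added by padding, so
#                             local_embeddings realigns with the input series.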
patch_proj = self.projection_layer(patch_embeddings) + local_embeddings = patch_proj.view(B, num_patches, self.patch_size, self.d_proj) + local_embeddings = local_embeddings.view(B, -1, self.d_proj)[:, :seq_len, :] + + return global_embedding, local_embeddings \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..37b2ee55a7120e5b23d86392b29fb330fa1e28aa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +gradio>=4.0.0 +huggingface_hub>=0.25.0 +numpy>=1.23 +pandas>=1.5 +scikit-learn>=1.1 +torch>=2.0 +torchvision>=0.15 +transformers>=4.35 +einops>=0.6 +jaxtyping>=0.2 +tqdm>=4.65 +matplotlib>=3.7 +seaborn>=0.12 +statsmodels>=0.14 + diff --git a/testing.py b/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..6d5af81757550cf17027acdbd434b0494a43c9c9 --- /dev/null +++ b/testing.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# Author: Qinghua Liu +# License: Apache-2.0 License + +import sys +import os +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pandas as pd +import torch +import random, argparse +import numpy as np +from sklearn.preprocessing import MinMaxScaler +from evaluation.metrics import get_metrics +from utils.slidingWindows import find_length_rank +from model_wrapper import * +from HP_list import Optimal_Uni_algo_HP_dict, Optimal_Multi_algo_HP_dict +import os +# Cuda devices +os.environ["CUDA_VISIBLE_DEVICES"] = "0" +# seeding +seed = 2024 +torch.manual_seed(seed) +torch.cuda.manual_seed(seed) +torch.cuda.manual_seed_all(seed) +np.random.seed(seed) +random.seed(seed) +torch.backends.cudnn.benchmark = False +torch.backends.cudnn.deterministic = True +import os +print("CUDA Available: ", torch.cuda.is_available()) +print("cuDNN Version: ", torch.backends.cudnn.version()) +import pickle + + +def get_result(filename): + pickle_filename = filename.replace('.csv', '_results.pkl') + df = pickle.load(open(pickle_filename, 'rb')) + + return df['anomaly_score'].to_numpy() + +if __name__ == '__main__': + # Resolve dataset directory relative to this file (portable across machines) + parser = argparse.ArgumentParser(description='Running TSB-AD') + parser.add_argument('--mode', type=str, default='uni', choices=['uni', 'multi'], + help='Encoder mode: uni for univariate, multi for multivariate') + parser.add_argument('--AD_Name', type=str, default='Time_RCD') + parser.add_argument('--filename', type=str, default='') + parser.add_argument('--data_direc', type=str, default='') + parser.add_argument('--save', type=bool, default=True) + Multi = parser.parse_args().mode == 'multi' + # Initialize list to store all results + all_results = [] + all_logits = [] + if Multi: + filter_list = [ + "GHL", + "Daphnet", + "Exathlon", + "Genesis", + "OPP", + "SMD", + # "SWaT", + # "PSM", + "SMAP", + "MSL", + "CreditCard", + "GECCO", + "MITDB", + "SVDB", + "LTDB", + "CATSv2", + "TAO" + ] + base_dir = 'datasets/TSB-AD-M/' + files = os.listdir(base_dir) + else: + filter_list = [ + "Daphnet", + "CATSv2", + "SWaT", + "LTDB", + "TAO", + "Exathlon", + "MITDB", + "MSL", + "SMAP", + "SMD", + "SVDB", + "OPP", + + # "IOPS", + # "MGAB", + # "NAB", + # "NEK", + # "Power", + # "SED", + # "Stock", + # "TODS", + # "WSD", + # "YAHOO", + # "UCR" + ] + base_dir = 'datasets/TSB-AD-U/' + files = os.listdir(base_dir) + + + + # ## ArgumentParser + for file in files: + + if any(filter_item in file for filter_item in filter_list): + print(f"Skipping file: {file} due to filter 
criteria.") + continue + + # Clear GPU memory before processing each file + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + + args = parser.parse_args() + # Set the file-specific values + args.filename = file + args.data_direc = base_dir + + if Multi: + Optimal_Det_HP = Optimal_Multi_algo_HP_dict[args.AD_Name] + else: + Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name] + # try: + # Read data using a proper path join + df_path = os.path.join(args.data_direc, args.filename) + df = pd.read_csv(df_path).dropna() + data = df.iloc[:, 0:-1].values.astype(float) + label = df['Label'].astype(int).to_numpy() + + slidingWindow = find_length_rank(data, rank=1) + train_index = args.filename.split('.')[0].split('_')[-3] + data_train = data[:int(train_index), :] + test_data = data[int(train_index):, :] + label_test = label[int(train_index):] + + + + logits = None # ensure defined irrespective of branch + + print(f"Running {args.AD_Name} on {args.filename}...") + if args.AD_Name in Semisupervise_AD_Pool: + output = run_Semisupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP) + elif args.AD_Name in Unsupervise_AD_Pool: + if args.AD_Name == 'Time_RCD': + # For Time_RCD, we need to pass the test data directly + output, logits = run_Unsupervise_AD(args.AD_Name, data_train, test_data, Multi=Multi, **Optimal_Det_HP) + else: + output = run_Unsupervise_AD(args.AD_Name, data_train, test_data, **Optimal_Det_HP) + else: + raise Exception(f"{args.AD_Name} is not defined") + + if isinstance(output, np.ndarray): + # output = MinMaxScaler(feature_range=(0,1)).fit_transform(output.reshape(-1,1)).ravel() + + # Fix shape mismatch issue - ensure output and labels have the same length + min_length = min(len(output), len(label_test)) # Use label_test instead of label + output_aligned = output[:min_length] + label_aligned = label_test[:min_length] + logits_aligned = None + if logits is not None: + logits_aligned = logits[:min_length] + + + evaluation_result = get_metrics(output_aligned, label_aligned, slidingWindow=slidingWindow, pred=output_aligned > (np.mean(output_aligned)+3*np.std(output_aligned))) + evaluation_result_logits = None + if logits is not None: + evaluation_result_logits = get_metrics(logits_aligned, label_aligned, slidingWindow=slidingWindow, pred=logits_aligned > (np.mean(logits_aligned)+3*np.std(logits_aligned))) + + print(evaluation_result) + + # Prepare result dictionary with filename and all metrics + result_dict = { + 'filename': args.filename, + 'AD_Name': args.AD_Name, + 'sliding_window': slidingWindow, + 'train_index': train_index, + 'data_shape': f"{data.shape[0]}x{data.shape[1]}", + 'output_length': len(output), + 'label_length': len(label_test), # Use label_test length + 'aligned_length': min_length, + **evaluation_result # Unpack all evaluation metrics + } + all_results.append(result_dict) + + if logits is not None: + logit_dict = { + 'filename': args.filename, + 'AD_Name': args.AD_Name, + 'sliding_window': slidingWindow, + 'train_index': train_index, + 'data_shape': f"{data.shape[0]}x{data.shape[1]}", + 'output_length': len(logits), + 'label_length': len(label_test), # Use label_test length + 'aligned_length': min_length, + **evaluation_result_logits # Unpack all evaluation metrics for logits + } + all_logits.append(logit_dict) + # Save value, label, and anomaly scores to pickle file + if args.save: + output_filename = f'{args.filename.split(".")[0]}_results.pkl' + output_path = os.path.join( + os.path.join(os.getcwd(), (f"{'Multi' if 
Multi else 'Uni'}_"+args.AD_Name), output_filename)) + if not os.path.exists(output_path): + os.makedirs(os.path.dirname(output_path), exist_ok=True) + pd.DataFrame({ + 'value': test_data[:min_length].tolist(), + 'label': label_aligned.tolist(), + 'anomaly_score': output_aligned.tolist(), + 'logits': logits_aligned.tolist() if logits is not None else None + }).to_pickle(output_path) + print(f'Results saved to {output_path}') + else: + print(f'At {args.filename}: '+output) + # Save error information as well + result_dict = { + 'filename': args.filename, + 'AD_Name': args.AD_Name, + 'sliding_window': None, + 'train_index': None, + 'data_shape': None, + 'error_message': output + } + all_results.append(result_dict) + + # Convert results to DataFrame and save to CSV + if all_results: + results_df = pd.DataFrame(all_results) + # win_size = str(Optimal_Det_HP['win_size']) if Optimal_Det_HP['win_size'] else "" + output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}.csv' + results_df.to_csv(output_filename, index=False) + print(f"\nAll results saved to {output_filename}") + print(f"Total file processed: {len(all_results)}") + print(f"Results shape: {results_df.shape}") + if all_logits: + logits_df = pd.DataFrame(all_logits) + logits_output_filename = f'{"Multi" if Multi else "Uni"}_{args.AD_Name}.csv' + logits_df.to_csv(logits_output_filename, index=False) + print(f"Logits results saved to {logits_output_filename}") + else: + print("No results to save.") diff --git a/training.py b/training.py new file mode 100644 index 0000000000000000000000000000000000000000..905c46a42d4ebd8b5e3aa6466a581c81b3784f6b --- /dev/null +++ b/training.py @@ -0,0 +1,838 @@ +import datetime +import itertools +import os +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader, DistributedSampler +import torch.nn.functional as F +import torch.distributed as dist +import torch.multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel as DDP +import random +import numpy as np +from typing import Tuple, List, Dict, Any, Union, Optional +import argparse +import json +import numpy as np +import torch +from torch.utils.data import Dataset +import random +import os +import pickle +from typing import Dict, List, Union, Optional, Tuple +from pathlib import Path +from dataclasses import dataclass + +import sys +from models.time_rcd.ts_encoder_bi_bias import TimeSeriesEncoder +from models.time_rcd.time_rcd_config import TimeRCDConfig, default_config + +import warnings +warnings.filterwarnings("ignore") + +# PYTHONPATH=/home2/lijinbo/Projects/AnomalyLlava-master/ python src/models/Moirai/AnomalyLlava_pretrain_multi.py +@dataclass +class PretrainBatch: + """Batch structure for pretraining tasks.""" + time_series: torch.Tensor + labels: torch.Tensor + masked_time_series: torch.Tensor + mask_indices: torch.Tensor + +class ChatTSAnomalyPretrainDataset(Dataset): + def __init__(self, + dataset_dir: str, + filename: str, + split: str = 'train', + train_ratio: float = 0.95, + seed: int = 42): + file_path = os.path.join(dataset_dir, filename) + with open(file_path, 'rb') as f: + dataset = pickle.load(f) + random.seed(seed) + indices = list(range(len(dataset))) + random.shuffle(indices) + num_train = int(len(dataset) * train_ratio) + if split == 'train': + selected_indices = indices[:num_train] + elif split == 'test': + selected_indices = indices[num_train:] + else: + raise ValueError("split must be 'train' or 'test'") + self.data = [dataset[i] for i in 
selected_indices] + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + sample = self.data[idx] + time_series = torch.tensor(sample['time_series'], dtype=torch.float32) + normal_time_series = torch.tensor(sample['normal_time_series'], dtype=torch.float32) + labels = torch.tensor(sample['labels'], dtype=torch.long) + attribute = sample['attribute'] + return time_series, normal_time_series, labels, attribute + +class TimeSeriesPretrainModel(nn.Module): + """Model for time series pretraining with masked reconstruction and anomaly detection.""" + + def __init__(self, config: TimeRCDConfig): + super().__init__() + self.config = config + + # Extract TimeSeriesEncoder parameters from config + ts_config = config.ts_config + self.ts_encoder = TimeSeriesEncoder( + d_model=ts_config.d_model, + d_proj=ts_config.d_proj, + patch_size=ts_config.patch_size, + num_layers=ts_config.num_layers, + num_heads=ts_config.num_heads, + d_ff_dropout=ts_config.d_ff_dropout, + use_rope=ts_config.use_rope, + num_features=ts_config.num_features, + activation=ts_config.activation + ) + + # Masked reconstruction head + self.reconstruction_head = nn.Sequential( + nn.Linear(config.ts_config.d_proj, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, config.ts_config.d_proj * 4), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj * 4, 1) # (B, seq_len, num_features, 1) + ) + + # Anomaly detection head + self.anomaly_head = nn.Sequential( + nn.Linear(config.ts_config.d_proj, config.ts_config.d_proj // 2), + nn.GELU(), + nn.Dropout(config.dropout), + nn.Linear(config.ts_config.d_proj // 2, 2) # (B, seq_len, num_features, 2) for binary classification + ) + + self.anomaly_head.apply(self._init_weights) + self.reconstruction_head.apply(self._init_weights) + + def _init_weights(self, module): + if isinstance(module, nn.Linear): + nn.init.xavier_normal_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + + def forward(self, time_series: torch.Tensor, mask: Optional[torch.Tensor] = None): + """Forward pass through the encoder.""" + local_embeddings = self.ts_encoder(time_series, mask) + return local_embeddings + + def masked_reconstruction_loss(self, + local_embeddings: torch.Tensor, # (B, seq_len, num_features, d_proj) + original_time_series: torch.Tensor, # (B, seq_len, num_features), + mask: torch.Tensor # (B, seq_len) + ) -> torch.Tensor: + """Compute masked reconstruction loss.""" + batch_size, seq_len, num_features = original_time_series.shape + patch_size = self.config.ts_config.patch_size + + # 确保数据类型一致 + mask = mask.bool() + + # 只对 masked 的位置计算损失 + # local_embeddings: [B, seq_len, num_features, d_proj] + # 通过重构头预测原始值 + reconstructed = self.reconstruction_head(local_embeddings) # (B, seq_len, num_features, 1) + reconstructed = reconstructed.view(batch_size, seq_len, num_features) + + # 只对被 mask 的位置计算损失 + mask_expanded = mask.unsqueeze(-1).expand(-1, -1, num_features) # (B, seq_len, num_features) + reconstruction_loss = F.mse_loss( + reconstructed[mask_expanded], + original_time_series[mask_expanded] + ) + return reconstruction_loss + + def anomaly_detection_loss(self, + local_embeddings: torch.Tensor, # (B, seq_len, num_features, d_proj) + labels: torch.Tensor) -> torch.Tensor: # (B, seq_len) + """Compute anomaly detection loss for each timestep.""" + # Project local embeddings to anomaly scores + logits = self.anomaly_head(local_embeddings) # (B, seq_len, num_features, 2) + logits = 
torch.mean(logits, dim=-2) # Average over num_features to get (B, seq_len, 2) + + + # Reshape for loss computation + batch_size, seq_len, _ = logits.shape + logits = logits.view(-1, 2) # (B*seq_len, 2) + labels = labels.view(-1) # (B*seq_len) + labels = (labels > 0.5).long() + # Create mask for valid labels (not padding) + valid_mask = (labels != -1) + + # Compute loss only on valid timesteps + if valid_mask.sum() > 0: + anomaly_loss = F.cross_entropy( + logits[valid_mask], + labels[valid_mask] + ) + else: + anomaly_loss = torch.tensor(0.0, device=logits.device) + + return anomaly_loss + + +def create_random_mask(time_series: torch.Tensor, #(B, max_seq_len, num_features) + attention_mask: torch.Tensor, # (B, max_seq_len) + mask_ratio: float = 0.15) -> Tuple[torch.Tensor, torch.Tensor]: + """Create random mask for time series patches, only masking valid sequence parts.""" + batch_size, seq_len, num_features = time_series.shape + patch_size = default_config.ts_config.patch_size + + mask = torch.zeros(batch_size, seq_len) # (B, max_seq_len) + + for i in range(batch_size): + # Get valid sequence length for this sample + valid_length = attention_mask[i].sum().item() + + # Calculate number of patches in valid sequence + num_valid_patches = (valid_length - 1) // patch_size + 1 + num_masked = int(num_valid_patches * mask_ratio) + + if num_masked > 0: + # Only select patches from valid sequence + masked_patches = torch.randperm(num_valid_patches)[:num_masked] + for j in masked_patches: + start_idx = j * patch_size + end_idx = min((j + 1) * patch_size, valid_length) # Don't exceed valid length + mask[i, start_idx:end_idx] = 1 + + # Create masked time series - only mask valid parts + masked_time_series = time_series.clone() + mask_indices = mask.bool() & attention_mask # Only mask where both mask and attention_mask are True + mask_expanded = mask_indices.unsqueeze(-1).expand(-1, -1, num_features) # (B, max_seq_len, num_features) + # mask的部分赋值为0而不是随机 + masked_time_series[mask_expanded] = 0.0 + # masked_time_series[mask_expanded] = torch.randn_like(masked_time_series[mask_expanded]) * 0.1 + + # Update mask to only include valid parts + mask = mask * attention_mask.float() + + return masked_time_series, mask # (B, max_seq_len, num_features), (B, max_seq_len) + + +def collate_fn(batch): + """Collate function for pretraining dataset.""" + time_series_list, normal_time_series_list, labels_list, attribute_list = zip(*batch) + + # Convert to tensors and pad sequences + if time_series_list[0].ndim == 1: + time_series_tensors = [ts.unsqueeze(-1) for ts in time_series_list] # Add feature dimension + normal_time_series_tensors = [nts.unsqueeze(-1) for nts in normal_time_series_list] + else: + time_series_tensors = [ts for ts in time_series_list] + normal_time_series_tensors = [nts for nts in normal_time_series_list] + + # standardize time series + concatenated = torch.cat(time_series_tensors, dim=0) # (total_length, num_features) + mean = concatenated.mean(dim=0, keepdim=True) # (1, num_features) + std = concatenated.std(dim=0, keepdim=True) # (1, num_features) + std = std + 1e-4 + time_series_tensors_std = [(ts - mean) / std for ts in time_series_tensors] + normal_time_series_tensors_std = [(nts - mean) / std for nts in normal_time_series_tensors] + time_series_tensors = time_series_tensors_std + normal_time_series_tensors = normal_time_series_tensors_std + + # labels_tensor = torch.stack(labels_list) + labels = [label for label in labels_list] + # Pad time series to same length + padded_time_series = 
torch.nn.utils.rnn.pad_sequence( + time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_normal_time_series = torch.nn.utils.rnn.pad_sequence( + normal_time_series_tensors, batch_first=True, padding_value=0.0 + ) # (B, max_seq_len, num_features) + padded_labels = torch.nn.utils.rnn.pad_sequence( + labels, batch_first=True, padding_value=-1 + ) # (B, max_seq_len) + + sequence_lengths = [ts.size(0) for ts in time_series_tensors] + B, max_seq_len, num_features = padded_time_series.shape + attention_mask = torch.zeros(B, max_seq_len, dtype=torch.bool) # (B, max_seq_len) + for i, length in enumerate(sequence_lengths): + attention_mask[i, :length] = True + + # Create random masks for reconstruction task - only mask valid sequence parts + masked_time_series, mask = create_random_mask(padded_time_series, attention_mask) + + return { + 'time_series': padded_time_series, + 'normal_time_series': padded_normal_time_series, + 'masked_time_series': masked_time_series, + 'mask': mask, # for reconstruction task + 'labels': padded_labels, + 'attention_mask': attention_mask, # for padding + 'attribute': attribute_list + } + + +def set_seed(seed: int) -> None: + """Set random seed for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def setup_distributed(rank: int, world_size: int, config: TimeRCDConfig) -> None: + """Setup distributed training environment.""" + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = config.dist_port + + try: + dist.init_process_group( + "nccl", + rank=rank, + world_size=world_size, + timeout=datetime.timedelta(minutes=30) + ) + torch.cuda.set_device(rank) + + if rank == 0: + print(f"Successfully initialized distributed training on rank {rank} with world size {world_size}") + + except Exception as e: + print(f"Rank {rank}: Initialization failed with error: {e}") + raise e + + +def cleanup_distributed() -> None: + """Clean up distributed training environment.""" + if dist.is_initialized(): + dist.destroy_process_group() + + +def evaluate_epoch(test_loader: DataLoader, + model: nn.Module, + config: TimeRCDConfig, + device: torch.device, + rank: int) -> float: + """Evaluate model on test dataset.""" + model.eval() + total_loss = 0.0 + total_recon_loss = 0.0 + total_anomaly_loss = 0.0 + num_batches = 0 + + with torch.no_grad(): + for batch in itertools.islice(test_loader, min(len(test_loader), config.test_batch_limit)): + # Move data to device + time_series = batch['time_series'].to(device) + masked_time_series = batch['masked_time_series'].to(device) + mask = batch['mask'].to(device) + labels = batch['labels'].to(device) + attention_mask = batch['attention_mask'].to(device) + + # Forward pass + local_embeddings = model(masked_time_series, attention_mask & (~mask.bool())) + + # Compute losses + recon_loss = model.module.masked_reconstruction_loss( + local_embeddings, time_series, mask + ) + anomaly_loss = model.module.anomaly_detection_loss(local_embeddings, labels) + + total_loss_batch = recon_loss + anomaly_loss + total_loss += total_loss_batch.item() + total_recon_loss += recon_loss.item() + total_anomaly_loss += anomaly_loss.item() + num_batches += 1 + + avg_loss = total_loss / num_batches if num_batches > 0 else 0.0 + avg_recon_loss = total_recon_loss / num_batches if num_batches > 0 else 0.0 + avg_anomaly_loss = 
total_anomaly_loss / num_batches if num_batches > 0 else 0.0 + + if rank == 0: + print(f"Validation Results:") + print(f" Average Total Loss: {avg_loss:.4f}") + print(f" Average Recon Loss: {avg_recon_loss:.4f}") + print(f" Average Anomaly Loss: {avg_anomaly_loss:.4f}") + + return avg_loss + + +def train_epoch(train_loader: DataLoader, + model: nn.Module, + optimizer: optim.Optimizer, + config: TimeRCDConfig, + device: torch.device, + epoch: int, + rank: int, + scaler: Optional[torch.cuda.amp.GradScaler] = None) -> float: + """Train for one epoch with multiple pretraining tasks.""" + model.train() + total_loss = 0.0 + total_recon_loss = 0.0 + total_anomaly_loss = 0.0 + num_batches = 0 + + for batch_idx, batch in enumerate(train_loader): + if batch_idx % 10 == 0: + torch.cuda.empty_cache() + + optimizer.zero_grad() + + # Move data to device + time_series = batch['time_series'].to(device) # (B, max_seq_len, num_features) + masked_time_series = batch['masked_time_series'].to(device) + mask = batch['mask'].to(device) # (B, max_seq_len) + labels = batch['labels'].to(device) + attention_mask = batch['attention_mask'].to(device) + + if config.mixed_precision and scaler is not None: + with torch.amp.autocast('cuda'): + local_embeddings = model(masked_time_series, attention_mask & (~mask.bool())) + + recon_loss = model.module.masked_reconstruction_loss( + local_embeddings, time_series, mask + ) + anomaly_loss = model.module.anomaly_detection_loss(local_embeddings, labels) + + total_loss_batch = recon_loss + anomaly_loss + scaler.scale(total_loss_batch).backward() + scaler.step(optimizer) + scaler.update() + else: + local_embeddings = model(masked_time_series, attention_mask & (~mask.bool())) + + recon_loss = model.module.masked_reconstruction_loss( + local_embeddings, time_series, mask + ) + anomaly_loss = model.module.anomaly_detection_loss(local_embeddings, labels) + + total_loss_batch = recon_loss + anomaly_loss + total_loss_batch.backward() + optimizer.step() + + # Accumulate losses + total_loss += total_loss_batch.item() + total_recon_loss += recon_loss.item() + total_anomaly_loss += anomaly_loss.item() + num_batches += 1 + + # Log progress based on log_freq + if rank == 0 and batch_idx % config.log_freq == 0: + print(f"Epoch {epoch}, Batch {batch_idx}/{len(train_loader)}") + print(f" Total Loss: {total_loss_batch.item():.4f}") + print(f" Recon Loss: {recon_loss.item():.4f}") + print(f" Anomaly Loss: {anomaly_loss.item():.4f}") + + avg_loss = total_loss / num_batches + avg_recon_loss = total_recon_loss / num_batches + avg_anomaly_loss = total_anomaly_loss / num_batches + + if rank == 0: + print(f"Epoch {epoch} completed:") + print(f" Average Total Loss: {avg_loss:.4f}") + print(f" Average Recon Loss: {avg_recon_loss:.4f}") + print(f" Average Anomaly Loss: {avg_anomaly_loss:.4f}") + + return avg_loss + + +def save_checkpoint(model: nn.Module, + optimizer: optim.Optimizer, + config: TimeRCDConfig, + epoch: int, + avg_loss: float, + rank: int = 0, + is_best: bool = False) -> None: + """Save model checkpoint.""" + if rank != 0: + return + + # Extract model state dict (handle DDP wrapper) + if hasattr(model, 'module'): + model_state_dict = model.module.state_dict() + else: + model_state_dict = model.state_dict() + + checkpoint = { + 'epoch': epoch, + 'model_state_dict': model_state_dict, + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': avg_loss, + 'config': config.to_dict() + } + + os.makedirs(config.checkpoint_dir, exist_ok=True) + + # Always save the latest checkpoint + latest_path = 
os.path.join(config.checkpoint_dir, "pretrain_checkpoint_latest.pth") + torch.save(checkpoint, latest_path) + + # Save the checkpoint at specified frequency + if epoch % config.save_freq == 0 or epoch == config.num_epochs - 1: + save_path = os.path.join(config.checkpoint_dir, f"pretrain_checkpoint_epoch_{epoch}.pth") + torch.save(checkpoint, save_path) + print(f"Checkpoint saved to {save_path} (epoch {epoch}, loss: {avg_loss:.4f})") + + # Save best model if this is the best validation loss + if is_best: + best_path = os.path.join(config.checkpoint_dir, "pretrain_checkpoint_best.pth") + torch.save(checkpoint, best_path) + print(f"New best model saved to {best_path} (epoch {epoch}, val_loss: {avg_loss:.4f})") + + # Save just the time series encoder for downstream tasks + if hasattr(model, 'module'): + ts_encoder_state = model.module.ts_encoder.state_dict() + else: + ts_encoder_state = model.ts_encoder.state_dict() + + best_encoder_path = os.path.join(config.checkpoint_dir, "pretrained_ts_encoder.pth") + torch.save(ts_encoder_state, best_encoder_path) + print(f"Best pretrained time series encoder saved to {best_encoder_path}") + + +def train_multiple_datasets(dataset_filenames: List[str], config: TimeRCDConfig) -> None: + """Train on multiple datasets sequentially with model weight continuation.""" + print(f'\n{"=" * 50}') + print(f"Starting Multi-Dataset Sequential Training") + print(f"Number of datasets: {len(dataset_filenames)}") + print(f"Datasets: {dataset_filenames}") + print("Training Mode: Continuous (model weights carried over between datasets)") + print("=" * 50) + + # Parse GPU IDs from config + gpu_ids = [int(x.strip()) for x in config.cuda_devices.split(',')] + world_size = len(gpu_ids) + + # Set CUDA_VISIBLE_DEVICES + os.environ['CUDA_VISIBLE_DEVICES'] = config.cuda_devices + + # Global checkpoint path for model continuation + global_checkpoint_path = None + # global_checkpoint_path = "experiments/checkpoints/pretrain_multi_activate_big/dataset_8_12.pkl/pretrain_checkpoint_best.pth" + + for dataset_idx, filename in enumerate(dataset_filenames): + print(f"\n{'='*50}") + print(f"Training on Dataset {dataset_idx + 1}/{len(dataset_filenames)}: {filename}") + if global_checkpoint_path is not None: + print(f"Continuing from previous dataset's trained model...") + print(f"{'='*50}") + + batch_size_list = [256, 64, 64, 32, 32, 16, 16, 48, + 16, 16, 16, 32, 16, 16, 16, 16, + 16, 16, 16, 16, 12, 12, 12, 16, + 12, 12, 12, 12, 12, 12, 12, 16, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 8] + num_features = int(os.path.splitext(filename)[0].split('_')[-1]) + print(f"Using batch size: {batch_size_list[num_features - 1] if num_features <= len(batch_size_list) else batch_size_list[-1]} for {filename}") + if num_features <= len(batch_size_list): + batch_size = batch_size_list[num_features - 1] + else: + batch_size = batch_size_list[-1] + config.batch_size = batch_size + + # Create dataset-specific checkpoint directory + original_checkpoint_dir = config.checkpoint_dir + config.checkpoint_dir = os.path.join(original_checkpoint_dir, f"{filename}") + os.makedirs(config.checkpoint_dir, exist_ok=True) + + # Set the checkpoint path for model continuation + config.continuation_checkpoint = global_checkpoint_path + + config.ts_config.num_features = num_features + if world_size == 1: + # Single GPU training + print(f"Running single GPU pretraining for {filename}...") + train_worker(0, 1, config, filename) + else: + # Multi-GPU distributed training + 
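# torch.multiprocessing.spawn prepends the process index, so each worker runs
# train_worker(rank, world_size, config, filename) with rank in [0, world_size);
# the rank is then used in setup_distributed() to pick the CUDA device and join
# the NCCL process group.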
print(f"Running distributed pretraining for {filename}...") + mp.spawn( + train_worker, + args=(world_size, config, filename), + nprocs=world_size, + join=True + ) + + # Update global checkpoint path for next dataset + global_checkpoint_path = os.path.join(config.checkpoint_dir, "pretrain_checkpoint_best.pth") + + # Restore original checkpoint directory + config.checkpoint_dir = original_checkpoint_dir + + print(f"Completed training on dataset: {filename}") + if dataset_idx < len(dataset_filenames) - 1: + print(f"Model weights will be loaded for next dataset training...") + + print(f"\n{'='*50}") + print("Multi-Dataset Sequential Training Completed!") + print(f"All {len(dataset_filenames)} datasets have been processed with model continuation.") + print(f"{'='*50}") + + +def train_worker(rank: int, world_size: int, config: TimeRCDConfig, filename: str = None) -> None: + """Training worker function for each process.""" + print(f"Running DDP on rank {rank} with world_size {world_size} for dataset: {filename}") + + # Setup distributed training + setup_distributed(rank, world_size, config) + + # Set device for this process + device = torch.device(f"cuda:{rank}") + + # Set random seed + set_seed(config.seed + rank) + + try: + # Initialize model + model = TimeSeriesPretrainModel(config).to(device) + + # Load checkpoint if continuing from previous dataset + checkpoint = None + if hasattr(config, 'continuation_checkpoint') and config.continuation_checkpoint and os.path.exists(config.continuation_checkpoint): + if rank == 0: + print(f"Loading checkpoint from previous dataset: {config.continuation_checkpoint}") + checkpoint = torch.load(config.continuation_checkpoint, map_location=device) + + # Handle DDP state_dict compatibility + state_dict = checkpoint['model_state_dict'] + + # Remove 'module.' prefix if it exists (from DDP wrapped model) + if any(key.startswith('module.') for key in state_dict.keys()): + new_state_dict = {} + for key, value in state_dict.items(): + if key.startswith('module.'): + new_key = key[7:] # Remove 'module.' 
prefix + new_state_dict[new_key] = value + else: + new_state_dict[key] = value + state_dict = new_state_dict + + model.load_state_dict(state_dict, strict=False) + if rank == 0: + print(f"Successfully loaded model weights from previous dataset training") + + # Wrap model with DDP + # model = DDP(model, device_ids=[rank], find_unused_parameters=True) + model = DDP(model, device_ids=[rank]) + + # Setup optimizer + optimizer = optim.AdamW( + model.parameters(), + lr=config.learning_rate, + weight_decay=config.weight_decay + ) + + # Load optimizer state if continuing and optimizer state exists + if checkpoint is not None and 'optimizer_state_dict' in checkpoint: + try: + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + if rank == 0: + print("Successfully loaded optimizer state from previous dataset training") + except Exception as e: + if rank == 0: + print(f"Warning: Could not load optimizer state: {e}") + print("Continuing with fresh optimizer state") + print("This is normal when model architecture or optimizer parameters change") + + # Setup mixed precision scaler + scaler = torch.amp.GradScaler() if config.mixed_precision else None + + # Load data + train_dataset = ChatTSAnomalyPretrainDataset(config.pretrain_data_path, filename, split="train") + test_dataset = ChatTSAnomalyPretrainDataset(config.pretrain_data_path, filename, split="test") + + # Create distributed samplers + train_sampler = DistributedSampler( + train_dataset, + num_replicas=world_size, + rank=rank, + shuffle=True + ) + + train_loader = DataLoader( + train_dataset, + batch_size=config.batch_size, + sampler=train_sampler, + collate_fn=collate_fn, + num_workers=2, + pin_memory=True + ) + + # Create test sampler and loader for validation + test_sampler = DistributedSampler( + test_dataset, + num_replicas=world_size, + rank=rank, + shuffle=False + ) + + test_loader = DataLoader( + test_dataset, + batch_size=config.batch_size, + sampler=test_sampler, + collate_fn=collate_fn, + num_workers=2, + pin_memory=True + ) + + # Early stopping parameters + best_val_loss = float('inf') + patience_counter = 0 + early_stopping_patience = getattr(config, 'early_stopping_patience', 10) + + # Training loop + if rank == 0: + dataset_name = filename if filename else "default" + continuation_info = "" + if hasattr(config, 'continuation_checkpoint') and config.continuation_checkpoint and os.path.exists(config.continuation_checkpoint): + continuation_info = " (continuing from previous dataset)" + print(f"Starting pretraining for {config.num_epochs} epochs on dataset {dataset_name}{continuation_info}...") + print(f"Total training batches per process: {len(train_loader)}") + print(f"Total validation batches per process: {min(config.test_batch_limit, len(test_loader))}") + print(f"Early stopping patience: {early_stopping_patience} epochs") + print(f"Tasks: Masked Reconstruction + Anomaly Detection") + + for epoch in range(config.num_epochs): + # Set epoch for distributed samplers + train_sampler.set_epoch(epoch) + test_sampler.set_epoch(epoch) + + # Training phase + avg_train_loss = train_epoch(train_loader, model, optimizer, + config, device, epoch, rank, scaler) + + # Validation phase + avg_val_loss = evaluate_epoch(test_loader, model, config, device, rank) + + # Check if this is the best model so far + is_best = avg_val_loss < best_val_loss + if is_best: + best_val_loss = avg_val_loss + patience_counter = 0 + if rank == 0: + print(f"New best validation loss: {best_val_loss:.4f}") + else: + patience_counter += 1 + if rank == 0: + 
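# Patience counts consecutive epochs without a validation improvement; training
# stops once it reaches early_stopping_patience. Note that avg_val_loss is the
# per-rank average (evaluate_epoch does not all-reduce across processes), so
# under multi-GPU DDP the ranks could in principle disagree on is_best or on
# when to stop; all-reducing the validation loss before this check would keep
# the decision consistent across ranks.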
print(f"Validation loss did not improve. Patience: {patience_counter}/{early_stopping_patience}") + + # Save checkpoint with best model flag + save_checkpoint(model, optimizer, config, epoch, avg_val_loss, rank, is_best) + + # Early stopping check + if patience_counter >= early_stopping_patience: + if rank == 0: + print(f"Early stopping triggered after {epoch + 1} epochs") + print(f"Best validation loss: {best_val_loss:.4f}") + break + + + finally: + # Clean up distributed training + cleanup_distributed() + + +def main() -> None: + + # PYTHONPATH=/home2/lijinbo/Projects/AnomalyLlava-master/ python src/models/Moirai/AnomalyLlava_pretrain_multi.py + """Main function to launch distributed pretraining.""" + # Load configuration + config = default_config + + # Update config for pretraining + config.num_epochs = 50 + config.learning_rate = 5e-4 # Higher learning rate for pretraining + config.batch_size = 64 + config.ts_config.patch_size = 16 + config.checkpoint_dir = "checkpoints/" + config.cuda_devices = "3" + config.mixed_precision = False + config.dist_port = "16798" + config.early_stopping_patience = 7 # Stop training if validation loss doesn't improve for 10 epochs + config.pretrain_data_path = "training_data/" + + # ===== Multidataset Training Configuration ===== + # Change to True for multi-dataset training + parser = argparse.ArgumentParser() + parser.add_argument('--mode', type=str, default='multi', choices=['multi', 'single']) + args = parser.parse_args() + # Change to True for single-dataset training + if args.mode == 'multi': + use_multi_dataset_training = True + else: + use_multi_dataset_training = False + # Filename for single dataset training + single_dataset_filename = "uni_data_0_1.pkl" + # Filename list for multi-dataset training + dataset_filenames = [ + "dataset_0_1.pkl", + "dataset_1_1.pkl", + "dataset_2_1.pkl", + "dataset_7_8.pkl", + "dataset_8_12.pkl", + "dataset_9_16.pkl", + "dataset_10_20.pkl", + ] + + # Parse GPU IDs from config + gpu_ids = [int(x.strip()) for x in config.cuda_devices.split(',')] + world_size = len(gpu_ids) + + print(f"Using GPUs: {gpu_ids}") + print(f"World size: {world_size}") + print(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', config.cuda_devices)}") + print("=" * 50) + print("AnomalyLLava Time Series Pretraining") + print("Tasks:") + print(" 1. Masked Reconstruction - Reconstruct masked time series patches") + print(" 2. 
Anomaly Detection - Binary classification of normal/anomalous series") + print("Features:") + print(" - Early stopping with validation loss monitoring") + print(" - Best model checkpoint saving") + print(f" - Early stopping patience: {config.early_stopping_patience} epochs") + if use_multi_dataset_training: + print(" - Sequential multi-dataset training with model weight continuation") + print("=" * 50) + + # Create checkpoint directory + os.makedirs(config.checkpoint_dir, exist_ok=True) + + if use_multi_dataset_training: + # Multi-dataset training + print(f"Training Mode: Multi-Dataset Sequential ({len(dataset_filenames)} datasets)") + print(f"Datasets will be trained sequentially with model weight continuation") + train_multiple_datasets(dataset_filenames, config) + else: + # Single dataset training + print(f"Training Mode: Single Dataset ({single_dataset_filename})") + # Set CUDA_VISIBLE_DEVICES + os.environ['CUDA_VISIBLE_DEVICES'] = config.cuda_devices + + num_features = int(os.path.splitext(single_dataset_filename)[0].split('_')[-1]) + config.ts_config.num_features = num_features + if world_size == 1: + # Single GPU training + print("Running single GPU pretraining...") + train_worker(0, 1, config, single_dataset_filename) + else: + # Multi-GPU distributed training + print("Running distributed pretraining...") + mp.spawn( + train_worker, + args=(world_size, config, single_dataset_filename), + nprocs=world_size, + join=True + ) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utils/dataset.py b/utils/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..bed97d3e6a45ae5ed9d85a62c425ebc07870a8b1 --- /dev/null +++ b/utils/dataset.py @@ -0,0 +1,304 @@ +import torch +import torch.utils.data +import numpy as np +epsilon = 1e-8 + +class TimeRCDDataset(torch.utils.data.Dataset): + + def __init__(self, data, window_size, stride=1, normalize=False, pad_to_multiple=True): + super().__init__() + self.window_size = window_size + self.stride = stride + # Ensure numpy array and a consistent 2D shape (N, C) + data = np.asarray(data) + if data.ndim == 1: + data = data.reshape(-1, 1) + self.original_length = data.shape[0] + self.pad_to_multiple = pad_to_multiple + + # Normalize data if other than UCR + self.data = self._normalize_data(data) if normalize else data + # self.data = data + # self.univariate = self.data.shape[0] == 1 + + # Handle padding if requested + if self.pad_to_multiple: + self.data, self.padding_mask = self._pad_data_to_multiple() + else: + self.padding_mask = np.ones(self.data.shape[0], dtype=bool) # All data is real + + def _normalize_data(self, data, epsilon=1e-8): + """ Normalize data using mean and standard deviation. """ + mean, std = np.mean(data, axis=0), np.std(data, axis=0) + std = np.where(std == 0, epsilon, std) + return ((data - mean) / std) + + def _pad_data_to_multiple(self): + """ Pad data to make its length a multiple of window_size and return padding mask. 
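Padding repeats the last observed row; the returned boolean mask is True
for real timesteps and False for the padded tail.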
""" + data_length = self.data.shape[0] + remainder = data_length % self.window_size + + if remainder == 0: + # No padding needed - all data is real + padding_mask = np.ones(data_length, dtype=bool) + return self.data, padding_mask + + # Calculate padding needed + padding_length = self.window_size - remainder + print(f"Padding AnomalyClipDataset: original length {data_length}, window_size {self.window_size}, adding {padding_length} samples") + + # Pad by repeating the last row, keeping 2D shape (1, C) + last_row = self.data[-1:, :] + padding_data = np.repeat(last_row, padding_length, axis=0) + padded_data = np.vstack([self.data, padding_data]) + + # Create padding mask: True for real data, False for padded data + padding_mask = np.ones(data_length + padding_length, dtype=bool) + padding_mask[data_length:] = False # Mark padded samples as False + + return padded_data, padding_mask + + def __getitem__(self, index): + start = index * self.stride + end = start + self.window_size + + if end > self.data.shape[0]: + raise IndexError("Index out of bounds for the dataset.") + + # Always return (window_size, num_features) + sample = torch.tensor(self.data[start:end, :], dtype=torch.float32) + mask = torch.tensor(self.padding_mask[start:end], dtype=torch.bool) + + # if self.univariate: + # sample = sample.unsqueeze(-1) # Add channel dimension for univariate data + + return sample, mask + + def __len__(self): + return max(0, (self.data.shape[0] - self.window_size) // self.stride + 1) + + +class ReconstructDataset(torch.utils.data.Dataset): + def __init__(self, data, window_size, stride=1, normalize=True): + super().__init__() + self.window_size = window_size + self.stride = stride + self.data = self._normalize_data(data) if normalize else data + data = np.asarray(data) + if data.ndim == 1: + data = data.reshape(-1, 1) + self.univariate = data.shape[1] == 1 + self.sample_num = max(0, (self.data.shape[0] - window_size) // stride + 1) + self.samples, self.targets = self._generate_samples() + + def _normalize_data(self, data, epsilon=1e-8): + mean, std = np.mean(data, axis=0), np.std(data, axis=0) + std = np.where(std == 0, epsilon, std) # Avoid division by zero + return (data - mean) / std + + def _generate_samples(self): + data = torch.tensor(self.data, dtype=torch.float32) + + if self.univariate: + data = data.squeeze() + X = torch.stack([data[i * self.stride : i * self.stride + self.window_size] for i in range(self.sample_num)]) + X = X.unsqueeze(-1) + else: + X = torch.stack([data[i * self.stride : i * self.stride + self.window_size, :] for i in range(self.sample_num)]) + + return X, X + + def __len__(self): + return self.sample_num + + def __getitem__(self, index): + return self.samples[index], self.targets[index] + +class ForecastDataset(torch.utils.data.Dataset): + def __init__(self, data, window_size, pred_len, stride=1, normalize=True): + super().__init__() + self.window_size = window_size + self.pred_len = pred_len + self.stride = stride + self.data = self._normalize_data(data) if normalize else data + + data = np.asarray(data) + if data.ndim == 1: + data = data.reshape(-1, 1) + self.sample_num = max((self.data.shape[0] - window_size - pred_len) // stride + 1, 0) + + # Generate samples efficiently + self.samples, self.targets = self._generate_samples() + + def _normalize_data(self, data, epsilon=1e-8): + """ Normalize data using mean and standard deviation. 
""" + mean, std = np.mean(data, axis=0), np.std(data, axis=0) + std = np.where(std == 0, epsilon, std) # Avoid division by zero + return (data - mean) / std + + def _generate_samples(self): + """ Generate windowed samples efficiently using vectorized slicing. """ + data = torch.tensor(self.data, dtype=torch.float32) + + indices = np.arange(0, self.sample_num * self.stride, self.stride) + + X = torch.stack([data[i : i + self.window_size] for i in indices]) + Y = torch.stack([data[i + self.window_size : i + self.window_size + self.pred_len] for i in indices]) + + return X, Y # Inputs & targets + + def __len__(self): + return self.sample_num + + def __getitem__(self, index): + return self.samples[index], self.targets[index] + +# class ForecastDataset(torch.utils.data.Dataset): +# def __init__(self, data, window_size, pred_len, normalize=True): +# super().__init__() +# self.normalize = normalize + +# if self.normalize: +# data_mean = np.mean(data, axis=0) +# data_std = np.std(data, axis=0) +# data_std = np.where(data_std == 0, epsilon, data_std) +# self.data = (data - data_mean) / data_std +# else: +# self.data = data + +# self.window_size = window_size + +# if data.shape[1] == 1: +# data = data.squeeze() +# self.len, = data.shape +# self.sample_num = max(self.len - self.window_size - pred_len + 1, 0) +# X = torch.zeros((self.sample_num, self.window_size)) +# Y = torch.zeros((self.sample_num, pred_len)) + +# for i in range(self.sample_num): +# X[i, :] = torch.from_numpy(data[i : i + self.window_size]) +# Y[i, :] = torch.from_numpy(np.array( +# data[i + self.window_size: i + self.window_size + pred_len] +# )) + +# self.samples, self.targets = torch.unsqueeze(X, -1), torch.unsqueeze(Y, -1) + +# else: +# self.len = self.data.shape[0] +# self.sample_num = max(self.len - self.window_size - pred_len + 1, 0) + +# X = torch.zeros((self.sample_num, self.window_size, self.data.shape[1])) +# Y = torch.zeros((self.sample_num, pred_len, self.data.shape[1])) + +# for i in range(self.sample_num): +# X[i, :] = torch.from_numpy(data[i : i + self.window_size, :]) +# Y[i, :] = torch.from_numpy(data[i + self.window_size: i + self.window_size + pred_len, :]) + +# self.samples, self.targets = X, Y + +# def __len__(self): +# return self.sample_num + +# def __getitem__(self, index): +# return self.samples[index, :, :], self.targets[index, :, :] + +class TSDataset(torch.utils.data.Dataset): + + def __init__(self, X, y=None, mean=None, std=None): + super(TSDataset, self).__init__() + self.X = X + self.mean = mean + self.std = std + + def __len__(self): + return self.X.shape[0] + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + sample = self.X[idx, :] + + if self.mean is not None and self.std is not None: + sample = (sample - self.mean) / self.std + # assert_almost_equal (0, sample.mean(), decimal=1) + + return torch.from_numpy(sample), idx + + +class ReconstructDataset_Moment(torch.utils.data.Dataset): + def __init__(self, data, window_size, stride=1, normalize=True): + super().__init__() + self.window_size = window_size + self.stride = stride + self.data = self._normalize_data(data) if normalize else data + + self.univariate = self.data.shape[1] == 1 + self.sample_num = max((self.data.shape[0] - window_size) // stride + 1, 0) + + self.samples = self._generate_samples() + self.input_mask = np.ones(self.window_size, dtype=np.float32) # Fixed input mask + + def _normalize_data(self, data, epsilon=1e-8): + mean, std = np.mean(data, axis=0), np.std(data, axis=0) + std = np.where(std == 0, 
epsilon, std) # Avoid division by zero + return (data - mean) / std + + def _generate_samples(self): + data = torch.tensor(self.data, dtype=torch.float32) + indices = np.arange(0, self.sample_num * self.stride, self.stride) + + if self.univariate: + X = torch.stack([data[i : i + self.window_size] for i in indices]) + else: + X = torch.stack([data[i : i + self.window_size, :] for i in indices]) + + return X + + def __len__(self): + return self.sample_num + + def __getitem__(self, index): + return self.samples[index], self.input_mask + +class TACLipDataset(torch.utils.data.Dataset): + def __init__(self, data, win_size, step=1, flag="test"): + self.flag = flag + self.step = step + self.win_size = win_size + self.test = data + print("Before normalization", self.test[:20]) + self.test = self._normalize_data(self.test) + print("After normalization", self.test[:20]) + self.test_labels = np.zeros(self.test.shape[0]) + + def _normalize_data(self, data, epsilon=1e-8): + mean, std = np.mean(data, axis=0), np.std(data, axis=0) + std = np.where(std == 0, epsilon, std) # Avoid division by zero + return (data - mean) / std + + def __len__(self): + """ + Number of images in the object dataset. + """ + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) diff --git a/utils/slidingWindows.py b/utils/slidingWindows.py new file mode 100644 index 0000000000000000000000000000000000000000..984f893a3c69cf59598632a2fd504071ee20412f --- /dev/null +++ b/utils/slidingWindows.py @@ -0,0 +1,72 @@ +from statsmodels.tsa.stattools import acf +from scipy.signal import argrelextrema +import numpy as np +from statsmodels.graphics.tsaplots import plot_acf + + +# determine sliding window (period) based on ACF +def find_length_rank(data, rank=1): + data = data.squeeze() + if len(data.shape) > 1: return 0 + if rank == 0: return 1 + data = data[:min(20000, len(data))] + + base = 3 + auto_corr = acf(data, nlags=400, fft=True)[base:] + + # plot_acf(data, lags=400, fft=True) + # plt.xlabel('Lags') + # plt.ylabel('Autocorrelation') + # plt.title('Autocorrelation Function (ACF)') + # plt.savefig('/data/liuqinghua/code/ts/TSAD-AutoML/AutoAD_Solution/candidate_pool/cd_diagram/ts_acf.png') + + local_max = argrelextrema(auto_corr, np.greater)[0] + + # print('auto_corr: ', auto_corr) + # print('local_max: ', local_max) + + try: + # max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) + sorted_local_max = np.argsort([auto_corr[lcm] for lcm in local_max])[::-1] # Ascending order + max_local_max = sorted_local_max[0] # 
Default + if rank == 1: max_local_max = sorted_local_max[0] + if rank == 2: + for i in sorted_local_max[1:]: + if i > sorted_local_max[0]: + max_local_max = i + break + if rank == 3: + for i in sorted_local_max[1:]: + if i > sorted_local_max[0]: + id_tmp = i + break + for i in sorted_local_max[id_tmp:]: + if i > sorted_local_max[id_tmp]: + max_local_max = i + break + # print('sorted_local_max: ', sorted_local_max) + # print('max_local_max: ', max_local_max) + if local_max[max_local_max] < 3 or local_max[max_local_max] > 300: + return 125 + return local_max[max_local_max] + base + except: + return 125 + + +# determine sliding window (period) based on ACF, Original version +def find_length(data): + if len(data.shape) > 1: + return 0 + data = data[:min(20000, len(data))] + + base = 3 + auto_corr = acf(data, nlags=400, fft=True)[base:] + + local_max = argrelextrema(auto_corr, np.greater)[0] + try: + max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) + if local_max[max_local_max] < 3 or local_max[max_local_max] > 300: + return 125 + return local_max[max_local_max] + base + except: + return 125 diff --git a/utils/stat_models.py b/utils/stat_models.py new file mode 100644 index 0000000000000000000000000000000000000000..8f011dcb82756568a678269faf978269c5eabde8 --- /dev/null +++ b/utils/stat_models.py @@ -0,0 +1,224 @@ +""" A collection of statistical models +code copied from pyod documentation https://github.com/yzhao062/pyod/blob/master/pyod/utils/stat_models.py +""" +# Author: Yue Zhao +# License: BSD 2 clause + +from __future__ import division +from __future__ import print_function + +import numpy as np +from scipy.stats import pearsonr +from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_consistent_length +from numba import njit + +def pairwise_distances_no_broadcast(X, Y): + """Utility function to calculate row-wise euclidean distance of two matrix. + Different from pair-wise calculation, this function would not broadcast. + For instance, X and Y are both (4,3) matrices, the function would return + a distance vector with shape (4,), instead of (4,4). + Parameters + ---------- + X : array of shape (n_samples, n_features) + First input samples + Y : array of shape (n_samples, n_features) + Second input samples + Returns + ------- + distance : array of shape (n_samples,) + Row-wise euclidean distance of X and Y + """ + X = check_array(X) + Y = check_array(Y) + + if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]: + raise ValueError("pairwise_distances_no_broadcast function receive" + "matrix with different shapes {0} and {1}".format( + X.shape, Y.shape)) + return _pairwise_distances_no_broadcast_helper(X, Y) + + +def _pairwise_distances_no_broadcast_helper(X, Y): # pragma: no cover + """Internal function for calculating the distance with numba. Do not use. + Parameters + ---------- + X : array of shape (n_samples, n_features) + First input samples + Y : array of shape (n_samples, n_features) + Second input samples + Returns + ------- + distance : array of shape (n_samples,) + Intermediate results. Do not use. + """ + euclidean_sq = np.square(Y - X) + return np.sqrt(np.sum(euclidean_sq, axis=1)).ravel() + + +def wpearsonr(x, y, w=None): + """Utility function to calculate the weighted Pearson correlation of two + samples. + See https://stats.stackexchange.com/questions/221246/such-thing-as-a-weighted-correlation + for more information + Parameters + ---------- + x : array, shape (n,) + Input x. + y : array, shape (n,) + Input y. 
+ w : array, shape (n,) + Weights w. + Returns + ------- + scores : float in range of [-1,1] + Weighted Pearson Correlation between x and y. + """ + + # unweighted version + # note the return is different + # TODO: fix output differences + if w is None: + return pearsonr(x, y) + + x = np.asarray(x) + y = np.asarray(y) + w = np.asarray(w) + + check_consistent_length([x, y, w]) + # n = len(x) + + w_sum = w.sum() + mx = np.sum(x * w) / w_sum + my = np.sum(y * w) / w_sum + + xm, ym = (x - mx), (y - my) + + r_num = np.sum(xm * ym * w) / w_sum + + xm2 = np.sum(xm * xm * w) / w_sum + ym2 = np.sum(ym * ym * w) / w_sum + + r_den = np.sqrt(xm2 * ym2) + r = r_num / r_den + + r = max(min(r, 1.0), -1.0) + + # TODO: disable p value calculation due to python 2.7 break + # df = n_train_ - 2 + # + # if abs(r) == 1.0: + # prob = 0.0 + # else: + # t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r))) + # prob = _betai(0.5 * df, 0.5, df / (df + t_squared)) + return r # , prob + + +##################################### +# PROBABILITY CALCULATIONS # +##################################### + +# TODO: disable p value calculation due to python 2.7 break +# def _betai(a, b, x): +# x = np.asarray(x) +# x = np.where(x < 1.0, x, 1.0) # if x > 1 then return 1.0 +# return betainc(a, b, x) + + +def pearsonr_mat(mat, w=None): + """Utility function to calculate the Pearson matrix (row-wise). + Parameters + ---------- + mat : numpy array of shape (n_samples, n_features) + Input matrix. + w : numpy array of shape (n_features,) + Weights. + Returns + ------- + pear_mat : numpy array of shape (n_samples, n_samples) + Row-wise Pearson score matrix. + """ + mat = check_array(mat) + n_row = mat.shape[0] + n_col = mat.shape[1] + pear_mat = np.full([n_row, n_row], 1).astype(float) + + if w is not None: + for cx in range(n_row): + for cy in range(cx + 1, n_row): + curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w) + pear_mat[cx, cy] = curr_pear + pear_mat[cy, cx] = curr_pear + else: + for cx in range(n_row): + for cy in range(cx + 1, n_row): + curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0] + pear_mat[cx, cy] = curr_pear + pear_mat[cy, cx] = curr_pear + + return pear_mat + +def column_ecdf(matrix: np.ndarray) -> np.ndarray: + """ + Utility function to compute the column-wise empirical cumulative distribution of a 2D feature matrix, + where the rows are samples and the columns are features per sample. The accumulation is done in the positive + direction of the sample axis. + + E.G. + p(1) = 0.2, p(0) = 0.3, p(2) = 0.1, p(6) = 0.4 + ECDF E(5) = p(x <= 5) + ECDF E would be E(-1) = 0, E(0) = 0.3, E(1) = 0.5, E(2) = 0.6, E(3) = 0.6, E(4) = 0.6, E(5) = 0.6, E(6) = 1 + + Similar to and tested against: + https://www.statsmodels.org/stable/generated/statsmodels.distributions.empirical_distribution.ECDF.html + + Returns + ------- + ecdf : np.ndarray of shape (n_samples, n_features) holding the column-wise ECDF value of every entry. + """ + # check the matrix dimensions + assert len(matrix.shape) == 2, 'Matrix needs to be two dimensional for the ECDF computation.'
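+ + # Approach: sort each column, assign rank-based probabilities i/n, copy the largest probability + # across runs of tied values (see ecdf_terminate_equals_inplace below), and finally undo the sort + # so every entry receives its ECDF value in the original row order.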
+ + # create a probability array the same shape as the feature matrix which we will reorder to build + # the ecdf + probabilities = np.linspace(np.ones(matrix.shape[1]) / matrix.shape[0], np.ones(matrix.shape[1]), matrix.shape[0]) + + # get the sorting indices for a numpy array + sort_idx = np.argsort(matrix, axis=0) + + # sort the numpy array, as we need to look for duplicates in the feature values (that would have different + # probabilities if we would just resort the probabilities array) + matrix = np.take_along_axis(matrix, sort_idx, axis=0) + + # deal with equal values + ecdf_terminate_equals_inplace(matrix, probabilities) + + # return the resorted accumulated probabilities (by reverting the sorting of the input matrix) + # looks a little complicated but is faster this way + reordered_probabilities = np.ones_like(probabilities) + np.put_along_axis(reordered_probabilities, sort_idx, probabilities, axis=0) + return reordered_probabilities + +@njit +def ecdf_terminate_equals_inplace(matrix: np.ndarray, probabilities: np.ndarray): + """ + This is a helper function for computing the ecdf of an array. It has been outsourced from the original + function in order to be able to use the njit compiler of numpy for increased speeds, as it unfortunately + needs a loop over all rows and columns of a matrix. It acts in place on the probabilities' matrix. + + Parameters + ---------- + matrix : a feature matrix where the rows are samples and each column is a feature !(expected to be sorted)! + + probabilities : a probability matrix that will be used building the ecdf. It has values between 0 and 1 and + is also sorted. + + Returns + ------- + + """ + for cx in range(probabilities.shape[1]): + for rx in range(probabilities.shape[0] - 2, -1, -1): + if matrix[rx, cx] == matrix[rx + 1, cx]: + probabilities[rx, cx] = probabilities[rx + 1, cx] \ No newline at end of file diff --git a/utils/torch_utility.py b/utils/torch_utility.py new file mode 100644 index 0000000000000000000000000000000000000000..f46ef0895fdf7ddc948c5b3db133d5b68f67160d --- /dev/null +++ b/utils/torch_utility.py @@ -0,0 +1,207 @@ +import numpy as np +import torch +from torch import nn +import subprocess as sp +import os, math + +class EarlyStoppingTorch: + """Early stops the training if validation loss doesn't improve after a given patience.""" + def __init__(self, save_path=None, patience=7, verbose=False, delta=0.0001): + """ + Args: + save_path : + patience (int): How long to wait after last time validation loss improved. + Default: 7 + verbose (bool): If True, prints a message for each validation loss improvement. + Default: False + delta (float): Minimum change in the monitored quantity to qualify as an improvement. 
+ Default: 0 + """ + self.save_path = save_path + self.patience = patience + self.verbose = verbose + self.counter = 0 + self.best_score = None + self.early_stop = False + self.val_loss_min = np.inf + self.delta = delta + + def __call__(self, val_loss, model): + + score = -val_loss + + if self.best_score is None: + self.best_score = score + self.save_checkpoint(val_loss, model) + elif score < self.best_score + self.delta: + self.counter += 1 + print(f'EarlyStopping counter: {self.counter} out of {self.patience}') + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.save_checkpoint(val_loss, model) + self.counter = 0 + + def save_checkpoint(self, val_loss, model): + '''Saves model when validation loss decrease.''' + if self.verbose: + print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') + if self.save_path: + path = os.path.join(self.save_path, 'best_network.pth') + torch.save(model.state_dict(), path) + self.val_loss_min = val_loss + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. + pe = torch.zeros(max_len, d_model).float() + pe.require_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular', bias=False) + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) + return x + +class TemporalEmbedding(nn.Module): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4 + hour_size = 24 + weekday_size = 7 + day_size = 32 + month_size = 13 + + Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding + if freq == 't': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. 
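+ # x is assumed to carry integer time features ordered as [month, day, weekday, hour, minute]; + # the minute embedding only exists when freq == 't', hence the hasattr guard above.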
+ hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + return hour_x + weekday_x + day_x + month_x + minute_x + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.require_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model, bias=False) + + def forward(self, x): + return self.embed(x) + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + self.position_embedding(x) + else: + x = self.value_embedding( + x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + return self.dropout(x) + +def adjust_learning_rate(optimizer, epoch, lradj, learning_rate): + # lr = args.learning_rate * (0.2 ** (epoch // 2)) + if lradj == 'type1': + lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch - 1) // 1))} + elif lradj == 'type2': + lr_adjust = { + 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, + 10: 5e-7, 15: 1e-7, 20: 5e-8 + } + if epoch in lr_adjust.keys(): + lr = lr_adjust[epoch] + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print('Updating learning rate to {}'.format(lr)) + + +def min_memory_id(): + output = sp.check_output(["/usr/bin/nvidia-smi", "--query-gpu=memory.used", "--format=csv"]) + memory = [int(s.split(" ")[0]) for s in output.decode().split("\n")[1:-1]] + assert len(memory) == torch.cuda.device_count() + return np.argmin(memory) + + +def get_gpu(cuda): + if cuda == True and torch.cuda.is_available(): + try: + device = torch.device(f"cuda:{min_memory_id()}") + torch.cuda.set_device(device) + print(f"----- Using GPU {torch.cuda.current_device()} -----") + except: + device = torch.device("cuda") + print(f"----- Using GPU {torch.cuda.get_device_name()} -----") + else: + if cuda == True and not torch.cuda.is_available(): + print("----- GPU is unavailable -----") + device = torch.device("cpu") + print("----- Using CPU -----") + return device \ No newline at end of file diff --git a/utils/utility.py b/utils/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..32d219b7fd09e12883e0c69fa5900068e58756f6 --- /dev/null +++ b/utils/utility.py @@ -0,0 +1,789 @@ +"""A set of utility functions to support outlier detection. 
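+Many of the helpers below mirror pyod's utility module (https://github.com/yzhao062/pyod), similar to stat_models.py above.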
+""" + +from __future__ import division +from __future__ import print_function +from joblib.parallel import cpu_count +import numpy as np +from numpy import percentile +import numbers + +import sklearn +from sklearn.metrics import precision_score +from sklearn.preprocessing import StandardScaler +from sklearn.utils import column_or_1d +from sklearn.utils import check_array +from sklearn.utils import check_consistent_length +from sklearn.utils import check_random_state +from sklearn.utils.random import sample_without_replacement +import torch.nn as nn + +MAX_INT = np.iinfo(np.int32).max +MIN_INT = -1 * MAX_INT + +def zscore(a, axis=0, ddof=0): + a = np.asanyarray(a) + mns = a.mean(axis=axis) + sstd = a.std(axis=axis, ddof=ddof) + + if axis and mns.ndim < a.ndim: + res = ((a - np.expand_dims(mns, axis=axis)) / + np.expand_dims(sstd, axis=axis)) + else: + res = (a - mns) / sstd + + return np.nan_to_num(res) + +def pairwise_distances_no_broadcast(X, Y): + """Utility function to calculate row-wise euclidean distance of two matrix. + Different from pair-wise calculation, this function would not broadcast. + For instance, X and Y are both (4,3) matrices, the function would return + a distance vector with shape (4,), instead of (4,4). + Parameters + ---------- + X : array of shape (n_samples, n_features) + First input samples + Y : array of shape (n_samples, n_features) + Second input samples + Returns + ------- + distance : array of shape (n_samples,) + Row-wise euclidean distance of X and Y + """ + X = check_array(X) + Y = check_array(Y) + + if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]: + raise ValueError("pairwise_distances_no_broadcast function receive" + "matrix with different shapes {0} and {1}".format( + X.shape, Y.shape)) + + euclidean_sq = np.square(Y - X) + return np.sqrt(np.sum(euclidean_sq, axis=1)).ravel() + +def getSplit(X): + """ + Randomly selects a split value from set of scalar data 'X'. + Returns the split value. + + Parameters + ---------- + X : array + Array of scalar values + Returns + ------- + float + split value + """ + xmin = X.min() + xmax = X.max() + return np.random.uniform(xmin, xmax) + +def similarityScore(S, node, alpha): + """ + Given a set of instances S falling into node and a value alpha >=0, + returns for all element x in S the weighted similarity score between x + and the centroid M of S (node.M) + + Parameters + ---------- + S : array of instances + Array of instances that fall into a node + node: a DiFF tree node + S is the set of instances "falling" into the node + alpha: float + alpha is the distance scaling hyper-parameter + Returns + ------- + array + the array of similarity values between the instances in S and the mean of training instances falling in node + """ + d = np.shape(S)[1] + if len(S) > 0: + d = np.shape(S)[1] + U = (S-node.M)/node.Mstd # normalize using the standard deviation vector to the mean + U = (2)**(-alpha*(np.sum(U*U/d, axis=1))) + else: + U = 0 + + return U + + +def EE(hist): + """ + given a list of positive values as a histogram drawn from any information source, + returns the empirical entropy of its discrete probability function. 
+ + Parameters + ---------- + hist: array + histogram + Returns + ------- + float + empirical entropy estimated from the histogram + """ + h = np.asarray(hist, dtype=np.float64) + if h.sum() <= 0 or (h < 0).any(): + return 0 + h = h/h.sum() + return -(h*np.ma.log2(h)).sum() + + +def weightFeature(s, nbins): + ''' + Given a list of values corresponding to a feature dimension, returns a weight (in [0,1]) that is + one minus the normalized empirical entropy, a way to characterize the importance of the feature dimension. + + Parameters + ---------- + s: array + list of scalar values corresponding to a feature dimension + nbins: int + the number of bins used to discretize the feature dimension using an histogram. + Returns + ------- + float + the importance weight for feature s. + ''' + if s.min() == s.max(): + return 0 + hist = np.histogram(s, bins=nbins, density=True) + ent = EE(hist[0]) + ent = ent/np.log2(nbins) + return 1-ent + + +def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='', + include_left=False, include_right=False): + """Check if an input is within the defined range. + + Parameters + ---------- + param : int, float + The input parameter to check. + + low : int, float + The lower bound of the range. + + high : int, float + The higher bound of the range. + + param_name : str, optional (default='') + The name of the parameter. + + include_left : bool, optional (default=False) + Whether includes the lower bound (lower bound <=). + + include_right : bool, optional (default=False) + Whether includes the higher bound (<= higher bound). + + Returns + ------- + within_range : bool or raise errors + Whether the parameter is within the range of (low, high) + + """ + + # param, low and high should all be numerical + if not isinstance(param, (numbers.Integral, np.integer, float)): + raise TypeError('{param_name} is set to {param} Not numerical'.format( + param=param, param_name=param_name)) + + if not isinstance(low, (numbers.Integral, np.integer, float)): + raise TypeError('low is set to {low}. Not numerical'.format(low=low)) + + if not isinstance(high, (numbers.Integral, np.integer, float)): + raise TypeError('high is set to {high}. Not numerical'.format( + high=high)) + + # at least one of the bounds should be specified + if low is MIN_INT and high is MAX_INT: + raise ValueError('Neither low nor high bounds is undefined') + + # if wrong bound values are used + if low > high: + raise ValueError( + 'Lower bound > Higher bound') + + # value check under different bound conditions + if (include_left and include_right) and (param < low or param > high): + raise ValueError( + '{param_name} is set to {param}. ' + 'Not in the range of [{low}, {high}].'.format( + param=param, low=low, high=high, param_name=param_name)) + + elif (include_left and not include_right) and ( + param < low or param >= high): + raise ValueError( + '{param_name} is set to {param}. ' + 'Not in the range of [{low}, {high}).'.format( + param=param, low=low, high=high, param_name=param_name)) + + elif (not include_left and include_right) and ( + param <= low or param > high): + raise ValueError( + '{param_name} is set to {param}. ' + 'Not in the range of ({low}, {high}].'.format( + param=param, low=low, high=high, param_name=param_name)) + + elif (not include_left and not include_right) and ( + param <= low or param >= high): + raise ValueError( + '{param_name} is set to {param}. 
' + 'Not in the range of ({low}, {high}).'.format( + param=param, low=low, high=high, param_name=param_name)) + else: + return True + + +def check_detector(detector): + """Checks if fit and decision_function methods exist for given detector + Parameters + ---------- + detector : pyod.models + Detector instance for which the check is performed. + """ + + if not hasattr(detector, 'fit') or not hasattr(detector, + 'decision_function'): + raise AttributeError("%s is not a detector instance." % (detector)) + + +def standardizer(X, X_t=None, keep_scalar=False): + """Conduct Z-normalization on data to turn input samples become zero-mean + and unit variance. + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The training samples + X_t : numpy array of shape (n_samples_new, n_features), optional (default=None) + The data to be converted + keep_scalar : bool, optional (default=False) + The flag to indicate whether to return the scalar + Returns + ------- + X_norm : numpy array of shape (n_samples, n_features) + X after the Z-score normalization + X_t_norm : numpy array of shape (n_samples, n_features) + X_t after the Z-score normalization + scalar : sklearn scalar object + The scalar used in conversion + """ + X = check_array(X) + scaler = StandardScaler().fit(X) + + if X_t is None: + if keep_scalar: + return scaler.transform(X), scaler + else: + return scaler.transform(X) + else: + X_t = check_array(X_t) + if X.shape[1] != X_t.shape[1]: + raise ValueError( + "The number of input data feature should be consistent" + "X has {0} features and X_t has {1} features.".format( + X.shape[1], X_t.shape[1])) + if keep_scalar: + return scaler.transform(X), scaler.transform(X_t), scaler + else: + return scaler.transform(X), scaler.transform(X_t) + + +def score_to_label(pred_scores, outliers_fraction=0.1): + """Turn raw outlier outlier scores to binary labels (0 or 1). + Parameters + ---------- + pred_scores : list or numpy array of shape (n_samples,) + Raw outlier scores. Outliers are assumed have larger values. + outliers_fraction : float in (0,1) + Percentage of outliers. + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. Return the outlier probability, ranging + in [0,1]. + """ + # check input values + pred_scores = column_or_1d(pred_scores) + check_parameter(outliers_fraction, 0, 1) + + threshold = percentile(pred_scores, 100 * (1 - outliers_fraction)) + pred_labels = (pred_scores > threshold).astype('int') + return pred_labels + + +def precision_n_scores(y, y_pred, n=None): + """Utility function to calculate precision @ rank n. + Parameters + ---------- + y : list or numpy array of shape (n_samples,) + The ground truth. Binary (0: inliers, 1: outliers). + y_pred : list or numpy array of shape (n_samples,) + The raw outlier scores as returned by a fitted model. + n : int, optional (default=None) + The number of outliers. if not defined, infer using ground truth. + Returns + ------- + precision_at_rank_n : float + Precision at rank n score. + """ + + # turn raw prediction decision scores into binary labels + y_pred = get_label_n(y, y_pred, n) + + # enforce formats of y and labels_ + y = column_or_1d(y) + y_pred = column_or_1d(y_pred) + + return precision_score(y, y_pred) + + +def get_label_n(y, y_pred, n=None): + """Function to turn raw outlier scores into binary labels by assign 1 + to top n outlier scores. 
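+ If n is not given, the number of positives (outliers) in the ground truth y is used to set the decision threshold.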
+ Parameters + ---------- + y : list or numpy array of shape (n_samples,) + The ground truth. Binary (0: inliers, 1: outliers). + y_pred : list or numpy array of shape (n_samples,) + The raw outlier scores as returned by a fitted model. + n : int, optional (default=None) + The number of outliers. if not defined, infer using ground truth. + Returns + ------- + labels : numpy array of shape (n_samples,) + binary labels 0: normal points and 1: outliers + Examples + -------- + >>> from pyod.utils.utility import get_label_n + >>> y = [0, 1, 1, 0, 0] + >>> y_pred = [0.1, 0.5, 0.3, 0.2, 0.7] + >>> get_label_n(y, y_pred) + array([0, 1, 0, 0, 1]) + """ + + # enforce formats of inputs + y = column_or_1d(y) + y_pred = column_or_1d(y_pred) + + check_consistent_length(y, y_pred) + y_len = len(y) # the length of targets + + # calculate the percentage of outliers + if n is not None: + outliers_fraction = n / y_len + else: + outliers_fraction = np.count_nonzero(y) / y_len + + threshold = percentile(y_pred, 100 * (1 - outliers_fraction)) + y_pred = (y_pred > threshold).astype('int') + + return y_pred + +def get_intersection(lst1, lst2): + """get the overlapping between two lists + Parameters + ---------- + li1 : list or numpy array + Input list 1. + li2 : list or numpy array + Input list 2. + Returns + ------- + difference : list + The overlapping between li1 and li2. + """ + return list(set(lst1) & set(lst2)) + + +def get_list_diff(li1, li2): + """get the elements in li1 but not li2. li1-li2 + Parameters + ---------- + li1 : list or numpy array + Input list 1. + li2 : list or numpy array + Input list 2. + Returns + ------- + difference : list + The difference between li1 and li2. + """ + + return (list(set(li1) - set(li2))) + +def get_diff_elements(li1, li2): + """get the elements in li1 but not li2, and vice versa + Parameters + ---------- + li1 : list or numpy array + Input list 1. + li2 : list or numpy array + Input list 2. + Returns + ------- + difference : list + The difference between li1 and li2. + """ + + return (list(set(li1) - set(li2)) + list(set(li2) - set(li1))) + +def argmaxn(value_list, n, order='desc'): + """Return the index of top n elements in the list + if order is set to 'desc', otherwise return the index of n smallest ones. + Parameters + ---------- + value_list : list, array, numpy array of shape (n_samples,) + A list containing all values. + n : int + The number of elements to select. + order : str, optional (default='desc') + The order to sort {'desc', 'asc'}: + - 'desc': descending + - 'asc': ascending + Returns + ------- + index_list : numpy array of shape (n,) + The index of the top n elements. + """ + + value_list = column_or_1d(value_list) + length = len(value_list) + + # validate the choice of n + check_parameter(n, 1, length, include_left=True, include_right=True, + param_name='n') + + # for the smallest n, flip the value + if order != 'desc': + n = length - n + + value_sorted = np.partition(value_list, length - n) + threshold = value_sorted[int(length - n)] + + if order == 'desc': + return np.where(np.greater_equal(value_list, threshold))[0] + else: # return the index of n smallest elements + return np.where(np.less(value_list, threshold))[0] + + +def invert_order(scores, method='multiplication'): + """ Invert the order of a list of values. The smallest value becomes + the largest in the inverted list. This is useful while combining + multiple detectors since their score order could be different. 
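+ With 'multiplication' the scores are simply negated; with 'subtraction' the largest score maps to 0 + and all inverted values are non-negative.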
+ Parameters + ---------- + scores : list, array or numpy array with shape (n_samples,) + The list of values to be inverted + method : str, optional (default='multiplication') + Methods used for order inversion. Valid methods are: + - 'multiplication': multiply by -1 + - 'subtraction': max(scores) - scores + Returns + ------- + inverted_scores : numpy array of shape (n_samples,) + The inverted list + Examples + -------- + >>> scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1] + >>> invert_order(scores1) + array([-0.1, -0.3, -0.5, -0.7, -0.2, -0.1]) + >>> invert_order(scores1, method='subtraction') + array([0.6, 0.4, 0.2, 0. , 0.5, 0.6]) + """ + + scores = column_or_1d(scores) + + if method == 'multiplication': + return scores.ravel() * -1 + + if method == 'subtraction': + return (scores.max() - scores).ravel() + + +def _get_sklearn_version(): # pragma: no cover + """ Utility function to decide the version of sklearn. + PyOD will result in different behaviors with different sklearn version + Returns + ------- + sk_learn version : int + """ + + sklearn_version = str(sklearn.__version__) + if int(sklearn_version.split(".")[1]) < 19 or int( + sklearn_version.split(".")[1]) > 23: + raise ValueError("Sklearn version error") + + return int(sklearn_version.split(".")[1]) + + +def _sklearn_version_21(): # pragma: no cover + """ Utility function to decide the version of sklearn + In sklearn 21.0, LOF is changed. Specifically, _decision_function + is replaced by _score_samples + Returns + ------- + sklearn_21_flag : bool + True if sklearn.__version__ is newer than 0.21.0 + """ + sklearn_version = str(sklearn.__version__) + if int(sklearn_version.split(".")[1]) > 20: + return True + else: + return False + + +def generate_bagging_indices(random_state, bootstrap_features, n_features, + min_features, max_features): + """ Randomly draw feature indices. Internal use only. + Modified from sklearn/ensemble/bagging.py + Parameters + ---------- + random_state : RandomState + A random number generator instance to define the state of the random + permutations generator. + bootstrap_features : bool + Specifies whether to bootstrap indice generation + n_features : int + Specifies the population size when generating indices + min_features : int + Lower limit for number of features to randomly sample + max_features : int + Upper limit for number of features to randomly sample + Returns + ------- + feature_indices : numpy array, shape (n_samples,) + Indices for features to bag + """ + + # Get valid random state + random_state = check_random_state(random_state) + + # decide number of features to draw + random_n_features = random_state.randint(min_features, max_features) + + # Draw indices + feature_indices = generate_indices(random_state, bootstrap_features, + n_features, random_n_features) + + return feature_indices + + +def generate_indices(random_state, bootstrap, n_population, n_samples): + """ Draw randomly sampled indices. Internal use only. + See sklearn/ensemble/bagging.py + Parameters + ---------- + random_state : RandomState + A random number generator instance to define the state of the random + permutations generator. 
+ bootstrap : bool + Specifies whether to bootstrap indice generation + n_population : int + Specifies the population size when generating indices + n_samples : int + Specifies number of samples to draw + Returns + ------- + indices : numpy array, shape (n_samples,) + randomly drawn indices + """ + + # Draw sample indices + if bootstrap: + indices = random_state.randint(0, n_population, n_samples) + else: + indices = sample_without_replacement(n_population, n_samples, + random_state=random_state) + + return indices + + +def EuclideanDist(x,y): + return np.sqrt(np.sum((x - y) ** 2)) + +def dist2set(x, X): + l=len(X) + ldist=[] + for i in range(l): + ldist.append(EuclideanDist(x,X[i])) + return ldist + +def c_factor(n) : + if(n<2): + n=2 + return 2.0*(np.log(n-1)+0.5772156649) - (2.0*(n-1.)/(n*1.0)) + + +def all_branches(node, current=[], branches = None): + current = current[:node.e] + if branches is None: branches = [] + if node.ntype == 'inNode': + current.append('L') + all_branches(node.left, current=current, branches=branches) + current = current[:-1] + current.append('R') + all_branches(node.right, current=current, branches=branches) + else: + branches.append(current) + return branches + + +def branch2num(branch, init_root=0): + num = [init_root] + for b in branch: + if b == 'L': + num.append(num[-1] * 2 + 1) + if b == 'R': + num.append(num[-1] * 2 + 2) + return num + +def _get_n_jobs(n_jobs): + """Get number of jobs for the computation. + See sklearn/utils/__init__.py for more information. + + This function reimplements the logic of joblib to determine the actual + number of jobs depending on the cpu count. If -1 all CPUs are used. + If 1 is given, no parallel computing code is used at all, which is useful + for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. + Thus for n_jobs = -2, all CPUs but one are used. + Parameters + ---------- + n_jobs : int + Number of jobs stated in joblib convention. + Returns + ------- + n_jobs : int + The actual number of jobs as positive integer. + """ + if n_jobs < 0: + return max(cpu_count() + 1 + n_jobs, 1) + elif n_jobs == 0: + raise ValueError('Parameter n_jobs == 0 has no meaning.') + else: + return n_jobs + + +def _partition_estimators(n_estimators, n_jobs): + """Private function used to partition estimators between jobs. + See sklearn/ensemble/base.py for more information. + """ + # Compute the number of jobs + n_jobs = min(_get_n_jobs(n_jobs), n_estimators) + + # Partition estimators between jobs + n_estimators_per_job = (n_estimators // n_jobs) * np.ones(n_jobs, dtype=int) + n_estimators_per_job[:n_estimators % n_jobs] += 1 + starts = np.cumsum(n_estimators_per_job) + + return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist() + + +def _pprint(params, offset=0, printer=repr): + # noinspection PyPep8 + """Pretty print the dictionary 'params' + + See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html + and sklearn/base.py for more information. + + :param params: The dictionary to pretty print + :type params: dict + + :param offset: The offset in characters to add at the begin of each line. 
+ :type offset: int + + :param printer: The function to convert entries to strings, typically + the builtin str or repr + :type printer: callable + + :return: None + """ + + # Do a multi-line justified repr: + options = np.get_printoptions() + np.set_printoptions(precision=5, threshold=64, edgeitems=2) + params_list = list() + this_line_length = offset + line_sep = ',\n' + (1 + offset // 2) * ' ' + for i, (k, v) in enumerate(sorted(params.items())): + if type(v) is float: + # use str for representing floating point numbers + # this way we get consistent representation across + # architectures and versions. + this_repr = '%s=%s' % (k, str(v)) + else: + # use repr of the rest + this_repr = '%s=%s' % (k, printer(v)) + if len(this_repr) > 500: + this_repr = this_repr[:300] + '...' + this_repr[-100:] + if i > 0: + if this_line_length + len(this_repr) >= 75 or '\n' in this_repr: + params_list.append(line_sep) + this_line_length = len(line_sep) + else: + params_list.append(', ') + this_line_length += 2 + params_list.append(this_repr) + this_line_length += len(this_repr) + + np.set_printoptions(**options) + lines = ''.join(params_list) + # Strip trailing space to avoid nightmare in doctests + lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) + return lines + +def get_activation_by_name(name): + activations = { + 'relu': nn.ReLU(), + 'sigmoid': nn.Sigmoid(), + 'tanh': nn.Tanh(), + 'leakyrelu':nn.LeakyReLU() + } + + if name in activations.keys(): + return activations[name] + + else: + raise ValueError(name, "is not a valid activation function") + +def get_optimal_n_bins(X, upper_bound=None, epsilon=1): + """ Determine optimal number of bins for a histogram using the Birge + Rozenblac method (see :cite:`birge2006many` for details.) + + See https://doi.org/10.1051/ps:2006001 + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The samples to determine the optimal number of bins for. + + upper_bound : int, default=None + The maximum value of n_bins to be considered. + If set to None, np.sqrt(X.shape[0]) will be used as upper bound. + + epsilon : float, default = 1 + A stabilizing term added to the logarithm to prevent division by zero. + + Returns + ------- + optimal_n_bins : int + The optimal value of n_bins according to the Birge Rozenblac method + """ + if upper_bound is None: + upper_bound = int(np.sqrt(X.shape[0])) + + n = X.shape[0] + maximum_likelihood = np.zeros((upper_bound - 1, 1)) + + for i, b in enumerate(range(1, upper_bound)): + histogram, _ = np.histogram(X, bins=b) + + maximum_likelihood[i] = np.sum( + histogram * np.log(b * histogram / n + epsilon) - ( + b - 1 + np.power(np.log(b), 2.5))) + + return np.argmax(maximum_likelihood) + 1 \ No newline at end of file
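Usage sketch (illustrative, not part of the diff): the snippet below shows how the ACF-based period estimator and the score-thresholding utility added above might be combined. The synthetic series, the 1% contamination value, and the import paths (which assume the repository root is on PYTHONPATH) are assumptions for illustration only.

import numpy as np

from utils.slidingWindows import find_length_rank
from utils.utility import score_to_label

# Synthetic sine wave with a 50-sample period plus a few injected spikes (made-up data).
rng = np.random.default_rng(0)
ts = np.sin(2 * np.pi * np.arange(2000) / 50) + 0.1 * rng.standard_normal(2000)
ts[[300, 900, 1500]] += 5.0

# Estimate the dominant period / sliding-window length from the autocorrelation function.
period = find_length_rank(ts, rank=1)
print("estimated period:", period)

# Pretend these are raw anomaly scores from some detector (higher = more anomalous).
scores = np.abs(ts - np.convolve(ts, np.ones(5) / 5, mode="same"))
labels = score_to_label(scores, outliers_fraction=0.01)  # flag the top 1% of scores
print("flagged indices:", np.flatnonzero(labels)[:10])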