"""Fault classification training utilities for PMU and PV datasets.

This module trains deep learning models on high-frequency PMU measurements and
supports classical machine learning baselines so the resulting artefacts can be
served via the Gradio app in this repository or on Hugging Face Spaces.  It
implements a full training pipeline including preprocessing, sequence
generation, model definition (CNN-LSTM, Temporal Convolutional Network, or
Support Vector Machine), evaluation, and export of deployment metadata.

Example
-------
python fault_classification_pmu.py \
    --data-path data/Fault_Classification_PMU_Data.csv \
    --label-column FaultType \
    --model-type tcn \
    --model-out pmu_tcn_model.keras \
    --scaler-out pmu_feature_scaler.pkl \
    --metadata-out pmu_metadata.json

The script accepts CSV input where each row contains a timestamped PMU
measurement and a categorical fault label.  Features default to the 15 PMU
channels used in the project documentation (see ``DEFAULT_FEATURE_COLUMNS``),
but any subset can be provided via the ``--feature-columns`` argument.  Data is
automatically windowed into temporal sequences and standardised before training.

The exported metadata JSON file contains the feature ordering, label names,
sequence length, stride, and chosen architecture.  The Gradio front-end
consumes this file to replicate the same preprocessing steps during inference.
"""
from __future__ import annotations

import argparse
import json
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Tuple

import math

# Environment defaults applied before TensorFlow is imported further down.
# ``setdefault`` leaves any value that is already present in the environment
# untouched, so callers can still override each setting.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")  # presumably hides CUDA GPUs so training runs on CPU
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # presumably silences TensorFlow INFO/WARNING native logs
os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")  # presumably disables oneDNN custom ops -- TODO confirm

import joblib
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from tensorflow.keras import callbacks, layers, models, optimizers


class ProgressCallback(callbacks.Callback):
    """Keras callback that streams human-readable training progress.

    Every status line is printed to stdout.  When ``status_file_path`` is
    given, the most recent status line also overwrites that file.  Batch-level
    status lines are rate limited by ``status_update_interval``; epoch-level
    and final lines are always emitted.

    Parameters
    ----------
    total_epochs:
        Number of epochs requested for the run; used for percentage figures.
    status_file_path:
        Optional path of a text file that receives the latest status line.
    status_update_interval:
        Minimum number of seconds between two non-forced status reports.
        Values below 1.0 are raised to 1.0.
    batch_log_frequency:
        During the first epoch, a begin/end line is printed every this many
        batches.  Values below 1 are raised to 1.
    """

    def __init__(
        self,
        total_epochs,
        status_file_path=None,
        *,
        status_update_interval: float = 10.0,
        batch_log_frequency: int = 10,
    ):
        super().__init__()
        self.total_epochs = total_epochs
        self.status_file_path = status_file_path
        self.status_update_interval = max(1.0, float(status_update_interval))
        self.batch_log_frequency = max(1, int(batch_log_frequency))
        self.current_epoch = 0
        self.train_start_time: Optional[float] = None
        self.last_status_report: Optional[float] = None
        self.total_batches_per_epoch = 0
        self.batches_seen = 0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _now(self) -> float:
        """Return the current reading of a monotonic performance counter."""
        import time

        return time.perf_counter()

    def _training_elapsed(self, now: Optional[float] = None) -> float:
        """Return seconds since training started (0.0 if it has not started)."""
        if self.train_start_time is None:
            return 0.0
        if now is None:
            now = self._now()
        return max(0.0, now - self.train_start_time)

    def _report_status(self, message: str, *, force: bool = False) -> None:
        """Print ``message`` and mirror it to the status file.

        Unless ``force`` is true, the call is dropped when the previous report
        happened less than ``status_update_interval`` seconds ago.
        """
        now = self._now()
        if not force and self.last_status_report is not None:
            if now - self.last_status_report < self.status_update_interval:
                return

        print(message, flush=True)

        if self.status_file_path:
            try:
                with open(self.status_file_path, "w", encoding="utf-8") as f:
                    f.write(message)
            except Exception:
                # Deliberate best-effort: a broken status file must never
                # interrupt training; progress still streams to stdout.
                pass

        self.last_status_report = now

    # ------------------------------------------------------------------
    # Keras callback overrides
    # ------------------------------------------------------------------
    def on_train_begin(self, logs=None):
        """Reset counters and work out how many batches make up one epoch."""
        params = self.params or {}
        steps = params.get("steps") or params.get("steps_per_epoch")
        if steps:
            self.total_batches_per_epoch = int(steps)
        else:
            samples = params.get("samples")
            batch_size = params.get("batch_size") or 0
            if samples and batch_size:
                self.total_batches_per_epoch = math.ceil(samples / batch_size)
            else:
                self.total_batches_per_epoch = 0

        # Reset per-run state so a reused callback does not report stale values.
        self.current_epoch = 0
        self.batches_seen = 0
        self.last_status_report = None
        self.train_start_time = self._now()

    def on_epoch_begin(self, epoch, logs=None):
        """Report the start of an epoch (``epoch`` is zero-based)."""
        import time

        now = self._now()
        if self.train_start_time is None:
            self.train_start_time = now

        self.current_epoch = epoch + 1
        self.batches_seen = 0

        progress_pct = (self.current_epoch / self.total_epochs) * 100
        elapsed_time = self._training_elapsed(now)
        status_msg = (
            f"Training epoch {self.current_epoch}/{self.total_epochs} "
            f"({progress_pct:.1f}%) - {elapsed_time:.1f}s elapsed"
        )
        self._report_status(status_msg, force=True)

        if self.current_epoch == 1:
            wall_clock = time.strftime("%H:%M:%S")
            print(f"Starting first epoch at {wall_clock}", flush=True)

    def on_batch_begin(self, batch, logs=None):
        """Print a begin line for sampled batches of the first epoch only."""
        if self.current_epoch == 1 and batch % self.batch_log_frequency == 0:
            elapsed = self._training_elapsed()
            print(f"Epoch {self.current_epoch}, Batch {batch} started - {elapsed:.1f}s elapsed", flush=True)

    def on_batch_end(self, batch, logs=None):
        """Track batch progress and emit a rate-limited overall status line."""
        self.batches_seen = batch + 1

        if self.current_epoch == 1 and batch % self.batch_log_frequency == 0:
            logs = logs or {}
            loss = logs.get("loss", 0)
            elapsed = self._training_elapsed()
            print(
                f"Epoch {self.current_epoch}, Batch {batch} completed - Loss: {loss:.4f}, {elapsed:.1f}s elapsed",
                flush=True,
            )

        total_batches = self.total_batches_per_epoch or 0
        if not total_batches:
            # The batch count was unknown at train start; look again in case
            # the params now carry it.
            params = self.params or {}
            total_batches = (
                params.get("steps")
                or params.get("steps_per_epoch")
                or 0
            )

        if total_batches:
            epoch_fraction = min(1.0, (batch + 1) / total_batches)
        else:
            epoch_fraction = 0.0

        overall_progress = (
            (self.current_epoch - 1 + epoch_fraction) / self.total_epochs * 100
        )
        elapsed_time = self._training_elapsed()
        status_msg = (
            f"Epoch {self.current_epoch}/{self.total_epochs} - Batch {batch + 1}/{total_batches or '?'} "
            f"({overall_progress:.1f}%) - {elapsed_time:.1f}s elapsed"
        )
        self._report_status(status_msg)

    def on_epoch_end(self, epoch, logs=None):
        """Report the epoch's loss/accuracy figures (missing ones show as 0)."""
        logs = logs or {}
        loss = logs.get("loss", 0)
        val_loss = logs.get("val_loss", 0)
        accuracy = logs.get("accuracy", logs.get("acc", 0))
        val_accuracy = logs.get("val_accuracy", logs.get("val_acc", 0))
        _ = epoch  # Suppress unused variable warning

        elapsed_time = self._training_elapsed()
        status_msg = (
            f"Epoch {self.current_epoch}/{self.total_epochs} completed - "
            f"Loss: {loss:.4f}, Val Loss: {val_loss:.4f}, "
            f"Acc: {accuracy:.4f}, Val Acc: {val_accuracy:.4f} - {elapsed_time:.1f}s total"
        )
        self._report_status(status_msg, force=True)

    def on_train_end(self, logs=None):
        """Report the final summary line.

        The epoch figure is the last epoch that actually started rather than
        the requested total, so a run stopped early (for example by an
        early-stopping callback) is reported accurately.
        """
        total_elapsed = self._training_elapsed()
        epochs_run = self.current_epoch
        final_message = (
            f"Training finished after {epochs_run} epoch(s) - "
            f"{total_elapsed:.1f}s total elapsed"
        )
        self._report_status(final_message, force=True)


# Default PMU feature set as described in the user provided table.  Timestamp is
# intentionally omitted because it is not a model input feature.  When features
# are auto-detected, the entries present in the data are placed first, in this
# order, ahead of any other columns (see ``_resolve_features``).
DEFAULT_FEATURE_COLUMNS: List[str] = [
    "[325] UPMU_SUB22:FREQ",
    "[326] UPMU_SUB22:DFDT",
    "[327] UPMU_SUB22:FLAG",
    "[328] UPMU_SUB22-L1:MAG",
    "[329] UPMU_SUB22-L1:ANG",
    "[330] UPMU_SUB22-L2:MAG",
    "[331] UPMU_SUB22-L2:ANG",
    "[332] UPMU_SUB22-L3:MAG",
    "[333] UPMU_SUB22-L3:ANG",
    "[334] UPMU_SUB22-C1:MAG",
    "[335] UPMU_SUB22-C1:ANG",
    "[336] UPMU_SUB22-C2:MAG",
    "[337] UPMU_SUB22-C2:ANG",
    "[338] UPMU_SUB22-C3:MAG",
    "[339] UPMU_SUB22-C3:ANG",
]

# Column names tried, case-insensitively and in this order, when the requested
# label column cannot be found in the data (see ``_resolve_label_column``).
LABEL_GUESS_CANDIDATES: Tuple[str, ...] = ("Fault", "FaultType", "Label", "Target", "Class")


def _normalise_column_name(name: str) -> str:
    """Return ``name`` as text with surrounding whitespace removed and lower-cased."""
    as_text = str(name)
    trimmed = as_text.strip()
    return trimmed.lower()


def _resolve_label_column(df: pd.DataFrame, requested: str) -> str:
    """Work out which dataframe column holds the labels.

    Candidates are tried in this order and the first hit wins:

    1. ``requested`` (trimmed) as an exact column name;
    2. a column whose trimmed name equals ``requested``;
    3. a column whose normalised (trimmed, lower-cased) name equals the
       lower-cased ``requested``;
    4. any of ``LABEL_GUESS_CANDIDATES``, compared on normalised names;
    5. the right-most column that is not numeric.

    Raises
    ------
    ValueError
        If the dataframe has no columns or no candidate matches.
    """
    all_names = [str(col) for col in df.columns]
    if len(all_names) == 0:
        raise ValueError("Provided dataframe does not contain any columns.")

    wanted = str(requested or "").strip()

    if wanted:
        if wanted in df.columns:
            return wanted
        trimmed_match = next(
            (str(col) for col in df.columns if str(col).strip() == wanted),
            None,
        )
        if trimmed_match is not None:
            return trimmed_match

    # Normalised name -> original name; on collisions the later column wins.
    by_normalised = {_normalise_column_name(col): str(col) for col in df.columns}

    if wanted and wanted.lower() in by_normalised:
        return by_normalised[wanted.lower()]

    for candidate in LABEL_GUESS_CANDIDATES:
        folded = candidate.lower()
        if folded in by_normalised:
            return by_normalised[folded]

    # Last resort: scan from the right for a column that is not numeric.
    for col in reversed(df.columns):
        if is_numeric_dtype(df[col]):
            continue
        return str(col)

    listing = ", ".join(all_names)
    raise ValueError(
        f"Label column '{wanted or ' '}' not found in provided dataframe. "
        f"Available columns: {listing}"
    )


def _resolve_features(df: pd.DataFrame, feature_columns: Sequence[str] | None, label_column: str) -> List[str]:
    """Return the ordered list of feature column names to use.

    Parameters
    ----------
    df:
        Dataframe holding the measurements.
    feature_columns:
        Explicit feature ordering.  When given (and non-empty) it is validated
        against ``df`` and returned unchanged.
    label_column:
        Name of the label column; never selected as a feature during
        auto-detection.

    Returns
    -------
    list[str]
        The entries of ``DEFAULT_FEATURE_COLUMNS`` present in ``df`` (in their
        documented order) followed by every other numeric column, in dataframe
        order, that is neither the label nor a timestamp column.

    Raises
    ------
    ValueError
        If an explicitly requested column is missing, or if auto-detection
        finds no usable column.
    """
    if feature_columns:
        missing = [c for c in feature_columns if c not in df.columns]
        if missing:
            raise ValueError(f"Feature columns not present in CSV: {missing}")
        return list(feature_columns)

    # Prefer the documented PMU ordering when the columns exist, falling back to
    # any remaining numeric columns.
    preferred = [c for c in DEFAULT_FEATURE_COLUMNS if c in df.columns]
    already_chosen = set(preferred)

    # Compare on trimmed, lower-cased names so "TIMESTAMP" or " Timestamp " are
    # excluded just like "timestamp".
    excluded = {str(label_column).strip().lower(), "timestamp"}

    # Pair names with dtypes positionally so duplicated column names are safe.
    # Non-numeric columns are skipped because callers cast features to float32.
    remainder = [
        name
        for name, dtype in zip(df.columns, df.dtypes)
        if name not in already_chosen
        and str(name).strip().lower() not in excluded
        and is_numeric_dtype(dtype)
    ]

    ordered = preferred + remainder
    if not ordered:
        raise ValueError("No feature columns detected. Specify --feature-columns explicitly.")
    return ordered


def load_dataset(
    csv_path: Path,
    *,
    feature_columns: Sequence[str] | None,
    label_column: str,
) -> Tuple[np.ndarray, np.ndarray, List[str], str]:
    """Read a CSV file and split it into feature and label arrays.

    The file is parsed with the pure-Python pandas engine and ``sep=None``,
    which lets pandas detect the delimiter.

    Parameters
    ----------
    csv_path:
        Location of the CSV file with the PMU measurements.
    feature_columns:
        Explicit feature ordering, or ``None`` to auto-detect the features.
    label_column:
        Requested name of the categorical fault-label column.

    Returns
    -------
    features: np.ndarray
        ``float32`` array of shape (n_samples, n_features).
    labels: np.ndarray
        1-D array with the labels converted to strings.
    columns: list[str]
        Feature ordering that was actually used.
    resolved_label: str
        Name of the column the labels were read from.
    """
    frame = pd.read_csv(csv_path, sep=None, engine="python")

    label_name = _resolve_label_column(frame, label_column)
    feature_names = _resolve_features(frame, feature_columns, label_name)

    feature_matrix = frame[feature_names].astype(np.float32).values
    label_vector = frame[label_name].astype(str).values
    return feature_matrix, label_vector, feature_names, label_name


def load_dataset_from_dataframe(
    df: pd.DataFrame,
    *,
    feature_columns: Sequence[str] | None,
    label_column: str,
) -> Tuple[np.ndarray, np.ndarray, List[str], str]:
    """Split an in-memory dataframe into feature and label arrays.

    Returns a 4-tuple: the ``float32`` feature matrix, the labels as strings,
    the feature ordering that was used, and the name of the label column that
    was resolved from ``label_column``.
    """
    label_name = _resolve_label_column(df, label_column)
    feature_names = _resolve_features(df, feature_columns, label_name)

    feature_frame = df[feature_names].astype(np.float32)
    label_series = df[label_name].astype(str)
    return feature_frame.values, label_series.values, feature_names, label_name


def create_sequences(
    features: np.ndarray,
    labels: np.ndarray,
    *,
    sequence_length: int,
    stride: int,
) -> Tuple[np.ndarray, np.ndarray]:
    """Slice the rows into fixed-length windows for sequence models.

    Windows start at rows ``0, stride, 2 * stride, ...`` and each spans
    ``sequence_length`` consecutive rows; a trailing partial window is not
    produced.  Every window is labelled with the label of its last row, which
    suits fault detection where the newest measurement describes the current
    state of the system.

    Returns
    -------
    tuple
        ``(windows, window_labels)`` where ``windows`` stacks the slices along
        a new leading axis and ``window_labels`` holds one label per window.

    Raises
    ------
    ValueError
        If ``sequence_length`` or ``stride`` is not positive, if the inputs
        disagree on the number of rows, or if there are fewer rows than
        ``sequence_length``.
    """
    if sequence_length <= 0:
        raise ValueError("sequence_length must be > 0")
    if stride <= 0:
        raise ValueError("stride must be > 0")

    row_count = features.shape[0]
    if row_count != labels.shape[0]:
        raise ValueError("Features and labels must contain the same number of rows")
    if row_count < sequence_length:
        raise ValueError("Not enough samples to create a single sequence")

    window_starts = range(0, row_count - sequence_length + 1, stride)
    windows = [features[begin : begin + sequence_length] for begin in window_starts]
    window_labels = [labels[begin + sequence_length - 1] for begin in window_starts]
    return np.stack(windows), np.array(window_labels)


def build_cnn_lstm(
    input_shape: Tuple[int, int],
    num_classes: int,
    *,
    conv_filters: int = 128,
    kernel_size: int = 3,
    lstm_units: int = 128,
    dropout: float = 0.3,
) -> models.Model:
    """Build and compile the CNN-LSTM classifier.

    Two same-padded ReLU convolutions (dilation 1, then 2), each followed by
    batch normalisation, feed an LSTM that returns only its final output.
    Dropout is applied before and after the LSTM, and a softmax layer produces
    ``num_classes`` outputs.  The model is compiled with Adam (learning rate
    1e-3), sparse categorical cross-entropy and an accuracy metric.
    """
    net_input = layers.Input(shape=input_shape)

    hidden = net_input
    for dilation in (1, 2):
        conv = layers.Conv1D(
            conv_filters,
            kernel_size,
            dilation_rate=dilation,
            padding="same",
            activation="relu",
        )
        hidden = conv(hidden)
        norm = layers.BatchNormalization()
        hidden = norm(hidden)

    hidden = layers.Dropout(dropout)(hidden)
    hidden = layers.LSTM(lstm_units, return_sequences=False)(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    class_probabilities = layers.Dense(num_classes, activation="softmax")(hidden)

    network = models.Model(net_input, class_probabilities)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network


def build_tcn(
    input_shape: Tuple[int, int],
    num_classes: int,
    *,
    filters: int = 64,
    kernel_size: int = 3,
    dilations: Sequence[int] = (1, 2, 4, 8),
    dropout: float = 0.2,
) -> models.Model:
    """Build and compile the Temporal Convolutional Network classifier.

    One residual block is created per entry in ``dilations``.  A block applies
    two causal ReLU convolutions at that dilation, each followed by batch
    normalisation, with dropout between them.  The block input is added back
    (through a 1x1 convolution when its channel count differs from
    ``filters``) before a final ReLU.  Global average pooling, dropout and a
    softmax layer complete the network, which is compiled with Adam (learning
    rate 1e-3), sparse categorical cross-entropy and an accuracy metric.
    """

    def causal_conv(rate: int):
        # Fresh causal convolution layer for the given dilation rate.
        return layers.Conv1D(
            filters,
            kernel_size,
            padding="causal",
            activation="relu",
            dilation_rate=rate,
        )

    net_input = layers.Input(shape=input_shape)
    hidden = net_input

    for rate in dilations:
        shortcut = hidden

        first_conv = causal_conv(rate)
        hidden = first_conv(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Dropout(dropout)(hidden)

        second_conv = causal_conv(rate)
        hidden = second_conv(hidden)
        hidden = layers.BatchNormalization()(hidden)

        # Project the shortcut only when its channel count does not match.
        if shortcut.shape[-1] != filters:
            shortcut = layers.Conv1D(filters, 1, padding="same")(shortcut)

        hidden = layers.Add()([hidden, shortcut])
        hidden = layers.Activation("relu")(hidden)

    pooled = layers.GlobalAveragePooling1D()(hidden)
    pooled = layers.Dropout(dropout)(pooled)
    class_probabilities = layers.Dense(num_classes, activation="softmax")(pooled)

    network = models.Model(net_input, class_probabilities)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network


def _write_training_status(status_file, message: str) -> None:
    """Overwrite ``status_file`` with ``message``; do nothing when no file is set."""
    if status_file:
        with open(status_file, "w") as handle:
            handle.write(message)


def _build_keras_callbacks(*, epochs, status_file, tb_dir, lr_patience, stop_patience):
    """Return the callback list: progress, optional TensorBoard, LR schedule, early stopping."""
    callback_list = [
        ProgressCallback(
            total_epochs=epochs,
            status_file_path=str(status_file) if status_file else None,
        )
    ]
    if tb_dir is not None:
        # histogram_freq=0 / write_graph=False keep the TensorBoard overhead low.
        callback_list.append(
            callbacks.TensorBoard(log_dir=tb_dir, histogram_freq=0, write_graph=False)
        )
    callback_list.append(
        callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=lr_patience, min_lr=1e-5)
    )
    callback_list.append(
        callbacks.EarlyStopping(monitor="val_loss", patience=stop_patience, restore_best_weights=True)
    )
    return callback_list


def _fit_and_predict_keras(
    model,
    X_train,
    y_train,
    X_val,
    y_val,
    *,
    display_name,
    predict_tag,
    epochs,
    batch_size,
    callbacks_list,
    status_file,
):
    """Fit a compiled Keras model and return ``(history dict, validation predictions)``."""
    print(f"Starting {display_name} training with {len(X_train)} training samples, {len(X_val)} validation samples")
    print(f"Batch size: {batch_size}, Epochs: {epochs}")
    _write_training_status(
        status_file,
        f"{display_name} training started - {len(X_train)} train, {len(X_val)} val samples, batch_size={batch_size}",
    )

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks_list,
        verbose=2,
    )

    print(f"{display_name} training completed, starting prediction...")
    _write_training_status(status_file, f"{display_name} training completed, evaluating model...")

    print(f"Making {predict_tag}predictions on {len(X_val)} validation samples...")
    _write_training_status(
        status_file, f"Making {predict_tag}predictions on {len(X_val)} validation samples..."
    )
    y_pred = model.predict(X_val, verbose=0).argmax(axis=1)
    print(f"{predict_tag}predictions completed" if predict_tag else "Predictions completed")
    return history.history, y_pred


def train_model(
    sequences: np.ndarray,
    labels: np.ndarray,
    *,
    validation_split: float,
    batch_size: int,
    epochs: int,
    model_type: str = "cnn_lstm",
    tensorboard_log_dir: Optional[Path] = None,
    status_file_path: Optional[Path] = None,
) -> Tuple[object, LabelEncoder, Dict[str, object]]:
    """Train a classifier on windowed sequences and evaluate it on a hold-out split.

    Parameters
    ----------
    sequences:
        Array whose first axis indexes the windows; the remaining axes are
        used as the network input shape (flattened per window for the SVM).
    labels:
        One label per window.
    validation_split:
        Fraction of windows held out for validation.  It may be adjusted when
        some class has a single sample and the split is not stratified.
    batch_size, epochs:
        Passed to Keras ``fit``; ignored by the SVM.
    model_type:
        ``"cnn_lstm"``, ``"tcn"`` or ``"svm"`` (case-insensitive).
    tensorboard_log_dir:
        Directory for TensorBoard logs; ignored for the SVM.
    status_file_path:
        Optional text file that receives the latest progress message.

    Returns
    -------
    tuple
        ``(model, label_encoder, metrics)``.  ``metrics`` holds the training
        history, the validation targets/predictions, the class names, a
        confusion matrix with one row/column per class, the model type, the
        input shape and the TensorBoard directory (or ``None``).

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values.
    """

    model_type = model_type.lower().strip()
    if model_type not in {"cnn_lstm", "tcn", "svm"}:
        raise ValueError("model_type must be either 'cnn_lstm', 'tcn', or 'svm'")

    # Handle status file for progress tracking
    status_file = status_file_path if status_file_path else None
    if status_file:
        # The status file is written before any artefact directory is created,
        # so make sure its folder exists up front.
        Path(status_file).parent.mkdir(parents=True, exist_ok=True)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labels)
    num_classes = len(label_encoder.classes_)

    tb_dir: Optional[str] = None
    if model_type != "svm" and tensorboard_log_dir is not None:
        tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
        tb_dir = str(tensorboard_log_dir.resolve())

    # Check if we can use stratification (each class needs at least 2 samples)
    unique_labels, label_counts = np.unique(y, return_counts=True)
    min_samples_per_class = np.min(label_counts)

    print(f"Label distribution: {dict(zip(unique_labels, label_counts))}")
    print(f"Minimum samples per class: {min_samples_per_class}")
    print(f"Total sequences: {len(sequences)}, Features per sequence: {sequences.shape[1:]}")

    # Check for potential memory issues
    data_size_mb = sequences.nbytes / (1024 * 1024)
    print(f"Data size: {data_size_mb:.2f} MB")
    if data_size_mb > 1000:  # > 1GB
        print("Warning: Large dataset detected. Consider reducing batch size or sequence length.")

    # Validate data ranges
    if not np.isfinite(sequences).all():
        print("Warning: NaN or Inf values detected in sequences")
        sequences = np.nan_to_num(sequences, nan=0.0, posinf=1e6, neginf=-1e6)

    # Derive the model input only after sanitising: ``nan_to_num`` returns a
    # new array, so anything derived earlier would still hold NaN/Inf values.
    if model_type == "svm":
        features = sequences.reshape(sequences.shape[0], -1)
    else:
        features = sequences

    # Use stratification only if each class has at least 2 samples
    if min_samples_per_class >= 2:
        X_train, X_val, y_train, y_val = train_test_split(
            features, y, test_size=validation_split, stratify=y, random_state=42
        )
    else:
        print(f"Warning: Some classes have only {min_samples_per_class} sample(s). Using simple random split instead of stratified split.")

        # If validation split would result in empty validation set for some classes,
        # reduce validation split or use a minimum number of samples
        total_samples = len(y)
        if validation_split * total_samples < len(unique_labels):
            # Ensure at least one sample per class in validation if possible
            adjusted_split = max(0.1, len(unique_labels) / total_samples)
            adjusted_split = min(adjusted_split, 0.3)  # Cap at 30%
            print(f"Adjusting validation split from {validation_split} to {adjusted_split}")
            validation_split = adjusted_split

        X_train, X_val, y_train, y_val = train_test_split(
            features, y, test_size=validation_split, random_state=42
        )

    if model_type == "svm":
        print("Training SVM model...", flush=True)
        _write_training_status(status_file, "Training SVM model...")

        model = SVC(kernel="rbf", probability=True, class_weight="balanced")
        model.fit(X_train, y_train)

        print("SVM training completed. Evaluating...", flush=True)
        _write_training_status(status_file, "SVM training completed. Evaluating...")

        y_pred = model.predict(X_val)
        training_history: Dict[str, object] = {
            "train_accuracy": float(model.score(X_train, y_train)),
            "val_accuracy": float(accuracy_score(y_val, y_pred)),
        }
    else:
        input_shape = sequences.shape[1:]
        # Only the CNN-LSTM path switches to a lighter configuration.
        large_dataset = model_type == "cnn_lstm" and len(sequences) > 100000

        if model_type == "cnn_lstm":
            display_name, predict_tag = "CNN-LSTM", ""
            print("Building CNN-LSTM model...")
            if large_dataset:
                print("Using lightweight CNN-LSTM for large dataset")
                model = build_cnn_lstm(
                    input_shape=input_shape,
                    num_classes=num_classes,
                    conv_filters=64,  # Reduce from 128
                    lstm_units=64,  # Reduce from 128
                    dropout=0.2,  # Reduce dropout
                )
            else:
                model = build_cnn_lstm(input_shape=input_shape, num_classes=num_classes)
            print(f"CNN-LSTM model built. Input shape: {input_shape}, Classes: {num_classes}")
            print(f"Model parameters: {model.count_params():,}")
        else:  # tcn
            display_name, predict_tag = "TCN", "TCN "
            print("Building TCN model...")
            model = build_tcn(input_shape=input_shape, num_classes=num_classes)
            print(f"TCN model built. Input shape: {input_shape}, Classes: {num_classes}")

        callbacks_list = _build_keras_callbacks(
            epochs=epochs,
            status_file=status_file,
            tb_dir=tb_dir,
            lr_patience=2 if large_dataset else 3,
            stop_patience=3 if large_dataset else 6,
        )
        if large_dataset:
            print("Using aggressive callbacks for large dataset")

        training_history, y_pred = _fit_and_predict_keras(
            model,
            X_train,
            y_train,
            X_val,
            y_val,
            display_name=display_name,
            predict_tag=predict_tag,
            epochs=epochs,
            batch_size=batch_size,
            callbacks_list=callbacks_list,
            status_file=status_file,
        )

    # Pass the full label set so the matrix always lines up with class_names,
    # even when a class is missing from the validation split.
    cm = confusion_matrix(y_val, y_pred, labels=np.arange(num_classes))
    metrics: Dict[str, object] = {
        "history": training_history,
        "validation": {
            "y_true": y_val,
            "y_pred": y_pred,
            "class_names": label_encoder.classes_.tolist(),
            "confusion_matrix": cm,
        },
        "model_type": model_type,
        "input_shape": list(sequences.shape[1:]),
        "tensorboard_log_dir": tb_dir,
    }
    return model, label_encoder, metrics


def standardise_sequences(sequences: np.ndarray) -> Tuple[np.ndarray, StandardScaler]:
    """Standardise each feature using statistics gathered over all timesteps.

    The last axis is treated as the feature axis: the array is collapsed to
    rows of features, a ``StandardScaler`` is fitted and applied, and the
    result is restored to the input shape.  Returns the scaled array together
    with the fitted scaler.
    """
    feature_count = sequences.shape[-1]
    rows = sequences.reshape(-1, feature_count)

    scaler = StandardScaler()
    scaled_rows = scaler.fit_transform(rows)
    return scaled_rows.reshape(sequences.shape), scaler


def _json_safe(value):
    """Recursively convert NumPy scalars/arrays and containers into JSON-friendly builtins."""
    if isinstance(value, dict):
        return {str(key): _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    return value


def export_artifacts(
    *,
    model: object,
    scaler: StandardScaler,
    label_encoder: LabelEncoder,
    feature_columns: Sequence[str],
    label_column: str,
    sequence_length: int,
    stride: int,
    model_path: Path,
    scaler_path: Path,
    metadata_path: Path,
    metrics: dict,
) -> None:
    """Persist trained assets to disk for deployment.

    Writes three files, creating parent directories as needed:

    * the model (``joblib`` for the SVM, the model's own ``save`` otherwise);
    * the fitted scaler (``joblib``);
    * a JSON metadata file with the feature ordering, label classes,
      windowing parameters, training history, classification report and
      confusion matrix.

    Parameters
    ----------
    metrics:
        Dictionary with at least ``"validation"`` (containing ``"y_true"`` and
        ``"y_pred"``) and ``"history"``.  ``"model_type"`` defaults to
        ``"cnn_lstm"`` when absent.
    """
    model_path.parent.mkdir(parents=True, exist_ok=True)
    scaler_path.parent.mkdir(parents=True, exist_ok=True)
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    model_type = str(metrics.get("model_type", "cnn_lstm"))
    if model_type == "svm":
        joblib.dump(model, model_path)
    else:
        model.save(model_path)
    joblib.dump(scaler, scaler_path)

    class_names = [str(name) for name in label_encoder.classes_]
    # Encoded labels are 0..n-1.  Passing them explicitly keeps the report and
    # matrix aligned with ``class_names`` even if a class never appears in the
    # validation data (otherwise ``classification_report`` raises).
    class_ids = np.arange(len(class_names))

    validation = metrics["validation"]
    report_dict = classification_report(
        validation["y_true"],
        validation["y_pred"],
        labels=class_ids,
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )

    metadata = {
        "feature_columns": list(feature_columns),
        "label_classes": label_encoder.classes_.tolist(),
        "label_column": label_column,
        "sequence_length": sequence_length,
        "stride": stride,
        "model_path": str(model_path),
        "scaler_path": str(scaler_path),
        "training_history": metrics["history"],
        "classification_report": report_dict,
        "model_type": model_type,
        "model_format": "joblib" if model_type == "svm" else "keras",
        "input_shape": metrics.get("input_shape"),
        "tensorboard_log_dir": metrics.get("tensorboard_log_dir"),
    }
    confusion = validation.get("confusion_matrix")
    if confusion is None:
        confusion = confusion_matrix(validation["y_true"], validation["y_pred"], labels=class_ids)
    metadata["confusion_matrix"] = np.asarray(confusion).tolist()

    # Training history can contain NumPy scalars, which ``json`` cannot
    # serialise, so convert everything to builtins first.
    metadata_path.write_text(json.dumps(_json_safe(metadata), indent=2), encoding="utf-8")


def train_from_dataframe(
    df: pd.DataFrame,
    *,
    label_column: str,
    feature_columns: Sequence[str] | None = None,
    sequence_length: int = 32,
    stride: int = 4,
    validation_split: float = 0.2,
    batch_size: int = 128,
    epochs: int = 50,
    model_type: str = "cnn_lstm",
    model_path: Path | str = "pmu_cnn_lstm_model.keras",
    scaler_path: Path | str = "pmu_feature_scaler.pkl",
    metadata_path: Path | str = "pmu_metadata.json",
    enable_tensorboard: bool = True,
    tensorboard_root: Path | str | None = None,
) -> dict:
    """Train a PMU fault classification model using an in-memory dataframe.

    The dataframe is windowed, standardised, used to train the requested
    model, and the resulting artefacts are exported.  Progress messages are
    written to ``training_status.txt`` next to ``model_path``.

    Batch size, epoch count and validation split are adjusted automatically
    for very large (> 100000 windows) and very small (< 100 windows) datasets.

    Returns
    -------
    dict
        Summary with sample/sequence counts, feature and class names, resolved
        artefact paths, training history, classification report, confusion
        matrix, TensorBoard locations and the label column used.

    Raises
    ------
    ValueError
        If fewer than 10 windows can be generated from the data.
    """

    model_path = Path(model_path)
    scaler_path = Path(scaler_path)
    metadata_path = Path(metadata_path)
    # Compare on the same normalised form that ``train_model`` uses.
    normalised_type = model_type.lower().strip()

    # Create status file for progress tracking
    status_file = model_path.parent / "training_status.txt"
    # Training writes to the status file before any artefact is exported, so
    # its directory has to exist already.
    status_file.parent.mkdir(parents=True, exist_ok=True)
    print(f"Training progress will be written to: {status_file}")

    tensorboard_log_dir: Optional[Path] = None
    if enable_tensorboard and normalised_type != "svm":
        base_dir = Path(tensorboard_root) if tensorboard_root is not None else Path("tensorboard_runs")
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
        tensorboard_log_dir = base_dir / f"run-{timestamp}"

    features, labels, used_columns, resolved_label = load_dataset_from_dataframe(
        df, feature_columns=feature_columns, label_column=label_column
    )

    print(f"Input data: {len(features)} samples")
    print(f"Creating sequences with length={sequence_length}, stride={stride}")

    sequences, seq_labels = create_sequences(
        features,
        labels,
        sequence_length=sequence_length,
        stride=stride,
    )

    print(f"Generated {len(sequences)} sequences")

    # Validate sequence count and adjust parameters if necessary
    if len(sequences) < 10:
        raise ValueError(
            f"Only {len(sequences)} sequences generated. Need at least 10 for training. "
            f"Try reducing sequence_length (currently {sequence_length}) or stride (currently {stride}), "
            "or provide more data."
        )

    # If very few sequences, recommend SVM instead of deep learning
    if len(sequences) < 100 and normalised_type in {"cnn_lstm", "tcn"}:
        print(f"Warning: Only {len(sequences)} sequences available. Consider using SVM for small datasets.")

    sequences, scaler = standardise_sequences(sequences)

    # Adjust training parameters based on data size
    original_batch_size = batch_size
    original_epochs = epochs
    original_validation_split = validation_split

    # Handle large datasets (>100K sequences) - optimize for memory and speed
    if len(sequences) > 100000:
        print(f"Large dataset detected ({len(sequences)} sequences). Optimizing parameters...")
        batch_size = min(batch_size * 2, 512)  # Increase batch size for efficiency
        epochs = min(epochs, 30)  # Reduce epochs for large datasets
        print("Adjusted parameters for large dataset:")
        print(f"  Batch size: {original_batch_size} -> {batch_size}")
        print(f"  Epochs: {original_epochs} -> {epochs}")

        # Force garbage collection
        import gc
        gc.collect()

    elif len(sequences) < 100:
        # For very small datasets
        batch_size = max(min(batch_size, len(sequences) // 4), 4)  # Ensure batch_size >= 4
        epochs = min(epochs, 20)  # Reduce epochs to prevent overfitting
        validation_split = min(validation_split, 0.3)  # Reduce validation split
        print("Adjusted parameters for small dataset:")
        print(f"  Batch size: {original_batch_size} -> {batch_size}")
        print(f"  Epochs: {original_epochs} -> {epochs}")
        print(f"  Validation split: {original_validation_split} -> {validation_split}")

    model, label_encoder, metrics = train_model(
        sequences,
        seq_labels,
        validation_split=validation_split,
        batch_size=batch_size,
        epochs=epochs,
        model_type=model_type,
        tensorboard_log_dir=tensorboard_log_dir,
        status_file_path=status_file,
    )

    export_artifacts(
        model=model,
        scaler=scaler,
        label_encoder=label_encoder,
        feature_columns=used_columns,
        label_column=resolved_label,
        sequence_length=sequence_length,
        stride=stride,
        model_path=model_path,
        scaler_path=scaler_path,
        metadata_path=metadata_path,
        metrics=metrics,
    )

    tensorboard_zip_path: Optional[str] = None
    if tensorboard_log_dir and tensorboard_log_dir.exists():
        try:
            tensorboard_zip_path = shutil.make_archive(
                base_name=str(tensorboard_log_dir.parent / tensorboard_log_dir.name),
                format="zip",
                root_dir=str(tensorboard_log_dir.parent),
                base_dir=tensorboard_log_dir.name,
            )
            tensorboard_zip_path = str(Path(tensorboard_zip_path).resolve())
        except Exception:
            # Best-effort: a failed archive must not discard a finished run.
            tensorboard_zip_path = None

    validation = metrics["validation"]
    class_names = validation["class_names"]
    # Encoded labels are 0..n-1.  Passing them explicitly keeps the report
    # aligned with ``class_names`` even if a class is missing from the
    # validation split (otherwise ``classification_report`` raises).
    class_ids = list(range(len(class_names)))

    report_dict = classification_report(
        validation["y_true"],
        validation["y_pred"],
        labels=class_ids,
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )
    confusion = validation.get("confusion_matrix")
    if confusion is None:
        confusion = confusion_matrix(validation["y_true"], validation["y_pred"], labels=class_ids)

    return {
        "num_samples": int(df.shape[0]),
        "num_sequences": int(sequences.shape[0]),
        "feature_columns": used_columns,
        "class_names": label_encoder.classes_.tolist(),
        "model_path": str(model_path.resolve()),
        "scaler_path": str(scaler_path.resolve()),
        "metadata_path": str(metadata_path.resolve()),
        "history": metrics["history"],
        "model_type": metrics.get("model_type", model_type),
        "classification_report": report_dict,
        "confusion_matrix": np.asarray(confusion).tolist(),
        "tensorboard_log_dir": metrics.get("tensorboard_log_dir"),
        "tensorboard_zip_path": tensorboard_zip_path,
        "label_column": resolved_label,
    }


def run_training(args: argparse.Namespace) -> None:
    """Run the command-line training pipeline and print a summary.

    The steps are executed in order: load the CSV with ``load_dataset``,
    window it with ``create_sequences``, scale it with
    ``standardise_sequences``, fit a model with ``train_model`` and write the
    model, scaler and metadata with ``export_artifacts``.  A validation
    classification report is printed at the end.

    Parameters
    ----------
    args:
        Namespace carrying the attributes read below: ``data_path``,
        ``model_out``, ``scaler_out``, ``metadata_out``, ``feature_columns``,
        ``label_column``, ``sequence_length``, ``stride``, ``tensorboard``,
        ``tensorboard_log_dir``, ``model_type``, ``validation_split``,
        ``batch_size`` and ``epochs``.
    """
    # Imported locally because the module header only imports ``datetime``
    # from the ``datetime`` module.
    from datetime import timezone

    csv_path = Path(args.data_path)
    model_out = Path(args.model_out)
    scaler_out = Path(args.scaler_out)
    metadata_out = Path(args.metadata_out)

    features, labels, feature_columns, resolved_label = load_dataset(
        csv_path, feature_columns=args.feature_columns, label_column=args.label_column
    )

    sequences, seq_labels = create_sequences(
        features,
        labels,
        sequence_length=args.sequence_length,
        stride=args.stride,
    )

    sequences, scaler = standardise_sequences(sequences)

    # A TensorBoard directory is only chosen when logging is enabled and the
    # model type is not the SVM baseline.
    tensorboard_log_dir: Optional[Path] = None
    if args.tensorboard and args.model_type != "svm":
        if args.tensorboard_log_dir:
            tensorboard_log_dir = Path(args.tensorboard_log_dir)
        else:
            # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware
            # UTC timestamp formats to the same ``YYYYmmdd-HHMMSS`` string.
            run_stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
            tensorboard_log_dir = Path("tensorboard_runs") / run_stamp

    model, label_encoder, metrics = train_model(
        sequences,
        seq_labels,
        validation_split=args.validation_split,
        batch_size=args.batch_size,
        epochs=args.epochs,
        model_type=args.model_type,
        tensorboard_log_dir=tensorboard_log_dir,
        status_file_path=None,  # No status file for CLI usage
    )

    export_artifacts(
        model=model,
        scaler=scaler,
        label_encoder=label_encoder,
        feature_columns=feature_columns,
        label_column=resolved_label,
        sequence_length=args.sequence_length,
        stride=args.stride,
        model_path=model_out,
        scaler_path=scaler_out,
        metadata_path=metadata_out,
        metrics=metrics,
    )

    print("Training complete")
    print(f"Model architecture  : {args.model_type}")
    print(f"Model saved to       : {model_out}")
    print(f"Scaler saved to      : {scaler_out}")
    print(f"Metadata saved to    : {metadata_out}")
    print("Validation metrics:")
    validation = metrics["validation"]
    report = classification_report(
        validation["y_true"],
        validation["y_pred"],
        target_names=validation["class_names"],
    )
    print(report)

    # Only mention TensorBoard when the training step reported a log directory.
    tb_dir = metrics.get("tensorboard_log_dir")
    if tb_dir:
        print(f"TensorBoard logs written to: {tb_dir}")
        print(f"Launch TensorBoard with: tensorboard --logdir \"{tb_dir}\"")


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Train a sequence model for PMU fault classification")
    parser.add_argument("--data-path", required=True, help="Path to Fault_Classification_PMU_Data CSV")
    parser.add_argument(
        "--label-column",
        default="Fault",
        help="Name of the target label column (default: Fault)",
    )
    parser.add_argument(
        "--feature-columns",
        nargs="*",
        default=None,
        help="Optional explicit list of feature columns. Defaults to all non-label columns",
    )
    parser.add_argument("--sequence-length", type=int, default=32, help="Number of timesteps per training window")
    parser.add_argument("--stride", type=int, default=4, help="Step size between consecutive windows")
    parser.add_argument("--validation-split", type=float, default=0.2, help="Validation set fraction")
    parser.add_argument("--batch-size", type=int, default=128, help="Training batch size")
    parser.add_argument("--epochs", type=int, default=50, help="Maximum number of training epochs")
    parser.add_argument(
        "--model-type",
        choices=["cnn_lstm", "tcn", "svm"],
        default="cnn_lstm",
        help="Model architecture to train (choices: cnn_lstm, tcn, svm)",
    )
    parser.add_argument("--model-out", default="pmu_cnn_lstm_model.keras", help="Path to save trained Keras model")
    parser.add_argument("--scaler-out", default="pmu_feature_scaler.pkl", help="Path to save fitted StandardScaler")
    parser.add_argument("--metadata-out", default="pmu_metadata.json", help="Path to save metadata JSON")
    parser.add_argument(
        "--tensorboard-log-dir",
        default=None,
        help="Optional directory to write TensorBoard logs (defaults to tensorboard_runs/<timestamp>)",
    )
    parser.add_argument(
        "--no-tensorboard",
        dest="tensorboard",
        action="store_false",
        help="Disable TensorBoard logging for neural network models",
    )
    parser.set_defaults(tensorboard=True)
    return parser.parse_args(argv)


def main(argv: Sequence[str] | None = None) -> None:
    """Command-line entry point: parse *argv* and launch the training run.

    Parameters
    ----------
    argv:
        Argument strings forwarded unchanged to ``parse_args``.
    """
    run_training(parse_args(argv))


# Start the CLI only when this file is executed as a script; importing the
# module does not call ``main``.
if __name__ == "__main__":
    main()