AnoushkaJain3
/

curation_machine_learning_models

Tabular Classification

English

Computational Neuroscience

Model card Files Files and versions Community

AnoushkaJain3 committed 19 days ago

Commit

f47b72d

•

1 Parent(s): 1464285

Upload cascaded_classifier.py

Browse files

Files changed (1) hide show

cascaded_classifier.py +135 -0

cascaded_classifier.py ADDED Viewed

	@@ -0,0 +1,135 @@

+from __future__ import annotations
+from typing import Iterable, Union
+from numpy import ndarray
+from pandas import DataFrame
+from sklearn.pipeline import Pipeline
+import numpy as np
+import sys
+import skops.io as sio
+from huggingface_hub import hf_hub_download
+import pandas as pd
+# Label value compared against the noise classifier's predictions in
+# CascadedClassifier.predict: only rows predicted with this label are handed
+# to the second-stage ("sua") classifier.
+PREDICTOR_SPLIT_TARGET = 0  # Represents 'not noise'.
+class CascadedClassifier(Pipeline):
+    def __init__(self, steps: list[tuple[str, Pipeline]], memory=None):
+        """
+        Initializes a cascaded classifier pipeline with two classification steps.
+        Parameters
+        ----------
+        steps: list[tuple[str, Pipeline]]
+            A list of (name, pipeline) tuples for noise and SUA classifiers.
+        memory: optional
+            Used to cache the fitted transformers of the pipeline.
+        """
+        super().__init__(steps, memory=memory)
+        assert len(steps) == 2, 'CascadedClassifier must have exactly 2 steps'
+        self._steps = steps
+    @property
+    def feature_names_in_(self) -> list[str]:
+        """
+        Returns the feature names used in the noise classifier.
+        Returns
+        -------
+        list[str]
+            The input feature names.
+        """
+        return self.named_steps["noise"][0].feature_names_in_
+    def predict(self, X: list[str] | ndarray | Iterable | DataFrame, **predict_params) -> ndarray:
+        """
+        Predicts labels for the input data using a cascading approach.
+        Parameters
+        ----------
+        X: list[str] | ndarray | Iterable | DataFrame
+            The input data.
+        predict_params: dict
+            Parameters for the predict method.
+        Returns
+        -------
+        ndarray
+            The predicted labels.
+        """
+        # Step 1: Get initial predictions from the noise classifier.
+        y = self.named_steps["noise"][0].predict(X)
+        # Identify rows where the prediction is 'not noise'.
+        predict_rows = (y == PREDICTOR_SPLIT_TARGET)
+        X_predict = X[predict_rows]
+        # If no rows require further classification, return the initial predictions.
+        if len(X_predict) == 0:
+            return y
+        # Step 2: Get predictions from the SUA classifier for the 'not noise' subset.
+        y2 = self.named_steps["sua"][0].predict(X_predict)
+        # Shift the SUA/MUA labels to avoid overlap with noise labels.
+        y2 += 2  # Assuming noise is labeled as 0 or 1.
+        # Update the initial predictions with the SUA classifier results.
+        y[predict_rows] = y2
+        return y
+    def predict_proba(
+            self,
+            X: Union[list[str], ndarray, Iterable, pd.DataFrame],
+        ) -> ndarray:
+            """
+            Predict the probabilities for the input data and normalize them so the sum is 1.
+            Parameters
+            ----------
+            X : Union[list[str], ndarray, Iterable, pd.DataFrame]
+                The input data.
+            predict_params : dict
+                Parameters for the predict method.
+            Returns
+            -------
+            ndarray
+                The normalized predicted probabilities for noise, SUA, and MUA.
+                Shape: (n_samples, 3)
+            Notes
+            -----
+            The output probabilities are ordered as [SUA, noise, MUA].
+            All rows sum to 1 after normalization.
+            """
+            if len(X) == 0:
+                return np.array([], dtype=np.float64).reshape(0, 3)
+            # Initialize probabilities array with zeros
+            n_samples = len(X)
+            out_proba = np.zeros((n_samples, 3), dtype=np.float64)
+            try:
+                # Get noise classifier probabilities
+                y_proba_noise = self.named_steps["noise"][0].predict_proba(X)
+                # Get SUA vs MUA probabilities
+                y_proba_sua = self.named_steps["sua"][0].predict_proba(X)
+                for i in range(n_samples):
+                    if y_proba_noise[i, 0] > y_proba_noise[i, 1]:  # neural > noise
+                        out_proba[i, 0] = 0                     # noise, there is no noise
+                        out_proba[i, 1] = y_proba_sua[i, 0]  # MUA
+                        out_proba[i, 2] = y_proba_sua[i, 1]    # SUA
+                    else:  # noise >= neural
+                        out_proba[i, 0] = y_proba_noise[i, 1]  # noise
+                        out_proba[i, 1] = y_proba_noise[i, 0]  # MUA (neural probability)
+                        out_proba[i, 2] = 0                      # SUA (no SUA)
+                return out_proba
+            except Exception as e:
+                raise RuntimeError(
+                    f"Error during probability prediction: {str(e)}"
+                ) from e