AnoushkaJain3 committed on
Commit
f47b72d
1 Parent(s): 1464285

Upload cascaded_classifier.py

Browse files
Files changed (1) hide show
  1. cascaded_classifier.py +135 -0
cascaded_classifier.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Iterable, Union
3
+ from numpy import ndarray
4
+ from pandas import DataFrame
5
+ from sklearn.pipeline import Pipeline
6
+ import numpy as np
7
+ import sys
8
+ import skops.io as sio
9
+ from huggingface_hub import hf_hub_download
10
+ import pandas as pd
11
+
12
+ PREDICTOR_SPLIT_TARGET = 0 # Represents 'not noise'.
13
+
14
class CascadedClassifier(Pipeline):
    """Two-stage (cascaded) classifier built from a "noise" and a "sua" step.

    The first stage labels every sample; samples it labels as
    ``PREDICTOR_SPLIT_TARGET`` are passed to the second stage, whose labels
    are shifted so they cannot collide with the first stage's labels.

    Both stages are looked up by name in ``named_steps`` ("noise" and "sua")
    and the estimator actually used is element ``[0]`` of each named step.
    """

    # Added to second-stage labels so they do not overlap with the
    # first-stage labels (assumed to be 0 or 1, as in the original code).
    _SUA_LABEL_OFFSET = 2

    def __init__(self, steps: list[tuple[str, Pipeline]], memory=None):
        """
        Initializes a cascaded classifier pipeline with two classification steps.

        Parameters
        ----------
        steps: list[tuple[str, Pipeline]]
            A list of (name, pipeline) tuples for noise and SUA classifiers.
        memory: optional
            Forwarded unchanged to ``Pipeline.__init__``.

        Raises
        ------
        AssertionError
            If ``steps`` does not contain exactly two entries.
        """
        super().__init__(steps, memory=memory)
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; exception type and message are
        # the same as before.
        if len(steps) != 2:
            raise AssertionError('CascadedClassifier must have exactly 2 steps')
        self._steps = steps

    def _stage_estimator(self, name: str):
        """Return element ``[0]`` of the named step (the estimator both stages call)."""
        return self.named_steps[name][0]

    @property
    def feature_names_in_(self) -> list[str]:
        """
        Returns the feature names used in the noise classifier.

        Returns
        -------
        list[str]
            The ``feature_names_in_`` attribute of the first-stage estimator,
            returned as stored by that estimator.
        """
        return self._stage_estimator("noise").feature_names_in_

    def predict(self, X: list[str] | ndarray | Iterable | DataFrame, **predict_params) -> ndarray:
        """
        Predicts labels for the input data using a cascading approach.

        Parameters
        ----------
        X: list[str] | ndarray | Iterable | DataFrame
            The input data. Lists, tuples and objects without ``__getitem__``
            are materialized into a NumPy array, because the second stage
            selects rows with a boolean mask.
        predict_params: dict
            Accepted for signature compatibility; not forwarded to either
            stage (unchanged from the previous behavior).

        Returns
        -------
        ndarray
            First-stage labels, with every ``PREDICTOR_SPLIT_TARGET`` entry
            replaced by the second-stage label plus 2.
        """
        # Plain Python sequences and one-shot iterables do not support
        # boolean-mask indexing; convert them once up front.
        if isinstance(X, (list, tuple)) or not hasattr(X, "__getitem__"):
            X = np.asarray(list(X))

        # Step 1: initial predictions from the noise classifier.
        y = self._stage_estimator("noise").predict(X)

        # Rows the first stage considers 'not noise' go on to the second stage.
        predict_rows = (y == PREDICTOR_SPLIT_TARGET)
        X_predict = X[predict_rows]

        # Nothing to refine: the first-stage labels are the final answer.
        if len(X_predict) == 0:
            return y

        # Step 2: SUA classifier on the 'not noise' subset only.
        y2 = self._stage_estimator("sua").predict(X_predict)

        # Shift out of place so the second stage's returned array is not mutated.
        y[predict_rows] = y2 + self._SUA_LABEL_OFFSET

        return y

    def predict_proba(
        self,
        X: Union[list[str], ndarray, Iterable, pd.DataFrame],
    ) -> ndarray:
        """
        Predict per-class probabilities by combining both stages.

        Parameters
        ----------
        X : Union[list[str], ndarray, Iterable, pd.DataFrame]
            The input data. Both stages are evaluated on every row.

        Returns
        -------
        ndarray
            Array of shape (n_samples, 3).

        Raises
        ------
        RuntimeError
            If either stage fails or returns probabilities that cannot be
            combined; the original exception is chained as the cause.

        Notes
        -----
        Columns are filled as [noise, MUA, SUA] (labels taken from the inline
        comments of the original implementation; the mapping of estimator
        columns to MUA/SUA depends on the fitted class order -- TODO confirm).

        For a row where first-stage column 0 ("neural") is strictly greater
        than column 1 ("noise"), the row is ``[0, sua[:, 0], sua[:, 1]]``.
        Otherwise the row is ``[noise[:, 1], noise[:, 0], 0]``.

        No explicit normalization is applied; rows sum to 1 only insofar as
        each stage's own probability rows do.
        """
        # len() is needed below; materialize one-shot iterables first.
        if not hasattr(X, "__len__"):
            X = list(X)

        n_samples = len(X)
        if n_samples == 0:
            return np.zeros((0, 3), dtype=np.float64)

        try:
            # First stage: column 0 = neural, column 1 = noise.
            noise_proba = np.asarray(self._stage_estimator("noise").predict_proba(X))
            # Second stage: column 0 -> MUA slot, column 1 -> SUA slot.
            sua_proba = np.asarray(self._stage_estimator("sua").predict_proba(X))

            # Strict '>' keeps ties on the noise side, as before.
            is_neural = noise_proba[:, 0] > noise_proba[:, 1]

            out_proba = np.zeros((n_samples, 3), dtype=np.float64)
            out_proba[:, 0] = np.where(is_neural, 0.0, noise_proba[:, 1])
            out_proba[:, 1] = np.where(is_neural, sua_proba[:, 0], noise_proba[:, 0])
            out_proba[:, 2] = np.where(is_neural, sua_proba[:, 1], 0.0)

            return out_proba

        except Exception as e:
            raise RuntimeError(
                f"Error during probability prediction: {str(e)}"
            ) from e
135
+