AnoushkaJain3
committed on
Commit
•
f47b72d
1
Parent(s):
1464285
Upload cascaded_classifier.py
Browse files- cascaded_classifier.py +135 -0
cascaded_classifier.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
from typing import Iterable, Union
|
3 |
+
from numpy import ndarray
|
4 |
+
from pandas import DataFrame
|
5 |
+
from sklearn.pipeline import Pipeline
|
6 |
+
import numpy as np
|
7 |
+
import sys
|
8 |
+
import skops.io as sio
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
+
import pandas as pd
|
11 |
+
|
12 |
+
# Label that represents 'not noise' in the output of the first-stage ("noise")
# classifier. `CascadedClassifier.predict` compares the first-stage predictions
# against this value and forwards only the matching rows to the second-stage
# ("sua") classifier.
PREDICTOR_SPLIT_TARGET = 0 # Represents 'not noise'.
|
13 |
+
|
14 |
+
class CascadedClassifier(Pipeline):
    """
    Two-stage ("cascaded") classifier built on top of ``sklearn`` ``Pipeline``.

    The first step, named ``"noise"``, separates noise from non-noise rows.
    The second step, named ``"sua"``, further classifies the non-noise rows.
    Both steps are themselves pipelines; only their first element
    (``named_steps[...][0]``) is used for prediction.
    """

    # Offset added to the second-stage labels so they do not collide with the
    # first-stage labels (assuming noise is labeled as 0 or 1).
    _SECOND_STAGE_LABEL_OFFSET = 2

    def __init__(self, steps: list[tuple[str, Pipeline]], memory=None):
        """
        Initializes a cascaded classifier pipeline with two classification steps.

        Parameters
        ----------
        steps: list[tuple[str, Pipeline]]
            A list of (name, pipeline) tuples for noise and SUA classifiers.
            The other methods look the steps up by the names ``"noise"`` and
            ``"sua"``.
        memory: optional
            Used to cache the fitted transformers of the pipeline.

        Raises
        ------
        AssertionError
            If ``steps`` does not contain exactly two entries.
        """
        super().__init__(steps, memory=memory)
        # Raised explicitly (instead of using an ``assert`` statement) so the
        # check is not stripped when Python runs with ``-O``; the exception
        # type is kept for backward compatibility.
        if len(steps) != 2:
            raise AssertionError('CascadedClassifier must have exactly 2 steps')
        self._steps = steps

    @property
    def feature_names_in_(self) -> list[str]:
        """
        Returns the feature names used in the noise classifier.

        Returns
        -------
        list[str]
            The ``feature_names_in_`` attribute of the first element of the
            ``"noise"`` step.
        """
        return self.named_steps["noise"][0].feature_names_in_

    def predict(self, X: list[str] | ndarray | Iterable | DataFrame, **predict_params) -> ndarray:
        """
        Predicts labels for the input data using a cascading approach.

        The noise classifier is run on every row. Rows it labels as
        ``PREDICTOR_SPLIT_TARGET`` ('not noise') are then passed to the SUA
        classifier, and their label is replaced by the SUA classifier's label
        shifted by 2. All other rows keep the noise classifier's label.

        Parameters
        ----------
        X: list[str] | ndarray | Iterable | DataFrame
            The input data. Must support ``len``-style row semantics; lists
            and tuples are converted to an array before row selection.

        predict_params: dict
            Accepted for signature compatibility; currently not forwarded to
            either classifier.

        Returns
        -------
        ndarray
            The predicted labels.
        """
        # Step 1: Get initial predictions from the noise classifier.
        y = self.named_steps["noise"][0].predict(X)

        # Identify rows where the prediction is 'not noise'.
        predict_rows = (y == PREDICTOR_SPLIT_TARGET)

        # If no rows require further classification, return the initial predictions.
        if not np.any(predict_rows):
            return y

        # Plain Python sequences do not support boolean-mask indexing.
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)

        # Step 2: Get predictions from the SUA classifier for the 'not noise' subset.
        y2 = self.named_steps["sua"][0].predict(X[predict_rows])

        # Shift the SUA/MUA labels to avoid overlap with noise labels, without
        # mutating the array returned by the second-stage classifier.
        y[predict_rows] = y2 + self._SECOND_STAGE_LABEL_OFFSET

        return y

    def predict_proba(
        self,
        X: list[str] | ndarray | Iterable | DataFrame,
    ) -> ndarray:
        """
        Predict per-class probabilities for the input data.

        Both classifiers are evaluated on every row. For each row:

        * if the noise classifier's column 0 is strictly greater than its
          column 1 (neural > noise), the row is ``[0, sua[:, 0], sua[:, 1]]``;
        * otherwise (noise >= neural) the row is
          ``[noise[:, 1], noise[:, 0], 0]``.

        Parameters
        ----------
        X : list[str] | ndarray | Iterable | DataFrame
            The input data. Must support ``len``.

        Returns
        -------
        ndarray
            Array of shape (n_samples, 3) and dtype float64.

        Raises
        ------
        RuntimeError
            If either underlying classifier fails or returns probabilities of
            an unexpected shape; the original exception is chained as cause.

        Notes
        -----
        The output columns are ordered as [noise, MUA, SUA].
        NOTE(review): the MUA/SUA assignment assumes the SUA classifier's
        probability columns are ordered (MUA, SUA) -- confirm against its
        ``classes_``.
        No explicit normalization is applied; each row sums to 1 as long as
        each classifier's two probability columns sum to 1.
        """
        n_samples = len(X)
        if n_samples == 0:
            return np.zeros((0, 3), dtype=np.float64)

        try:
            # Get noise classifier probabilities
            y_proba_noise = np.asarray(self.named_steps["noise"][0].predict_proba(X))
            # Get SUA vs MUA probabilities
            y_proba_sua = np.asarray(self.named_steps["sua"][0].predict_proba(X))

            # True where the row is considered neural (neural > noise).
            is_neural = y_proba_noise[:, 0] > y_proba_noise[:, 1]

            out_proba = np.zeros((n_samples, 3), dtype=np.float64)
            # noise: zero for neural rows, noise probability otherwise
            out_proba[:, 0] = np.where(is_neural, 0.0, y_proba_noise[:, 1])
            # MUA: second-stage column 0 for neural rows, neural probability otherwise
            out_proba[:, 1] = np.where(is_neural, y_proba_sua[:, 0], y_proba_noise[:, 0])
            # SUA: second-stage column 1 for neural rows, zero otherwise
            out_proba[:, 2] = np.where(is_neural, y_proba_sua[:, 1], 0.0)

            return out_proba

        except Exception as e:
            raise RuntimeError(
                f"Error during probability prediction: {str(e)}"
            ) from e
|
135 |
+
|