TabPFN committed on
Commit
0f0db0b
1 Parent(s): d54bdcf

Create new file

Browse files
Files changed (1) hide show
  1. decision_boundary.py +300 -0
decision_boundary.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ from matplotlib.colors import ListedColormap
3
+
4
+ from functools import reduce
5
+
6
+ import numpy as np
7
+
8
+ from sklearn.preprocessing import LabelEncoder
9
+ from sklearn.utils import check_matplotlib_support
10
+ from sklearn.utils import _safe_indexing
11
+ from sklearn.base import is_regressor
12
+ from sklearn.utils.validation import check_is_fitted
13
+
14
+
15
+ def _check_boundary_response_method(estimator, response_method):
16
+ """Return prediction method from the `response_method` for decision boundary.
17
+ Parameters
18
+ ----------
19
+ estimator : object
20
+ Fitted estimator to check.
21
+ response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
22
+ Specifies whether to use :term:`predict_proba`,
23
+ :term:`decision_function`, :term:`predict` as the target response.
24
+ If set to 'auto', the response method is tried in the following order:
25
+ :term:`decision_function`, :term:`predict_proba`, :term:`predict`.
26
+ Returns
27
+ -------
28
+ prediction_method: callable
29
+ Prediction method of estimator.
30
+ """
31
+ has_classes = hasattr(estimator, "classes_")
32
+
33
+ if has_classes and len(estimator.classes_) > 2:
34
+ if response_method not in {"auto", "predict"}:
35
+ msg = (
36
+ "Multiclass classifiers are only supported when response_method is"
37
+ " 'predict' or 'auto'"
38
+ )
39
+ raise ValueError(msg)
40
+ methods_list = ["predict"]
41
+ elif response_method == "auto":
42
+ methods_list = ["decision_function", "predict_proba", "predict"]
43
+ else:
44
+ methods_list = [response_method]
45
+
46
+ prediction_method = [getattr(estimator, method, None) for method in methods_list]
47
+ prediction_method = reduce(lambda x, y: x or y, prediction_method)
48
+ if prediction_method is None:
49
+ raise ValueError(
50
+ f"{estimator.__class__.__name__} has none of the following attributes: "
51
+ f"{', '.join(methods_list)}."
52
+ )
53
+
54
+ return prediction_method
55
+
56
+
57
class DecisionBoundaryDisplay:
    """Decision boundary visualization.

    It is recommended to use
    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
    attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    xx0 : ndarray of shape (grid_resolution, grid_resolution)
        First output of :func:`meshgrid <numpy.meshgrid>`.

    xx1 : ndarray of shape (grid_resolution, grid_resolution)
        Second output of :func:`meshgrid <numpy.meshgrid>`.

    response : ndarray of shape (grid_resolution, grid_resolution)
        Values of the response function.

    xlabel : str, default=None
        Default label to place on x axis.

    ylabel : str, default=None
        Default label to place on y axis.

    Attributes
    ----------
    surface_ : matplotlib `QuadContourSet` or `QuadMesh`
        If `plot_method` is 'contour' or 'contourf', `surface_` is a
        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
        `plot_method` is 'pcolormesh', `surface_` is a
        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.

    ax_ : matplotlib Axes
        Axes with the decision boundary.

    figure_ : matplotlib Figure
        Figure containing the decision boundary.
    """

    def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):
        self.xx0 = xx0
        self.xx1 = xx1
        self.response = response
        self.xlabel = xlabel
        self.ylabel = ylabel

    def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
        """Plot visualization.

        Parameters
        ----------
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        xlabel : str, default=None
            Overwrite the x-axis label.

        ylabel : str, default=None
            Overwrite the y-axis label.

        **kwargs : dict
            Additional keyword arguments to be passed to the `plot_method`.

        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            The display itself, with `surface_`, `ax_` and `figure_` set.

        Raises
        ------
        ValueError
            If `plot_method` is not one of the supported plotting methods.
        """
        check_matplotlib_support("DecisionBoundaryDisplay.plot")
        import matplotlib.pyplot as plt  # noqa

        if plot_method not in ("contourf", "contour", "pcolormesh"):
            raise ValueError(
                "plot_method must be 'contourf', 'contour', or 'pcolormesh'"
            )

        if ax is None:
            _, ax = plt.subplots()

        # The plotting routine is the Axes method that shares the name.
        plot_func = getattr(ax, plot_method)
        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)

        # An explicit label always wins; otherwise the stored default is applied
        # only when the axes do not already carry a label.
        if xlabel is not None or not ax.get_xlabel():
            xlabel = self.xlabel if xlabel is None else xlabel
            ax.set_xlabel(xlabel)
        if ylabel is not None or not ax.get_ylabel():
            ylabel = self.ylabel if ylabel is None else ylabel
            ax.set_ylabel(ylabel)

        self.ax_ = ax
        self.figure_ = ax.figure
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        *,
        grid_resolution=100,
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
        xlabel=None,
        ylabel=None,
        ax=None,
        **kwargs,
    ):
        """Plot decision boundary given an estimator.

        Read more in the :ref:`User Guide <visualizations>`.

        Parameters
        ----------
        estimator : object
            Trained estimator used to plot the decision boundary.

        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
            Input data that should be only 2-dimensional.

        grid_resolution : int, default=100
            Number of grid points to use for plotting decision boundary.
            Higher values will make the plot look nicer but be slower to
            render.

        eps : float, default=1.0
            Extends the minimum and maximum values of X for evaluating the
            response function.

        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

        response_method : {'auto', 'predict_proba', 'decision_function', \
                'predict'}, default='auto'
            Specifies whether to use :term:`predict_proba`,
            :term:`decision_function`, :term:`predict` as the target response.
            If set to 'auto', the response method is tried in the following order:
            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.
            For multiclass problems, :term:`predict` is selected when
            `response_method="auto"`.

        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ylabel : str, default=None
            The label used for the y-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        **kwargs : dict
            Additional keyword arguments to be passed to the
            `plot_method`.

        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores the result. It is an instance of the class
            this method is called on.

        Raises
        ------
        ValueError
            If `grid_resolution` is not greater than 1, if `eps` is negative,
            if `plot_method` is not supported, or if the response is not
            one-dimensional for a regressor.

        See Also
        --------
        DecisionBoundaryDisplay : Decision boundary visualization.
        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix
            given an estimator, the data, and the label.
        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix
            given the true and predicted labels.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.inspection import DecisionBoundaryDisplay
        >>> iris = load_iris()
        >>> X = iris.data[:, :2]
        >>> classifier = LogisticRegression().fit(X, iris.target)
        >>> disp = DecisionBoundaryDisplay.from_estimator(
        ...     classifier, X, response_method="predict",
        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
        ...     alpha=0.5,
        ... )
        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        check_is_fitted(estimator)

        if not grid_resolution > 1:
            raise ValueError(
                "grid_resolution must be greater than 1. Got"
                f" {grid_resolution} instead."
            )

        if not eps >= 0:
            raise ValueError(
                f"eps must be greater than or equal to 0. Got {eps} instead."
            )

        possible_plot_methods = ("contourf", "contour", "pcolormesh")
        if plot_method not in possible_plot_methods:
            available_methods = ", ".join(possible_plot_methods)
            raise ValueError(
                f"plot_method must be one of {available_methods}. "
                f"Got {plot_method} instead."
            )

        # Only the first two columns of X define the plotting plane.
        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)

        # Pad the observed range by eps on every side.
        x0_min, x0_max = x0.min() - eps, x0.max() + eps
        x1_min, x1_max = x1.min() - eps, x1.max() + eps

        xx0, xx1 = np.meshgrid(
            np.linspace(x0_min, x0_max, grid_resolution),
            np.linspace(x1_min, x1_max, grid_resolution),
        )
        if hasattr(X, "iloc"):
            # we need to preserve the feature names and therefore get an empty dataframe
            X_grid = X.iloc[[], :].copy()
            X_grid.iloc[:, 0] = xx0.ravel()
            X_grid.iloc[:, 1] = xx1.ravel()
        else:
            X_grid = np.c_[xx0.ravel(), xx1.ravel()]

        pred_func = _check_boundary_response_method(estimator, response_method)
        response = pred_func(X_grid)

        # convert classes predictions into integers
        if pred_func.__name__ == "predict" and hasattr(estimator, "classes_"):
            encoder = LabelEncoder()
            encoder.classes_ = estimator.classes_
            response = encoder.transform(response)

        if response.ndim != 1:
            if is_regressor(estimator):
                raise ValueError("Multi-output regressors are not supported")

            # TODO: Support pos_label
            # Keep the column at index 1 of the 2-D response (presumably the
            # second class of a binary classifier -- confirm when adding
            # pos_label support).
            response = response[:, 1]

        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""

        if ylabel is None:
            ylabel = X.columns[1] if hasattr(X, "columns") else ""

        # Instantiate through cls so that subclasses get an instance of their
        # own type back from this alternate constructor.
        display = cls(
            xx0=xx0,
            xx1=xx1,
            response=response.reshape(xx0.shape),
            xlabel=xlabel,
            ylabel=ylabel,
        )
        return display.plot(ax=ax, plot_method=plot_method, **kwargs)