TabPFN committed on
Commit
d54bdcf
·
1 Parent(s): 4810f64

Update TabPFN/scripts/decision_boundary.py

Browse files
Files changed (1) hide show
  1. TabPFN/scripts/decision_boundary.py +299 -0
TabPFN/scripts/decision_boundary.py CHANGED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import reduce
2
+
3
+ import numpy as np
4
+
5
+ from sklearn.preprocessing import LabelEncoder
6
+ from sklearn.utils import check_matplotlib_support
7
+ from sklearn.utils import _safe_indexing
8
+ from sklearn.base import is_regressor
9
+ from sklearn.utils.validation import check_is_fitted
10
+ import matplotlib.pyplot as plt
11
+ from matplotlib.colors import ListedColormap
12
+
13
+
14
def _check_boundary_response_method(estimator, response_method):
    """Resolve which method of `estimator` supplies the decision-boundary response.

    Parameters
    ----------
    estimator : object
        Fitted estimator to inspect.

    response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
        Name of the estimator method to use as the target response. With
        'auto', candidates are tried in this order:
        :term:`decision_function`, :term:`predict_proba`, :term:`predict`.
        When `estimator.classes_` holds more than two entries, only 'auto'
        and 'predict' are accepted and :term:`predict` is the sole candidate.

    Returns
    -------
    prediction_method : callable
        The selected prediction method of `estimator`.

    Raises
    ------
    ValueError
        If the estimator has more than two classes and `response_method` is
        neither 'auto' nor 'predict', or if the estimator exposes none of the
        candidate methods.
    """
    is_multiclass = hasattr(estimator, "classes_") and len(estimator.classes_) > 2

    # Guard clause: multiclass estimators can only be drawn from hard predictions.
    if is_multiclass and response_method not in {"auto", "predict"}:
        raise ValueError(
            "Multiclass classifiers are only supported when response_method is"
            " 'predict' or 'auto'"
        )

    if is_multiclass:
        methods_list = ["predict"]
    elif response_method == "auto":
        methods_list = ["decision_function", "predict_proba", "predict"]
    else:
        methods_list = [response_method]

    # Look up every candidate first, then keep the earliest truthy one
    # (falling through to the last looked-up value when none is truthy).
    candidates = [getattr(estimator, name, None) for name in methods_list]
    prediction_method = candidates[0]
    for fallback in candidates[1:]:
        prediction_method = prediction_method or fallback

    if prediction_method is not None:
        return prediction_method

    raise ValueError(
        f"{estimator.__class__.__name__} has none of the following attributes: "
        f"{', '.join(methods_list)}."
    )
54
+
55
+
56
class DecisionBoundaryDisplay:
    """Decision boundary visualization.

    It is recommended to use
    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
    attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    xx0 : ndarray of shape (grid_resolution, grid_resolution)
        First output of :func:`meshgrid <numpy.meshgrid>`.

    xx1 : ndarray of shape (grid_resolution, grid_resolution)
        Second output of :func:`meshgrid <numpy.meshgrid>`.

    response : ndarray of shape (grid_resolution, grid_resolution)
        Values of the response function.

    xlabel : str, default=None
        Default label to place on x axis.

    ylabel : str, default=None
        Default label to place on y axis.

    Attributes
    ----------
    surface_ : matplotlib `QuadContourSet` or `QuadMesh`
        If `plot_method` is 'contour' or 'contourf', `surface_` is a
        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
        `plot_method` is 'pcolormesh', `surface_` is a
        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.

    ax_ : matplotlib Axes
        Axes with the decision boundary.

    figure_ : matplotlib Figure
        Figure containing the decision boundary.
    """

    def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):
        self.xx0 = xx0
        self.xx1 = xx1
        self.response = response
        self.xlabel = xlabel
        self.ylabel = ylabel

    def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
        """Plot visualization.

        Parameters
        ----------
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        xlabel : str, default=None
            Overwrite the x-axis label.

        ylabel : str, default=None
            Overwrite the y-axis label.

        **kwargs : dict
            Additional keyword arguments to be passed to the `plot_method`.

        Returns
        -------
        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            This display object (`self`), with `surface_`, `ax_` and
            `figure_` set.

        Raises
        ------
        ValueError
            If `plot_method` is not one of the supported method names.
        """
        check_matplotlib_support("DecisionBoundaryDisplay.plot")
        import matplotlib.pyplot as plt  # noqa

        if plot_method not in ("contourf", "contour", "pcolormesh"):
            raise ValueError(
                "plot_method must be 'contourf', 'contour', or 'pcolormesh'"
            )

        if ax is None:
            _, ax = plt.subplots()

        # The plotting routine is the Axes method named by `plot_method`.
        plot_func = getattr(ax, plot_method)
        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)

        # An explicit label always wins; otherwise the stored default is only
        # applied when the axes does not already carry a label.
        if xlabel is not None or not ax.get_xlabel():
            xlabel = self.xlabel if xlabel is None else xlabel
            ax.set_xlabel(xlabel)
        if ylabel is not None or not ax.get_ylabel():
            ylabel = self.ylabel if ylabel is None else ylabel
            ax.set_ylabel(ylabel)

        self.ax_ = ax
        self.figure_ = ax.figure
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        *,
        grid_resolution=100,
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
        xlabel=None,
        ylabel=None,
        ax=None,
        **kwargs,
    ):
        """Plot decision boundary given an estimator.

        Read more in the :ref:`User Guide <visualizations>`.

        Parameters
        ----------
        estimator : object
            Trained estimator used to plot the decision boundary.

        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
            Input data that should be only 2-dimensional.

        grid_resolution : int, default=100
            Number of grid points to use for plotting decision boundary.
            Higher values will make the plot look nicer but be slower to
            render.

        eps : float, default=1.0
            Extends the minimum and maximum values of X for evaluating the
            response function.

        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

        response_method : {'auto', 'predict_proba', 'decision_function', \
                'predict'}, default='auto'
            Specifies whether to use :term:`predict_proba`,
            :term:`decision_function`, :term:`predict` as the target response.
            If set to 'auto', the response method is tried in the following order:
            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.
            For multiclass problems, :term:`predict` is selected when
            `response_method="auto"`.

        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ylabel : str, default=None
            The label used for the y-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.

        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        **kwargs : dict
            Additional keyword arguments to be passed to the
            `plot_method`.

        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores the result. It is an instance of the class
            this method is called on.

        Raises
        ------
        ValueError
            If `grid_resolution` is not greater than 1, if `eps` is negative,
            if `plot_method` is not supported, or if the response has more
            than one dimension and `estimator` is a regressor.

        See Also
        --------
        DecisionBoundaryDisplay : Decision boundary visualization.
        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix
            given an estimator, the data, and the label.
        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix
            given the true and predicted labels.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.inspection import DecisionBoundaryDisplay
        >>> iris = load_iris()
        >>> X = iris.data[:, :2]
        >>> classifier = LogisticRegression().fit(X, iris.target)
        >>> disp = DecisionBoundaryDisplay.from_estimator(
        ...     classifier, X, response_method="predict",
        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
        ...     alpha=0.5,
        ... )
        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        check_is_fitted(estimator)

        if not grid_resolution > 1:
            raise ValueError(
                "grid_resolution must be greater than 1. Got"
                f" {grid_resolution} instead."
            )

        if not eps >= 0:
            raise ValueError(
                f"eps must be greater than or equal to 0. Got {eps} instead."
            )

        possible_plot_methods = ("contourf", "contour", "pcolormesh")
        if plot_method not in possible_plot_methods:
            available_methods = ", ".join(possible_plot_methods)
            raise ValueError(
                f"plot_method must be one of {available_methods}. "
                f"Got {plot_method} instead."
            )

        # Only the first two columns of X define the plotted plane.
        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)

        # Pad the data range by `eps` on every side before building the grid.
        x0_min, x0_max = x0.min() - eps, x0.max() + eps
        x1_min, x1_max = x1.min() - eps, x1.max() + eps

        xx0, xx1 = np.meshgrid(
            np.linspace(x0_min, x0_max, grid_resolution),
            np.linspace(x1_min, x1_max, grid_resolution),
        )
        if hasattr(X, "iloc"):
            # we need to preserve the feature names and therefore get an empty dataframe
            X_grid = X.iloc[[], :].copy()
            X_grid.iloc[:, 0] = xx0.ravel()
            X_grid.iloc[:, 1] = xx1.ravel()
        else:
            X_grid = np.c_[xx0.ravel(), xx1.ravel()]

        pred_func = _check_boundary_response_method(estimator, response_method)
        response = pred_func(X_grid)

        # convert classes predictions into integers
        if pred_func.__name__ == "predict" and hasattr(estimator, "classes_"):
            # Reuse the estimator's own class ordering instead of fitting the
            # encoder on the predictions.
            encoder = LabelEncoder()
            encoder.classes_ = estimator.classes_
            response = encoder.transform(response)

        if response.ndim != 1:
            if is_regressor(estimator):
                raise ValueError("Multi-output regressors are not supported")

            # TODO: Support pos_label
            # Keep only the second column of a 2D response.
            response = response[:, 1]

        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""

        if ylabel is None:
            ylabel = X.columns[1] if hasattr(X, "columns") else ""

        # Instantiate through `cls` so that calling this alternate constructor
        # on a subclass returns an instance of that subclass.
        display = cls(
            xx0=xx0,
            xx1=xx1,
            response=response.reshape(xx0.shape),
            xlabel=xlabel,
            ylabel=ylabel,
        )
        return display.plot(ax=ax, plot_method=plot_method, **kwargs)