diff --git "a/.venv/Lib/site-packages/sklearn/model_selection/_validation.py" "b/.venv/Lib/site-packages/sklearn/model_selection/_validation.py" new file mode 100644--- /dev/null +++ "b/.venv/Lib/site-packages/sklearn/model_selection/_validation.py" @@ -0,0 +1,2550 @@ +""" +The :mod:`sklearn.model_selection._validation` module includes classes and +functions to validate the model. +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + + +import numbers +import time +import warnings +from collections import Counter +from contextlib import suppress +from functools import partial +from numbers import Real +from traceback import format_exc + +import numpy as np +import scipy.sparse as sp +from joblib import logger + +from ..base import clone, is_classifier +from ..exceptions import FitFailedWarning, UnsetMetadataPassedError +from ..metrics import check_scoring, get_scorer_names +from ..metrics._scorer import _MultimetricScorer +from ..preprocessing import LabelEncoder +from ..utils import Bunch, _safe_indexing, check_random_state, indexable +from ..utils._array_api import device, get_namespace +from ..utils._param_validation import ( + HasMethods, + Integral, + Interval, + StrOptions, + validate_params, +) +from ..utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + _routing_enabled, + process_routing, +) +from ..utils.metaestimators import _safe_split +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _check_method_params, _num_samples +from ._split import check_cv + +__all__ = [ + "cross_validate", + "cross_val_score", + "cross_val_predict", + "permutation_test_score", + "learning_curve", + "validation_curve", +] + + +def _check_params_groups_deprecation(fit_params, params, groups, version): + """A helper function to check deprecations on `groups` and `fit_params`. + + # TODO(SLEP6): To be removed when set_config(enable_metadata_routing=False) is not + # possible. + """ + if params is not None and fit_params is not None: + raise ValueError( + "`params` and `fit_params` cannot both be provided. Pass parameters " + "via `params`. `fit_params` is deprecated and will be removed in " + f"version {version}." + ) + elif fit_params is not None: + warnings.warn( + ( + "`fit_params` is deprecated and will be removed in version {version}. " + "Pass parameters via `params` instead." + ), + FutureWarning, + ) + params = fit_params + + params = {} if params is None else params + + _check_groups_routing_disabled(groups) + + return params + + +# TODO(SLEP6): To be removed when set_config(enable_metadata_routing=False) is not +# possible. +def _check_groups_routing_disabled(groups): + if groups is not None and _routing_enabled(): + raise ValueError( + "`groups` can only be passed if metadata routing is not enabled via" + " `sklearn.set_config(enable_metadata_routing=True)`. When routing is" + " enabled, pass `groups` alongside other metadata via the `params` argument" + " instead." 
+        )
+
+
+@validate_params(
+    {
+        "estimator": [HasMethods("fit")],
+        "X": ["array-like", "sparse matrix"],
+        "y": ["array-like", None],
+        "groups": ["array-like", None],
+        "scoring": [
+            StrOptions(set(get_scorer_names())),
+            callable,
+            list,
+            tuple,
+            dict,
+            None,
+        ],
+        "cv": ["cv_object"],
+        "n_jobs": [Integral, None],
+        "verbose": ["verbose"],
+        "params": [dict, None],
+        "pre_dispatch": [Integral, str],
+        "return_train_score": ["boolean"],
+        "return_estimator": ["boolean"],
+        "return_indices": ["boolean"],
+        "error_score": [StrOptions({"raise"}), Real],
+    },
+    prefer_skip_nested_validation=False,  # estimator is not validated yet
+)
+def cross_validate(
+    estimator,
+    X,
+    y=None,
+    *,
+    groups=None,
+    scoring=None,
+    cv=None,
+    n_jobs=None,
+    verbose=0,
+    params=None,
+    pre_dispatch="2*n_jobs",
+    return_train_score=False,
+    return_estimator=False,
+    return_indices=False,
+    error_score=np.nan,
+):
+    """Evaluate metric(s) by cross-validation and also record fit/score times.
+
+    Read more in the :ref:`User Guide <multimetric_cross_validation>`.
+
+    Parameters
+    ----------
+    estimator : estimator object implementing 'fit'
+        The object to use to fit the data.
+
+    X : {array-like, sparse matrix} of shape (n_samples, n_features)
+        The data to fit. Can be for example a list, or an array.
+
+    y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
+        The target variable to try to predict in the case of
+        supervised learning.
+
+    groups : array-like of shape (n_samples,), default=None
+        Group labels for the samples used while splitting the dataset into
+        train/test set. Only used in conjunction with a "Group" :term:`cv`
+        instance (e.g., :class:`GroupKFold`).
+
+        .. versionchanged:: 1.4
+            ``groups`` can only be passed if metadata routing is not enabled
+            via ``sklearn.set_config(enable_metadata_routing=True)``. When routing
+            is enabled, pass ``groups`` alongside other metadata via the ``params``
+            argument instead. E.g.:
+            ``cross_validate(..., params={'groups': groups})``.
+
+    scoring : str, callable, list, tuple, or dict, default=None
+        Strategy to evaluate the performance of the cross-validated model on
+        the test set. If `None`, the
+        :ref:`default evaluation criterion <scoring_api_overview>` of the estimator
+        is used.
+
+        If `scoring` represents a single score, one can use:
+
+        - a single string (see :ref:`scoring_parameter`);
+        - a callable (see :ref:`scoring_callable`) that returns a single value.
+
+        If `scoring` represents multiple scores, one can use:
+
+        - a list or tuple of unique strings;
+        - a callable returning a dictionary where the keys are the metric
+          names and the values are the metric scores;
+        - a dictionary with metric names as keys and callables as values.
+
+        See :ref:`multimetric_grid_search` for an example.
+
+    cv : int, cross-validation generator or an iterable, default=None
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+
+        - None, to use the default 5-fold cross validation,
+        - int, to specify the number of folds in a `(Stratified)KFold`,
+        - :term:`CV splitter`,
+        - An iterable yielding (train, test) splits as arrays of indices.
+
+        For int/None inputs, if the estimator is a classifier and ``y`` is
+        either binary or multiclass, :class:`StratifiedKFold` is used. In all
+        other cases, :class:`KFold` is used. These splitters are instantiated
+        with `shuffle=False` so the splits will be the same across calls.
+
+        Refer to the :ref:`User Guide <cross_validation>` for the various
+        cross-validation strategies that can be used here.
+
+        .. versionchanged:: 0.22
+            ``cv`` default value if None changed from 3-fold to 5-fold.
+
+    n_jobs : int, default=None
+        Number of jobs to run in parallel. Training the estimator and computing
+        the score are parallelized over the cross-validation splits.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    verbose : int, default=0
+        The verbosity level.
+
+    params : dict, default=None
+        Parameters to pass to the underlying estimator's ``fit``, the scorer,
+        and the CV splitter.
+
+        .. versionadded:: 1.4
+
+    pre_dispatch : int or str, default='2*n_jobs'
+        Controls the number of jobs that get dispatched during parallel
+        execution. Reducing this number can be useful to avoid an
+        explosion of memory consumption when more jobs get dispatched
+        than CPUs can process. This parameter can be:
+
+        - An int, giving the exact number of total jobs that are spawned
+        - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'
+
+    return_train_score : bool, default=False
+        Whether to include train scores.
+        Computing training scores is used to get insights on how different
+        parameter settings impact the overfitting/underfitting trade-off.
+        However computing the scores on the training set can be computationally
+        expensive and is not strictly required to select the parameters that
+        yield the best generalization performance.
+
+        .. versionadded:: 0.19
+
+        .. versionchanged:: 0.21
+            Default value was changed from ``True`` to ``False``
+
+    return_estimator : bool, default=False
+        Whether to return the estimators fitted on each split.
+
+        .. versionadded:: 0.20
+
+    return_indices : bool, default=False
+        Whether to return the train-test indices selected for each split.
+
+        .. versionadded:: 1.3
+
+    error_score : 'raise' or numeric, default=np.nan
+        Value to assign to the score if an error occurs in estimator fitting.
+        If set to 'raise', the error is raised.
+        If a numeric value is given, FitFailedWarning is raised.
+
+        .. versionadded:: 0.20
+
+    Returns
+    -------
+    scores : dict of float arrays of shape (n_splits,)
+        Array of scores of the estimator for each run of the cross validation.
+
+        A dict of arrays containing the score/time arrays for each scorer is
+        returned. The possible keys for this ``dict`` are:
+
+            ``test_score``
+                The score array for test scores on each cv split.
+                Suffix ``_score`` in ``test_score`` changes to a specific
+                metric like ``test_r2`` or ``test_auc`` if there are
+                multiple scoring metrics in the scoring parameter.
+            ``train_score``
+                The score array for train scores on each cv split.
+                Suffix ``_score`` in ``train_score`` changes to a specific
+                metric like ``train_r2`` or ``train_auc`` if there are
+                multiple scoring metrics in the scoring parameter.
+                This is available only if ``return_train_score`` parameter
+                is ``True``.
+            ``fit_time``
+                The time for fitting the estimator on the train
+                set for each cv split.
+            ``score_time``
+                The time for scoring the estimator on the test set for each
+                cv split. (Note time for scoring on the train set is not
+                included even if ``return_train_score`` is set to ``True``.)
+            ``estimator``
+                The estimator objects for each cv split.
+                This is available only if ``return_estimator`` parameter
+                is set to ``True``.
+            ``indices``
+                The train/test positional indices for each cv split. A dictionary
+                is returned where the keys are either `"train"` or `"test"`
+                and the associated values are a list of integer-dtyped NumPy
+                arrays with the indices. Available only if `return_indices=True`.
+
+    See Also
+    --------
+    cross_val_score : Run cross-validation for single metric evaluation.
+
+    cross_val_predict : Get predictions from each split of cross-validation for
+        diagnostic purposes.
+
+    sklearn.metrics.make_scorer : Make a scorer from a performance metric or
+        loss function.
+
+    Examples
+    --------
+    >>> from sklearn import datasets, linear_model
+    >>> from sklearn.model_selection import cross_validate
+    >>> from sklearn.metrics import make_scorer
+    >>> from sklearn.metrics import confusion_matrix
+    >>> from sklearn.svm import LinearSVC
+    >>> diabetes = datasets.load_diabetes()
+    >>> X = diabetes.data[:150]
+    >>> y = diabetes.target[:150]
+    >>> lasso = linear_model.Lasso()
+
+    Single metric evaluation using ``cross_validate``
+
+    >>> cv_results = cross_validate(lasso, X, y, cv=3)
+    >>> sorted(cv_results.keys())
+    ['fit_time', 'score_time', 'test_score']
+    >>> cv_results['test_score']
+    array([0.3315057 , 0.08022103, 0.03531816])
+
+    Multiple metric evaluation using ``cross_validate``
+    (please refer to the ``scoring`` parameter doc for more information)
+
+    >>> scores = cross_validate(lasso, X, y, cv=3,
+    ...                         scoring=('r2', 'neg_mean_squared_error'),
+    ...                         return_train_score=True)
+    >>> print(scores['test_neg_mean_squared_error'])
+    [-3635.5... -3573.3... -6114.7...]
+    >>> print(scores['train_r2'])
+    [0.28009951 0.3908844  0.22784907]
+    """
+    _check_groups_routing_disabled(groups)
+
+    X, y = indexable(X, y)
+
+    cv = check_cv(cv, y, classifier=is_classifier(estimator))
+
+    scorers = check_scoring(
+        estimator, scoring=scoring, raise_exc=(error_score == "raise")
+    )
+
+    if _routing_enabled():
+        # For estimators, a MetadataRouter is created in get_metadata_routing
+        # methods. For these router methods, we create the router to use
+        # `process_routing` on it.
+        router = (
+            MetadataRouter(owner="cross_validate")
+            .add(
+                splitter=cv,
+                method_mapping=MethodMapping().add(caller="fit", callee="split"),
+            )
+            .add(
+                estimator=estimator,
+                # TODO(SLEP6): also pass metadata to the predict method for
+                # scoring?
+                method_mapping=MethodMapping().add(caller="fit", callee="fit"),
+            )
+            .add(
+                scorer=scorers,
+                method_mapping=MethodMapping().add(caller="fit", callee="score"),
+            )
+        )
+        try:
+            routed_params = process_routing(router, "fit", **params)
+        except UnsetMetadataPassedError as e:
+            # The default exception would mention `fit` since in the above
+            # `process_routing` code, we pass `fit` as the caller. However,
+            # the user is not calling `fit` directly, so we change the message
+            # to make it more suitable for this case.
+            unrequested_params = sorted(e.unrequested_params)
+            raise UnsetMetadataPassedError(
+                message=(
+                    f"{unrequested_params} are passed to cross validation but are not"
+                    " explicitly set as requested or not requested for cross_validate's"
+                    f" estimator: {estimator.__class__.__name__}. Call"
+                    " `.set_fit_request({{metadata}}=True)` on the estimator for"
+                    f" each metadata in {unrequested_params} that you"
+                    " want to use and `metadata=False` for not using it. See the"
+                    " Metadata Routing User guide"
+                    " <https://scikit-learn.org/stable/metadata_routing.html> for more"
+                    " information."
+                ),
+                unrequested_params=e.unrequested_params,
+                routed_params=e.routed_params,
+            )
+    else:
+        routed_params = Bunch()
+        routed_params.splitter = Bunch(split={"groups": groups})
+        routed_params.estimator = Bunch(fit=params)
+        routed_params.scorer = Bunch(score={})
+
+    indices = cv.split(X, y, **routed_params.splitter.split)
+    if return_indices:
+        # materialize the indices since we need to store them in the returned dict
+        indices = list(indices)
+
+    # We clone the estimator to make sure that all the folds are
+    # independent, and that it is pickle-able.
+    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
+    results = parallel(
+        delayed(_fit_and_score)(
+            clone(estimator),
+            X,
+            y,
+            scorer=scorers,
+            train=train,
+            test=test,
+            verbose=verbose,
+            parameters=None,
+            fit_params=routed_params.estimator.fit,
+            score_params=routed_params.scorer.score,
+            return_train_score=return_train_score,
+            return_times=True,
+            return_estimator=return_estimator,
+            error_score=error_score,
+        )
+        for train, test in indices
+    )
+
+    _warn_or_raise_about_fit_failures(results, error_score)
+
+    # For callable scoring, the return type is only known after calling. If the
+    # return type is a dictionary, the error scores can now be inserted with
+    # the correct key.
+    if callable(scoring):
+        _insert_error_scores(results, error_score)
+
+    results = _aggregate_score_dicts(results)
+
+    ret = {}
+    ret["fit_time"] = results["fit_time"]
+    ret["score_time"] = results["score_time"]
+
+    if return_estimator:
+        ret["estimator"] = results["estimator"]
+
+    if return_indices:
+        ret["indices"] = {}
+        ret["indices"]["train"], ret["indices"]["test"] = zip(*indices)
+
+    test_scores_dict = _normalize_score_results(results["test_scores"])
+    if return_train_score:
+        train_scores_dict = _normalize_score_results(results["train_scores"])
+
+    for name in test_scores_dict:
+        ret["test_%s" % name] = test_scores_dict[name]
+        if return_train_score:
+            key = "train_%s" % name
+            ret[key] = train_scores_dict[name]
+
+    return ret
+
+
+def _insert_error_scores(results, error_score):
+    """Insert error scores in `results` by replacing failed entries in place
+    with `error_score`.
+
+    This only applies to multimetric scores because `_fit_and_score` will
+    handle the single metric case.
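+
+    For example, with ``scoring={"r2": ..., "max_error": ...}`` and a numeric
+    ``error_score``, a failed split's ``test_scores`` entry becomes
+    ``{"r2": error_score, "max_error": error_score}``: an illustrative sketch
+    of the dict built below from the first successful split's keys.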
+ """ + successful_score = None + failed_indices = [] + for i, result in enumerate(results): + if result["fit_error"] is not None: + failed_indices.append(i) + elif successful_score is None: + successful_score = result["test_scores"] + + if isinstance(successful_score, dict): + formatted_error = {name: error_score for name in successful_score} + for i in failed_indices: + results[i]["test_scores"] = formatted_error.copy() + if "train_scores" in results[i]: + results[i]["train_scores"] = formatted_error.copy() + + +def _normalize_score_results(scores, scaler_score_key="score"): + """Creates a scoring dictionary based on the type of `scores`""" + if isinstance(scores[0], dict): + # multimetric scoring + return _aggregate_score_dicts(scores) + # scaler + return {scaler_score_key: scores} + + +def _warn_or_raise_about_fit_failures(results, error_score): + fit_errors = [ + result["fit_error"] for result in results if result["fit_error"] is not None + ] + if fit_errors: + num_failed_fits = len(fit_errors) + num_fits = len(results) + fit_errors_counter = Counter(fit_errors) + delimiter = "-" * 80 + "\n" + fit_errors_summary = "\n".join( + f"{delimiter}{n} fits failed with the following error:\n{error}" + for error, n in fit_errors_counter.items() + ) + + if num_failed_fits == num_fits: + all_fits_failed_message = ( + f"\nAll the {num_fits} fits failed.\n" + "It is very likely that your model is misconfigured.\n" + "You can try to debug the error by setting error_score='raise'.\n\n" + f"Below are more details about the failures:\n{fit_errors_summary}" + ) + raise ValueError(all_fits_failed_message) + + else: + some_fits_failed_message = ( + f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n" + "The score on these train-test partitions for these parameters" + f" will be set to {error_score}.\n" + "If these failures are not expected, you can try to debug them " + "by setting error_score='raise'.\n\n" + f"Below are more details about the failures:\n{fit_errors_summary}" + ) + warnings.warn(some_fits_failed_message, FitFailedWarning) + + +@validate_params( + { + "estimator": [HasMethods("fit")], + "X": ["array-like", "sparse matrix"], + "y": ["array-like", None], + "groups": ["array-like", None], + "scoring": [StrOptions(set(get_scorer_names())), callable, None], + "cv": ["cv_object"], + "n_jobs": [Integral, None], + "verbose": ["verbose"], + "params": [dict, None], + "pre_dispatch": [Integral, str, None], + "error_score": [StrOptions({"raise"}), Real], + }, + prefer_skip_nested_validation=False, # estimator is not validated yet +) +def cross_val_score( + estimator, + X, + y=None, + *, + groups=None, + scoring=None, + cv=None, + n_jobs=None, + verbose=0, + params=None, + pre_dispatch="2*n_jobs", + error_score=np.nan, +): + """Evaluate a score by cross-validation. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data to fit. Can be for example a list, or an array. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ + default=None + The target variable to try to predict in the case of + supervised learning. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`GroupKFold`). + + .. 
versionchanged:: 1.4 + ``groups`` can only be passed if metadata routing is not enabled + via ``sklearn.set_config(enable_metadata_routing=True)``. When routing + is enabled, pass ``groups`` alongside other metadata via the ``params`` + argument instead. E.g.: + ``cross_val_score(..., params={'groups': groups})``. + + scoring : str or callable, default=None + A str (see :ref:`scoring_parameter`) or a scorer callable object / function with + signature ``scorer(estimator, X, y)`` which should return only a single value. + + Similar to :func:`cross_validate` + but only a single metric is permitted. + + If `None`, the estimator's default scorer (if available) is used. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - `None`, to use the default 5-fold cross validation, + - int, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable that generates (train, test) splits as arrays of indices. + + For `int`/`None` inputs, if the estimator is a classifier and `y` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with `shuffle=False` so the splits will be the same across calls. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + `cv` default value if `None` changed from 3-fold to 5-fold. + + n_jobs : int, default=None + Number of jobs to run in parallel. Training the estimator and computing + the score are parallelized over the cross-validation splits. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + verbose : int, default=0 + The verbosity level. + + params : dict, default=None + Parameters to pass to the underlying estimator's ``fit``, the scorer, + and the CV splitter. + + .. versionadded:: 1.4 + + pre_dispatch : int or str, default='2*n_jobs' + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - ``None``, in which case all the jobs are immediately created and spawned. Use + this for lightweight and fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + - An int, giving the exact number of total jobs that are spawned + - A str, giving an expression as a function of n_jobs, as in '2*n_jobs' + + error_score : 'raise' or numeric, default=np.nan + Value to assign to the score if an error occurs in estimator fitting. + If set to 'raise', the error is raised. + If a numeric value is given, FitFailedWarning is raised. + + .. versionadded:: 0.20 + + Returns + ------- + scores : ndarray of float of shape=(len(list(cv)),) + Array of scores of the estimator for each run of the cross validation. + + See Also + -------- + cross_validate : To run cross-validation on multiple metrics and also to + return train scores, fit times and score times. + + cross_val_predict : Get predictions from each split of cross-validation for + diagnostic purposes. + + sklearn.metrics.make_scorer : Make a scorer from a performance metric or + loss function. 
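+
+    Notes
+    -----
+    This function is a thin wrapper around :func:`cross_validate` restricted
+    to a single metric. As an illustrative sketch, a call is roughly
+    equivalent to::
+
+        scorer = check_scoring(estimator, scoring=scoring)
+        cross_validate(estimator, X, y, scoring={"score": scorer}, ...)["test_score"]
+
+    as done in the implementation below.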
+
+    Examples
+    --------
+    >>> from sklearn import datasets, linear_model
+    >>> from sklearn.model_selection import cross_val_score
+    >>> diabetes = datasets.load_diabetes()
+    >>> X = diabetes.data[:150]
+    >>> y = diabetes.target[:150]
+    >>> lasso = linear_model.Lasso()
+    >>> print(cross_val_score(lasso, X, y, cv=3))
+    [0.3315057  0.08022103 0.03531816]
+    """
+    # To ensure multimetric format is not supported
+    scorer = check_scoring(estimator, scoring=scoring)
+
+    cv_results = cross_validate(
+        estimator=estimator,
+        X=X,
+        y=y,
+        groups=groups,
+        scoring={"score": scorer},
+        cv=cv,
+        n_jobs=n_jobs,
+        verbose=verbose,
+        params=params,
+        pre_dispatch=pre_dispatch,
+        error_score=error_score,
+    )
+    return cv_results["test_score"]
+
+
+def _fit_and_score(
+    estimator,
+    X,
+    y,
+    *,
+    scorer,
+    train,
+    test,
+    verbose,
+    parameters,
+    fit_params,
+    score_params,
+    return_train_score=False,
+    return_parameters=False,
+    return_n_test_samples=False,
+    return_times=False,
+    return_estimator=False,
+    split_progress=None,
+    candidate_progress=None,
+    error_score=np.nan,
+):
+    """Fit estimator and compute scores for a given dataset split.
+
+    Parameters
+    ----------
+    estimator : estimator object implementing 'fit'
+        The object to use to fit the data.
+
+    X : array-like of shape (n_samples, n_features)
+        The data to fit.
+
+    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
+        The target variable to try to predict in the case of
+        supervised learning.
+
+    scorer : A single callable or dict mapping scorer name to the callable
+        If it is a single callable, the return value for ``train_scores`` and
+        ``test_scores`` is a single float.
+
+        For a dict, it should be one mapping the scorer name to the scorer
+        callable object / function.
+
+        The callable object / function should have signature
+        ``scorer(estimator, X, y)``.
+
+    train : array-like of shape (n_train_samples,)
+        Indices of training samples.
+
+    test : array-like of shape (n_test_samples,)
+        Indices of test samples.
+
+    verbose : int
+        The verbosity level.
+
+    error_score : 'raise' or numeric, default=np.nan
+        Value to assign to the score if an error occurs in estimator fitting.
+        If set to 'raise', the error is raised.
+        If a numeric value is given, FitFailedWarning is raised.
+
+    parameters : dict or None
+        Parameters to be set on the estimator.
+
+    fit_params : dict or None
+        Parameters that will be passed to ``estimator.fit``.
+
+    score_params : dict or None
+        Parameters that will be passed to the scorer.
+
+    return_train_score : bool, default=False
+        Compute and return score on training set.
+
+    return_parameters : bool, default=False
+        Return parameters that have been used for the estimator.
+
+    split_progress : {list, tuple} of int, default=None
+        A list or tuple of format (<current_split_id>, <max_split_id>).
+
+    candidate_progress : {list, tuple} of int, default=None
+        A list or tuple of format
+        (<current_candidate_id>, <max_candidate_id>).
+
+    return_n_test_samples : bool, default=False
+        Whether to return the ``n_test_samples``.
+
+    return_times : bool, default=False
+        Whether to return the fit/score times.
+
+    return_estimator : bool, default=False
+        Whether to return the fitted estimator.
+
+    Returns
+    -------
+    result : dict with the following attributes
+        train_scores : dict of scorer name -> float
+            Score on training set (for all the scorers),
+            returned only if `return_train_score` is `True`.
+        test_scores : dict of scorer name -> float
+            Score on testing set (for all the scorers).
+        n_test_samples : int
+            Number of test samples.
+        fit_time : float
+            Time spent for fitting in seconds.
+ score_time : float + Time spent for scoring in seconds. + parameters : dict or None + The parameters that have been evaluated. + estimator : estimator object + The fitted estimator. + fit_error : str or None + Traceback str if the fit failed, None if the fit succeeded. + """ + xp, _ = get_namespace(X) + X_device = device(X) + + # Make sure that we can fancy index X even if train and test are provided + # as NumPy arrays by NumPy only cross-validation splitters. + train, test = xp.asarray(train, device=X_device), xp.asarray(test, device=X_device) + + if not isinstance(error_score, numbers.Number) and error_score != "raise": + raise ValueError( + "error_score must be the string 'raise' or a numeric value. " + "(Hint: if using 'raise', please make sure that it has been " + "spelled correctly.)" + ) + + progress_msg = "" + if verbose > 2: + if split_progress is not None: + progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" + if candidate_progress and verbose > 9: + progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}" + + if verbose > 1: + if parameters is None: + params_msg = "" + else: + sorted_keys = sorted(parameters) # Ensure deterministic o/p + params_msg = ", ".join(f"{k}={parameters[k]}" for k in sorted_keys) + if verbose > 9: + start_msg = f"[CV{progress_msg}] START {params_msg}" + print(f"{start_msg}{(80 - len(start_msg)) * '.'}") + + # Adjust length of sample weights + fit_params = fit_params if fit_params is not None else {} + fit_params = _check_method_params(X, params=fit_params, indices=train) + score_params = score_params if score_params is not None else {} + score_params_train = _check_method_params(X, params=score_params, indices=train) + score_params_test = _check_method_params(X, params=score_params, indices=test) + + if parameters is not None: + # here we clone the parameters, since sometimes the parameters + # themselves might be estimators, e.g. when we search over different + # estimators in a pipeline. 
+ # ref: https://github.com/scikit-learn/scikit-learn/pull/26786 + estimator = estimator.set_params(**clone(parameters, safe=False)) + + start_time = time.time() + + X_train, y_train = _safe_split(estimator, X, y, train) + X_test, y_test = _safe_split(estimator, X, y, test, train) + + result = {} + try: + if y_train is None: + estimator.fit(X_train, **fit_params) + else: + estimator.fit(X_train, y_train, **fit_params) + + except Exception: + # Note fit time as time until error + fit_time = time.time() - start_time + score_time = 0.0 + if error_score == "raise": + raise + elif isinstance(error_score, numbers.Number): + if isinstance(scorer, _MultimetricScorer): + test_scores = {name: error_score for name in scorer._scorers} + if return_train_score: + train_scores = test_scores.copy() + else: + test_scores = error_score + if return_train_score: + train_scores = error_score + result["fit_error"] = format_exc() + else: + result["fit_error"] = None + + fit_time = time.time() - start_time + test_scores = _score( + estimator, X_test, y_test, scorer, score_params_test, error_score + ) + score_time = time.time() - start_time - fit_time + if return_train_score: + train_scores = _score( + estimator, X_train, y_train, scorer, score_params_train, error_score + ) + + if verbose > 1: + total_time = score_time + fit_time + end_msg = f"[CV{progress_msg}] END " + result_msg = params_msg + (";" if params_msg else "") + if verbose > 2: + if isinstance(test_scores, dict): + for scorer_name in sorted(test_scores): + result_msg += f" {scorer_name}: (" + if return_train_score: + scorer_scores = train_scores[scorer_name] + result_msg += f"train={scorer_scores:.3f}, " + result_msg += f"test={test_scores[scorer_name]:.3f})" + else: + result_msg += ", score=" + if return_train_score: + result_msg += f"(train={train_scores:.3f}, test={test_scores:.3f})" + else: + result_msg += f"{test_scores:.3f}" + result_msg += f" total time={logger.short_format_time(total_time)}" + + # Right align the result_msg + end_msg += "." * (80 - len(end_msg) - len(result_msg)) + end_msg += result_msg + print(end_msg) + + result["test_scores"] = test_scores + if return_train_score: + result["train_scores"] = train_scores + if return_n_test_samples: + result["n_test_samples"] = _num_samples(X_test) + if return_times: + result["fit_time"] = fit_time + result["score_time"] = score_time + if return_parameters: + result["parameters"] = parameters + if return_estimator: + result["estimator"] = estimator + return result + + +def _score(estimator, X_test, y_test, scorer, score_params, error_score="raise"): + """Compute the score(s) of an estimator on a given test set. + + Will return a dict of floats if `scorer` is a _MultiMetricScorer, otherwise a single + float is returned. + """ + score_params = {} if score_params is None else score_params + + try: + if y_test is None: + scores = scorer(estimator, X_test, **score_params) + else: + scores = scorer(estimator, X_test, y_test, **score_params) + except Exception: + if isinstance(scorer, _MultimetricScorer): + # If `_MultimetricScorer` raises exception, the `error_score` + # parameter is equal to "raise". + raise + else: + if error_score == "raise": + raise + else: + scores = error_score + warnings.warn( + ( + "Scoring failed. The score on this train-test partition for " + f"these parameters will be set to {error_score}. 
Details: \n" + f"{format_exc()}" + ), + UserWarning, + ) + + # Check non-raised error messages in `_MultimetricScorer` + if isinstance(scorer, _MultimetricScorer): + exception_messages = [ + (name, str_e) for name, str_e in scores.items() if isinstance(str_e, str) + ] + if exception_messages: + # error_score != "raise" + for name, str_e in exception_messages: + scores[name] = error_score + warnings.warn( + ( + "Scoring failed. The score on this train-test partition for " + f"these parameters will be set to {error_score}. Details: \n" + f"{str_e}" + ), + UserWarning, + ) + + error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)" + if isinstance(scores, dict): + for name, score in scores.items(): + if hasattr(score, "item"): + with suppress(ValueError): + # e.g. unwrap memmapped scalars + score = score.item() + if not isinstance(score, numbers.Number): + raise ValueError(error_msg % (score, type(score), name)) + scores[name] = score + else: # scalar + if hasattr(scores, "item"): + with suppress(ValueError): + # e.g. unwrap memmapped scalars + scores = scores.item() + if not isinstance(scores, numbers.Number): + raise ValueError(error_msg % (scores, type(scores), scorer)) + return scores + + +@validate_params( + { + "estimator": [HasMethods(["fit", "predict"])], + "X": ["array-like", "sparse matrix"], + "y": ["array-like", "sparse matrix", None], + "groups": ["array-like", None], + "cv": ["cv_object"], + "n_jobs": [Integral, None], + "verbose": ["verbose"], + "params": [dict, None], + "pre_dispatch": [Integral, str, None], + "method": [ + StrOptions( + { + "predict", + "predict_proba", + "predict_log_proba", + "decision_function", + } + ) + ], + }, + prefer_skip_nested_validation=False, # estimator is not validated yet +) +def cross_val_predict( + estimator, + X, + y=None, + *, + groups=None, + cv=None, + n_jobs=None, + verbose=0, + params=None, + pre_dispatch="2*n_jobs", + method="predict", +): + """Generate cross-validated estimates for each input data point. + + The data is split according to the cv parameter. Each sample belongs + to exactly one test set, and its prediction is computed with an + estimator fitted on the corresponding training set. + + Passing these predictions into an evaluation metric may not be a valid + way to measure generalization performance. Results can differ from + :func:`cross_validate` and :func:`cross_val_score` unless all tests sets + have equal size and the metric decomposes over samples. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator + The estimator instance to use to fit the data. It must implement a `fit` + method and the method given by the `method` parameter. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data to fit. Can be, for example a list, or an array at least 2d. + + y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs), \ + default=None + The target variable to try to predict in the case of + supervised learning. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`GroupKFold`). + + .. versionchanged:: 1.4 + ``groups`` can only be passed if metadata routing is not enabled + via ``sklearn.set_config(enable_metadata_routing=True)``. When routing + is enabled, pass ``groups`` alongside other metadata via the ``params`` + argument instead. 
E.g.: + ``cross_val_predict(..., params={'groups': groups})``. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - int, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable that generates (train, test) splits as arrays of indices. + + For int/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with `shuffle=False` so the splits will be the same across calls. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + + n_jobs : int, default=None + Number of jobs to run in parallel. Training the estimator and + predicting are parallelized over the cross-validation splits. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + verbose : int, default=0 + The verbosity level. + + params : dict, default=None + Parameters to pass to the underlying estimator's ``fit`` and the CV + splitter. + + .. versionadded:: 1.4 + + pre_dispatch : int or str, default='2*n_jobs' + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately created and spawned. Use + this for lightweight and fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + - An int, giving the exact number of total jobs that are spawned + - A str, giving an expression as a function of n_jobs, as in '2*n_jobs' + + method : {'predict', 'predict_proba', 'predict_log_proba', \ + 'decision_function'}, default='predict' + The method to be invoked by `estimator`. + + Returns + ------- + predictions : ndarray + This is the result of calling `method`. Shape: + + - When `method` is 'predict' and in special case where `method` is + 'decision_function' and the target is binary: (n_samples,) + - When `method` is one of {'predict_proba', 'predict_log_proba', + 'decision_function'} (unless special case above): + (n_samples, n_classes) + - If `estimator` is :term:`multioutput`, an extra dimension + 'n_outputs' is added to the end of each shape above. + + See Also + -------- + cross_val_score : Calculate score for each CV split. + cross_validate : Calculate one or more scores and timings for each CV + split. + + Notes + ----- + In the case that one or more classes are absent in a training portion, a + default score needs to be assigned to all instances for that class if + ``method`` produces columns per class, as in {'decision_function', + 'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is + 0. In order to ensure finite output, we approximate negative infinity by + the minimum finite float value for the dtype in other cases. 
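+
+    For instance, with three classes and ``method='predict_proba'``, a
+    training fold that never sees class ``2`` yields per-fold probabilities
+    with only two columns; the output is then expanded to three columns, with
+    the column for the missing class filled with 0 (or with the minimum
+    finite float for ``decision_function`` and ``predict_log_proba``).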
+ + Examples + -------- + >>> from sklearn import datasets, linear_model + >>> from sklearn.model_selection import cross_val_predict + >>> diabetes = datasets.load_diabetes() + >>> X = diabetes.data[:150] + >>> y = diabetes.target[:150] + >>> lasso = linear_model.Lasso() + >>> y_pred = cross_val_predict(lasso, X, y, cv=3) + """ + _check_groups_routing_disabled(groups) + X, y = indexable(X, y) + + if _routing_enabled(): + # For estimators, a MetadataRouter is created in get_metadata_routing + # methods. For these router methods, we create the router to use + # `process_routing` on it. + router = ( + MetadataRouter(owner="cross_validate") + .add( + splitter=cv, + method_mapping=MethodMapping().add(caller="fit", callee="split"), + ) + .add( + estimator=estimator, + # TODO(SLEP6): also pass metadata for the predict method. + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + ) + try: + routed_params = process_routing(router, "fit", **params) + except UnsetMetadataPassedError as e: + # The default exception would mention `fit` since in the above + # `process_routing` code, we pass `fit` as the caller. However, + # the user is not calling `fit` directly, so we change the message + # to make it more suitable for this case. + unrequested_params = sorted(e.unrequested_params) + raise UnsetMetadataPassedError( + message=( + f"{unrequested_params} are passed to `cross_val_predict` but are" + " not explicitly set as requested or not requested for" + f" cross_validate's estimator: {estimator.__class__.__name__} Call" + " `.set_fit_request({{metadata}}=True)` on the estimator for" + f" each metadata in {unrequested_params} that you want to use and" + " `metadata=False` for not using it. See the Metadata Routing User" + " guide " + " for more information." + ), + unrequested_params=e.unrequested_params, + routed_params=e.routed_params, + ) + else: + routed_params = Bunch() + routed_params.splitter = Bunch(split={"groups": groups}) + routed_params.estimator = Bunch(fit=params) + + cv = check_cv(cv, y, classifier=is_classifier(estimator)) + splits = list(cv.split(X, y, **routed_params.splitter.split)) + + test_indices = np.concatenate([test for _, test in splits]) + if not _check_is_permutation(test_indices, _num_samples(X)): + raise ValueError("cross_val_predict only works for partitions") + + # If classification methods produce multiple columns of output, + # we need to manually encode classes to ensure consistent column ordering. + encode = ( + method in ["decision_function", "predict_proba", "predict_log_proba"] + and y is not None + ) + if encode: + y = np.asarray(y) + if y.ndim == 1: + le = LabelEncoder() + y = le.fit_transform(y) + elif y.ndim == 2: + y_enc = np.zeros_like(y, dtype=int) + for i_label in range(y.shape[1]): + y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label]) + y = y_enc + + # We clone the estimator to make sure that all the folds are + # independent, and that it is pickle-able. 
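+    # Illustrative sketch of the reindexing performed after the parallel loop:
+    # if the folds visit the samples as test_indices = [2, 0, 1], then
+    # inv_test_indices = [1, 2, 0], and indexing the concatenated per-fold
+    # predictions with inv_test_indices restores the original sample order.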
+ parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) + predictions = parallel( + delayed(_fit_and_predict)( + clone(estimator), + X, + y, + train, + test, + routed_params.estimator.fit, + method, + ) + for train, test in splits + ) + + inv_test_indices = np.empty(len(test_indices), dtype=int) + inv_test_indices[test_indices] = np.arange(len(test_indices)) + + if sp.issparse(predictions[0]): + predictions = sp.vstack(predictions, format=predictions[0].format) + elif encode and isinstance(predictions[0], list): + # `predictions` is a list of method outputs from each fold. + # If each of those is also a list, then treat this as a + # multioutput-multiclass task. We need to separately concatenate + # the method outputs for each label into an `n_labels` long list. + n_labels = y.shape[1] + concat_pred = [] + for i_label in range(n_labels): + label_preds = np.concatenate([p[i_label] for p in predictions]) + concat_pred.append(label_preds) + predictions = concat_pred + else: + predictions = np.concatenate(predictions) + + if isinstance(predictions, list): + return [p[inv_test_indices] for p in predictions] + else: + return predictions[inv_test_indices] + + +def _fit_and_predict(estimator, X, y, train, test, fit_params, method): + """Fit estimator and predict values for a given dataset split. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator object implementing 'fit' and 'predict' + The object to use to fit the data. + + X : array-like of shape (n_samples, n_features) + The data to fit. + + .. versionchanged:: 0.20 + X is only required to be an object with finite length or shape now + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None + The target variable to try to predict in the case of + supervised learning. + + train : array-like of shape (n_train_samples,) + Indices of training samples. + + test : array-like of shape (n_test_samples,) + Indices of test samples. + + fit_params : dict or None + Parameters that will be passed to ``estimator.fit``. + + method : str + Invokes the passed method name of the passed estimator. 
+ + Returns + ------- + predictions : sequence + Result of calling 'estimator.method' + """ + # Adjust length of sample weights + fit_params = fit_params if fit_params is not None else {} + fit_params = _check_method_params(X, params=fit_params, indices=train) + + X_train, y_train = _safe_split(estimator, X, y, train) + X_test, _ = _safe_split(estimator, X, y, test, train) + + if y_train is None: + estimator.fit(X_train, **fit_params) + else: + estimator.fit(X_train, y_train, **fit_params) + func = getattr(estimator, method) + predictions = func(X_test) + + encode = ( + method in ["decision_function", "predict_proba", "predict_log_proba"] + and y is not None + ) + + if encode: + if isinstance(predictions, list): + predictions = [ + _enforce_prediction_order( + estimator.classes_[i_label], + predictions[i_label], + n_classes=len(set(y[:, i_label])), + method=method, + ) + for i_label in range(len(predictions)) + ] + else: + # A 2D y array should be a binary label indicator matrix + n_classes = len(set(y)) if y.ndim == 1 else y.shape[1] + predictions = _enforce_prediction_order( + estimator.classes_, predictions, n_classes, method + ) + return predictions + + +def _enforce_prediction_order(classes, predictions, n_classes, method): + """Ensure that prediction arrays have correct column order + + When doing cross-validation, if one or more classes are + not present in the subset of data used for training, + then the output prediction array might not have the same + columns as other folds. Use the list of class names + (assumed to be ints) to enforce the correct column order. + + Note that `classes` is the list of classes in this fold + (a subset of the classes in the full training set) + and `n_classes` is the number of classes in the full training set. + """ + if n_classes != len(classes): + recommendation = ( + "To fix this, use a cross-validation " + "technique resulting in properly " + "stratified folds" + ) + warnings.warn( + "Number of classes in training fold ({}) does " + "not match total number of classes ({}). " + "Results may not be appropriate for your use case. " + "{}".format(len(classes), n_classes, recommendation), + RuntimeWarning, + ) + if method == "decision_function": + if predictions.ndim == 2 and predictions.shape[1] != len(classes): + # This handles the case when the shape of predictions + # does not match the number of classes used to train + # it with. This case is found when sklearn.svm.SVC is + # set to `decision_function_shape='ovo'`. + raise ValueError( + "Output shape {} of {} does not match " + "number of classes ({}) in fold. " + "Irregular decision_function outputs " + "are not currently supported by " + "cross_val_predict".format(predictions.shape, method, len(classes)) + ) + if len(classes) <= 2: + # In this special case, `predictions` contains a 1D array. + raise ValueError( + "Only {} class/es in training fold, but {} " + "in overall dataset. This " + "is not supported for decision_function " + "with imbalanced folds. 
{}".format( + len(classes), n_classes, recommendation + ) + ) + + float_min = np.finfo(predictions.dtype).min + default_values = { + "decision_function": float_min, + "predict_log_proba": float_min, + "predict_proba": 0, + } + predictions_for_all_classes = np.full( + (_num_samples(predictions), n_classes), + default_values[method], + dtype=predictions.dtype, + ) + predictions_for_all_classes[:, classes] = predictions + predictions = predictions_for_all_classes + return predictions + + +def _check_is_permutation(indices, n_samples): + """Check whether indices is a reordering of the array np.arange(n_samples) + + Parameters + ---------- + indices : ndarray + int array to test + n_samples : int + number of expected elements + + Returns + ------- + is_partition : bool + True iff sorted(indices) is np.arange(n) + """ + if len(indices) != n_samples: + return False + hit = np.zeros(n_samples, dtype=bool) + hit[indices] = True + if not np.all(hit): + return False + return True + + +@validate_params( + { + "estimator": [HasMethods("fit")], + "X": ["array-like", "sparse matrix"], + "y": ["array-like", None], + "groups": ["array-like", None], + "cv": ["cv_object"], + "n_permutations": [Interval(Integral, 1, None, closed="left")], + "n_jobs": [Integral, None], + "random_state": ["random_state"], + "verbose": ["verbose"], + "scoring": [StrOptions(set(get_scorer_names())), callable, None], + "fit_params": [dict, None], + "params": [dict, None], + }, + prefer_skip_nested_validation=False, # estimator is not validated yet +) +def permutation_test_score( + estimator, + X, + y, + *, + groups=None, + cv=None, + n_permutations=100, + n_jobs=None, + random_state=0, + verbose=0, + scoring=None, + fit_params=None, + params=None, +): + """Evaluate the significance of a cross-validated score with permutations. + + Permutes targets to generate 'randomized data' and compute the empirical + p-value against the null hypothesis that features and targets are + independent. + + The p-value represents the fraction of randomized data sets where the + estimator performed as well or better than in the original data. A small + p-value suggests that there is a real dependency between features and + targets which has been used by the estimator to give good predictions. + A large p-value may be due to lack of real dependency between features + and targets or the estimator was not able to use the dependency to + give good predictions. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator object implementing 'fit' + The object to use to fit the data. + + X : array-like of shape at least 2D + The data to fit. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None + The target variable to try to predict in the case of + supervised learning. + + groups : array-like of shape (n_samples,), default=None + Labels to constrain permutation within groups, i.e. ``y`` values + are permuted among samples with the same group identifier. + When not specified, ``y`` values are permuted among all samples. + + When a grouped cross-validator is used, the group labels are + also passed on to the ``split`` method of the cross-validator. The + cross-validator uses them for grouping the samples while splitting + the dataset into train/test set. + + .. versionchanged:: 1.6 + ``groups`` can only be passed if metadata routing is not enabled + via ``sklearn.set_config(enable_metadata_routing=True)``. 
When routing
+            is enabled, pass ``groups`` alongside other metadata via the ``params``
+            argument instead. E.g.:
+            ``permutation_test_score(..., params={'groups': groups})``.
+
+    cv : int, cross-validation generator or an iterable, default=None
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+
+        - `None`, to use the default 5-fold cross validation,
+        - int, to specify the number of folds in a `(Stratified)KFold`,
+        - :term:`CV splitter`,
+        - An iterable yielding (train, test) splits as arrays of indices.
+
+        For `int`/`None` inputs, if the estimator is a classifier and `y` is
+        either binary or multiclass, :class:`StratifiedKFold` is used. In all
+        other cases, :class:`KFold` is used. These splitters are instantiated
+        with `shuffle=False` so the splits will be the same across calls.
+
+        Refer to the :ref:`User Guide <cross_validation>` for the various
+        cross-validation strategies that can be used here.
+
+        .. versionchanged:: 0.22
+            `cv` default value if `None` changed from 3-fold to 5-fold.
+
+    n_permutations : int, default=100
+        Number of times to permute ``y``.
+
+    n_jobs : int, default=None
+        Number of jobs to run in parallel. Training the estimator and computing
+        the cross-validated score are parallelized over the permutations.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    random_state : int, RandomState instance or None, default=0
+        Pass an int for reproducible output for permutation of
+        ``y`` values among samples. See :term:`Glossary <random_state>`.
+
+    verbose : int, default=0
+        The verbosity level.
+
+    scoring : str or callable, default=None
+        A single str (see :ref:`scoring_parameter`) or a callable
+        (see :ref:`scoring_callable`) to evaluate the predictions on the test set.
+
+        If `None`, the estimator's score method is used.
+
+    fit_params : dict, default=None
+        Parameters to pass to the fit method of the estimator.
+
+        .. deprecated:: 1.6
+            This parameter is deprecated and will be removed in version 1.8. Use
+            ``params`` instead.
+
+    params : dict, default=None
+        Parameters to pass to the `fit` method of the estimator, the scorer
+        and the cv splitter.
+
+        - If `enable_metadata_routing=False` (default): Parameters directly passed to
+          the `fit` method of the estimator.
+
+        - If `enable_metadata_routing=True`: Parameters safely routed to the `fit`
+          method of the estimator, `cv` object and `scorer`. See :ref:`Metadata Routing
+          User Guide <metadata_routing>` for more details.
+
+        .. versionadded:: 1.6
+
+    Returns
+    -------
+    score : float
+        The true score without permuting targets.
+
+    permutation_scores : array of shape (n_permutations,)
+        The scores obtained for each permutation.
+
+    pvalue : float
+        The p-value, which approximates the probability that the score would
+        be obtained by chance. This is calculated as:
+
+        `(C + 1) / (n_permutations + 1)`
+
+        Where C is the number of permutations whose score >= the true score.
+
+        The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.
+
+    Notes
+    -----
+    This function implements Test 1 in:
+
+        Ojala and Garriga. `Permutation Tests for Studying Classifier Performance
+        <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The
+        Journal of Machine Learning Research (2010) vol. 
11 + + Examples + -------- + >>> from sklearn.datasets import make_classification + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.model_selection import permutation_test_score + >>> X, y = make_classification(random_state=0) + >>> estimator = LogisticRegression() + >>> score, permutation_scores, pvalue = permutation_test_score( + ... estimator, X, y, random_state=0 + ... ) + >>> print(f"Original Score: {score:.3f}") + Original Score: 0.810 + >>> print( + ... f"Permutation Scores: {permutation_scores.mean():.3f} +/- " + ... f"{permutation_scores.std():.3f}" + ... ) + Permutation Scores: 0.505 +/- 0.057 + >>> print(f"P-value: {pvalue:.3f}") + P-value: 0.010 + """ + params = _check_params_groups_deprecation(fit_params, params, groups, "1.8") + + X, y, groups = indexable(X, y, groups) + + cv = check_cv(cv, y, classifier=is_classifier(estimator)) + scorer = check_scoring(estimator, scoring=scoring) + random_state = check_random_state(random_state) + + if _routing_enabled(): + router = ( + MetadataRouter(owner="permutation_test_score") + .add( + estimator=estimator, + # TODO(SLEP6): also pass metadata to the predict method for + # scoring? + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + .add( + splitter=cv, + method_mapping=MethodMapping().add(caller="fit", callee="split"), + ) + .add( + scorer=scorer, + method_mapping=MethodMapping().add(caller="fit", callee="score"), + ) + ) + + try: + routed_params = process_routing(router, "fit", **params) + except UnsetMetadataPassedError as e: + # The default exception would mention `fit` since in the above + # `process_routing` code, we pass `fit` as the caller. However, + # the user is not calling `fit` directly, so we change the message + # to make it more suitable for this case. + unrequested_params = sorted(e.unrequested_params) + raise UnsetMetadataPassedError( + message=( + f"{unrequested_params} are passed to `permutation_test_score`" + " but are not explicitly set as requested or not requested" + " for permutation_test_score's" + f" estimator: {estimator.__class__.__name__}. Call" + " `.set_fit_request({{metadata}}=True)` on the estimator for" + f" each metadata in {unrequested_params} that you" + " want to use and `metadata=False` for not using it. See the" + " Metadata Routing User guide" + " for more" + " information." + ), + unrequested_params=e.unrequested_params, + routed_params=e.routed_params, + ) + + else: + routed_params = Bunch() + routed_params.estimator = Bunch(fit=params) + routed_params.splitter = Bunch(split={"groups": groups}) + routed_params.scorer = Bunch(score={}) + + # We clone the estimator to make sure that all the folds are + # independent, and that it is pickle-able. 
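+    # The reference score below uses the true `y`; each of the n_permutations
+    # scores is computed after shuffling `y` (within `groups` when provided).
+    # As a worked example, with n_permutations=100 and no permutation reaching
+    # the reference score, the p-value computed at the end of this function is
+    # (0 + 1) / (100 + 1) ~= 0.0099, the smallest attainable value.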
+ score = _permutation_test_score( + clone(estimator), + X, + y, + cv, + scorer, + split_params=routed_params.splitter.split, + fit_params=routed_params.estimator.fit, + score_params=routed_params.scorer.score, + ) + permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(_permutation_test_score)( + clone(estimator), + X, + _shuffle(y, groups, random_state), + cv, + scorer, + split_params=routed_params.splitter.split, + fit_params=routed_params.estimator.fit, + score_params=routed_params.scorer.score, + ) + for _ in range(n_permutations) + ) + permutation_scores = np.array(permutation_scores) + pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1) + return score, permutation_scores, pvalue + + +def _permutation_test_score( + estimator, X, y, cv, scorer, split_params, fit_params, score_params +): + """Auxiliary function for permutation_test_score""" + # Adjust length of sample weights + fit_params = fit_params if fit_params is not None else {} + score_params = score_params if score_params is not None else {} + + avg_score = [] + for train, test in cv.split(X, y, **split_params): + X_train, y_train = _safe_split(estimator, X, y, train) + X_test, y_test = _safe_split(estimator, X, y, test, train) + fit_params_train = _check_method_params(X, params=fit_params, indices=train) + score_params_test = _check_method_params(X, params=score_params, indices=test) + estimator.fit(X_train, y_train, **fit_params_train) + avg_score.append(scorer(estimator, X_test, y_test, **score_params_test)) + return np.mean(avg_score) + + +def _shuffle(y, groups, random_state): + """Return a shuffled copy of y eventually shuffle among same groups.""" + if groups is None: + indices = random_state.permutation(len(y)) + else: + indices = np.arange(len(groups)) + for group in np.unique(groups): + this_mask = groups == group + indices[this_mask] = random_state.permutation(indices[this_mask]) + return _safe_indexing(y, indices) + + +@validate_params( + { + "estimator": [HasMethods(["fit"])], + "X": ["array-like", "sparse matrix"], + "y": ["array-like", None], + "groups": ["array-like", None], + "train_sizes": ["array-like"], + "cv": ["cv_object"], + "scoring": [StrOptions(set(get_scorer_names())), callable, None], + "exploit_incremental_learning": ["boolean"], + "n_jobs": [Integral, None], + "pre_dispatch": [Integral, str], + "verbose": ["verbose"], + "shuffle": ["boolean"], + "random_state": ["random_state"], + "error_score": [StrOptions({"raise"}), Real], + "return_times": ["boolean"], + "fit_params": [dict, None], + "params": [dict, None], + }, + prefer_skip_nested_validation=False, # estimator is not validated yet +) +def learning_curve( + estimator, + X, + y, + *, + groups=None, + train_sizes=np.linspace(0.1, 1.0, 5), + cv=None, + scoring=None, + exploit_incremental_learning=False, + n_jobs=None, + pre_dispatch="all", + verbose=0, + shuffle=False, + random_state=None, + error_score=np.nan, + return_times=False, + fit_params=None, + params=None, +): + """Learning curve. + + Determines cross-validated training and test scores for different training + set sizes. + + A cross-validation generator splits the whole dataset k times in training + and test data. Subsets of the training set with varying sizes will be used + to train the estimator and a score for each training subset size and the + test set will be computed. Afterwards, the scores will be averaged over + all k runs for each training subset size. + + Read more in the :ref:`User Guide `. 
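+
+    For example, with ``cv=5`` and ``train_sizes=[0.5, 1.0]``, each of the 5
+    training folds is subsampled to 50% and then 100% of its size, an
+    estimator is fit and scored on each subset, and the scores for each size
+    are collected across the 5 folds (one row per size in the returned score
+    arrays).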
+ + Parameters + ---------- + estimator : object type that implements the "fit" method + An object of that type which is cloned for each validation. It must + also implement "predict" unless `scoring` is a callable that doesn't + rely on "predict" to compute a score. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None + Target relative to X for classification or regression; + None for unsupervised learning. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`GroupKFold`). + + .. versionchanged:: 1.6 + ``groups`` can only be passed if metadata routing is not enabled + via ``sklearn.set_config(enable_metadata_routing=True)``. When routing + is enabled, pass ``groups`` alongside other metadata via the ``params`` + argument instead. E.g.: + ``learning_curve(..., params={'groups': groups})``. + + train_sizes : array-like of shape (n_ticks,), \ + default=np.linspace(0.1, 1.0, 5) + Relative or absolute numbers of training examples that will be used to + generate the learning curve. If the dtype is float, it is regarded as a + fraction of the maximum size of the training set (that is determined + by the selected validation method), i.e. it has to be within (0, 1]. + Otherwise it is interpreted as absolute sizes of the training sets. + Note that for classification the number of samples usually has to + be big enough to contain at least one sample from each class. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - int, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For int/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with `shuffle=False` so the splits will be the same across calls. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + + scoring : str or callable, default=None + A str (see :ref:`scoring_parameter`) or a scorer callable object / function with + signature ``scorer(estimator, X, y)``. + + exploit_incremental_learning : bool, default=False + If the estimator supports incremental learning, this will be + used to speed up fitting for different training set sizes. + + n_jobs : int, default=None + Number of jobs to run in parallel. Training the estimator and computing + the score are parallelized over the different training and test sets. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + pre_dispatch : int or str, default='all' + Number of predispatched jobs for parallel execution (default is + all). The option can reduce the allocated memory. The str can + be an expression like '2*n_jobs'. 
+
+    verbose : int, default=0
+        Controls the verbosity: the higher, the more messages.
+
+    shuffle : bool, default=False
+        Whether to shuffle training data before taking prefixes of it
+        based on ``train_sizes``.
+
+    random_state : int, RandomState instance or None, default=None
+        Used when ``shuffle`` is True. Pass an int for reproducible
+        output across multiple function calls.
+        See :term:`Glossary `.
+
+    error_score : 'raise' or numeric, default=np.nan
+        Value to assign to the score if an error occurs in estimator fitting.
+        If set to 'raise', the error is raised.
+        If a numeric value is given, FitFailedWarning is raised.
+
+        .. versionadded:: 0.20
+
+    return_times : bool, default=False
+        Whether to return the fit and score times.
+
+    fit_params : dict, default=None
+        Parameters to pass to the fit method of the estimator.
+
+        .. deprecated:: 1.6
+            This parameter is deprecated and will be removed in version 1.8. Use
+            ``params`` instead.
+
+    params : dict, default=None
+        Parameters to pass to the `fit` method of the estimator and to the scorer.
+
+        - If `enable_metadata_routing=False` (default): Parameters directly passed to
+          the `fit` method of the estimator.
+
+        - If `enable_metadata_routing=True`: Parameters safely routed to the `fit`
+          method of the estimator. See :ref:`Metadata Routing User Guide ` for
+          more details.
+
+        .. versionadded:: 1.6
+
+    Returns
+    -------
+    train_sizes_abs : array of shape (n_unique_ticks,)
+        Numbers of training examples that have been used to generate the
+        learning curve. Note that the number of ticks might be less
+        than n_ticks because duplicate entries will be removed.
+
+    train_scores : array of shape (n_ticks, n_cv_folds)
+        Scores on training sets.
+
+    test_scores : array of shape (n_ticks, n_cv_folds)
+        Scores on test set.
+
+    fit_times : array of shape (n_ticks, n_cv_folds)
+        Times spent for fitting in seconds. Only present if ``return_times``
+        is True.
+
+    score_times : array of shape (n_ticks, n_cv_folds)
+        Times spent for scoring in seconds. Only present if ``return_times``
+        is True.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.tree import DecisionTreeClassifier
+    >>> from sklearn.model_selection import learning_curve
+    >>> X, y = make_classification(n_samples=100, n_features=10, random_state=42)
+    >>> tree = DecisionTreeClassifier(max_depth=4, random_state=42)
+    >>> train_size_abs, train_scores, test_scores = learning_curve(
+    ...     tree, X, y, train_sizes=[0.3, 0.6, 0.9]
+    ... )
+    >>> for train_size, cv_train_scores, cv_test_scores in zip(
+    ...     train_size_abs, train_scores, test_scores
+    ... ):
+    ...     print(f"{train_size} samples were used to train the model")
+    ...     print(f"The average train accuracy is {cv_train_scores.mean():.2f}")
+    ...
print(f"The average test accuracy is {cv_test_scores.mean():.2f}") + 24 samples were used to train the model + The average train accuracy is 1.00 + The average test accuracy is 0.85 + 48 samples were used to train the model + The average train accuracy is 1.00 + The average test accuracy is 0.90 + 72 samples were used to train the model + The average train accuracy is 1.00 + The average test accuracy is 0.93 + """ + if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): + raise ValueError( + "An estimator must support the partial_fit interface " + "to exploit incremental learning" + ) + + params = _check_params_groups_deprecation(fit_params, params, groups, "1.8") + + X, y, groups = indexable(X, y, groups) + + cv = check_cv(cv, y, classifier=is_classifier(estimator)) + + scorer = check_scoring(estimator, scoring=scoring) + + if _routing_enabled(): + router = ( + MetadataRouter(owner="learning_curve") + .add( + estimator=estimator, + # TODO(SLEP6): also pass metadata to the predict method for + # scoring? + method_mapping=MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="fit", callee="partial_fit"), + ) + .add( + splitter=cv, + method_mapping=MethodMapping().add(caller="fit", callee="split"), + ) + .add( + scorer=scorer, + method_mapping=MethodMapping().add(caller="fit", callee="score"), + ) + ) + + try: + routed_params = process_routing(router, "fit", **params) + except UnsetMetadataPassedError as e: + # The default exception would mention `fit` since in the above + # `process_routing` code, we pass `fit` as the caller. However, + # the user is not calling `fit` directly, so we change the message + # to make it more suitable for this case. + unrequested_params = sorted(e.unrequested_params) + raise UnsetMetadataPassedError( + message=( + f"{unrequested_params} are passed to `learning_curve` but are not" + " explicitly set as requested or not requested for learning_curve's" + f" estimator: {estimator.__class__.__name__}. Call" + " `.set_fit_request({{metadata}}=True)` on the estimator for" + f" each metadata in {unrequested_params} that you" + " want to use and `metadata=False` for not using it. See the" + " Metadata Routing User guide" + " for more" + " information." + ), + unrequested_params=e.unrequested_params, + routed_params=e.routed_params, + ) + + else: + routed_params = Bunch() + routed_params.estimator = Bunch(fit=params, partial_fit=params) + routed_params.splitter = Bunch(split={"groups": groups}) + routed_params.scorer = Bunch(score={}) + + # Store cv as list as we will be iterating over the list multiple times + cv_iter = list(cv.split(X, y, **routed_params.splitter.split)) + + n_max_training_samples = len(cv_iter[0][0]) + # Because the lengths of folds can be significantly different, it is + # not guaranteed that we use all of the available training data when we + # use the first 'n_max_training_samples' samples. 
+ train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples) + n_unique_ticks = train_sizes_abs.shape[0] + if verbose > 0: + print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) + + parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) + + if shuffle: + rng = check_random_state(random_state) + cv_iter = ((rng.permutation(train), test) for train, test in cv_iter) + + if exploit_incremental_learning: + classes = np.unique(y) if is_classifier(estimator) else None + out = parallel( + delayed(_incremental_fit_estimator)( + clone(estimator), + X, + y, + classes, + train, + test, + train_sizes_abs, + scorer, + return_times, + error_score=error_score, + fit_params=routed_params.estimator.partial_fit, + score_params=routed_params.scorer.score, + ) + for train, test in cv_iter + ) + out = np.asarray(out).transpose((2, 1, 0)) + else: + train_test_proportions = [] + for train, test in cv_iter: + for n_train_samples in train_sizes_abs: + train_test_proportions.append((train[:n_train_samples], test)) + + results = parallel( + delayed(_fit_and_score)( + clone(estimator), + X, + y, + scorer=scorer, + train=train, + test=test, + verbose=verbose, + parameters=None, + fit_params=routed_params.estimator.fit, + score_params=routed_params.scorer.score, + return_train_score=True, + error_score=error_score, + return_times=return_times, + ) + for train, test in train_test_proportions + ) + _warn_or_raise_about_fit_failures(results, error_score) + results = _aggregate_score_dicts(results) + train_scores = results["train_scores"].reshape(-1, n_unique_ticks).T + test_scores = results["test_scores"].reshape(-1, n_unique_ticks).T + out = [train_scores, test_scores] + + if return_times: + fit_times = results["fit_time"].reshape(-1, n_unique_ticks).T + score_times = results["score_time"].reshape(-1, n_unique_ticks).T + out.extend([fit_times, score_times]) + + ret = train_sizes_abs, out[0], out[1] + + if return_times: + ret = ret + (out[2], out[3]) + + return ret + + +def _translate_train_sizes(train_sizes, n_max_training_samples): + """Determine absolute sizes of training subsets and validate 'train_sizes'. + + Examples: + _translate_train_sizes([0.5, 1.0], 10) -> [5, 10] + _translate_train_sizes([5, 10], 10) -> [5, 10] + + Parameters + ---------- + train_sizes : array-like of shape (n_ticks,) + Numbers of training examples that will be used to generate the + learning curve. If the dtype is float, it is regarded as a + fraction of 'n_max_training_samples', i.e. it has to be within (0, 1]. + + n_max_training_samples : int + Maximum number of training samples (upper bound of 'train_sizes'). + + Returns + ------- + train_sizes_abs : array of shape (n_unique_ticks,) + Numbers of training examples that will be used to generate the + learning curve. Note that the number of ticks might be less + than n_ticks because duplicate entries will be removed. + """ + train_sizes_abs = np.asarray(train_sizes) + n_ticks = train_sizes_abs.shape[0] + n_min_required_samples = np.min(train_sizes_abs) + n_max_required_samples = np.max(train_sizes_abs) + if np.issubdtype(train_sizes_abs.dtype, np.floating): + if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0: + raise ValueError( + "train_sizes has been interpreted as fractions " + "of the maximum number of training samples and " + "must be within (0, 1], but is within [%f, %f]." 
+ % (n_min_required_samples, n_max_required_samples) + ) + train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype( + dtype=int, copy=False + ) + train_sizes_abs = np.clip(train_sizes_abs, 1, n_max_training_samples) + else: + if ( + n_min_required_samples <= 0 + or n_max_required_samples > n_max_training_samples + ): + raise ValueError( + "train_sizes has been interpreted as absolute " + "numbers of training samples and must be within " + "(0, %d], but is within [%d, %d]." + % ( + n_max_training_samples, + n_min_required_samples, + n_max_required_samples, + ) + ) + + train_sizes_abs = np.unique(train_sizes_abs) + if n_ticks > train_sizes_abs.shape[0]: + warnings.warn( + "Removed duplicate entries from 'train_sizes'. Number " + "of ticks will be less than the size of " + "'train_sizes': %d instead of %d." % (train_sizes_abs.shape[0], n_ticks), + RuntimeWarning, + ) + + return train_sizes_abs + + +def _incremental_fit_estimator( + estimator, + X, + y, + classes, + train, + test, + train_sizes, + scorer, + return_times, + error_score, + fit_params, + score_params, +): + """Train estimator on training subsets incrementally and compute scores.""" + train_scores, test_scores, fit_times, score_times = [], [], [], [] + partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) + if fit_params is None: + fit_params = {} + if classes is None: + partial_fit_func = partial(estimator.partial_fit, **fit_params) + else: + partial_fit_func = partial(estimator.partial_fit, classes=classes, **fit_params) + score_params = score_params if score_params is not None else {} + score_params_train = _check_method_params(X, params=score_params, indices=train) + score_params_test = _check_method_params(X, params=score_params, indices=test) + + for n_train_samples, partial_train in partitions: + train_subset = train[:n_train_samples] + X_train, y_train = _safe_split(estimator, X, y, train_subset) + X_partial_train, y_partial_train = _safe_split(estimator, X, y, partial_train) + X_test, y_test = _safe_split(estimator, X, y, test, train_subset) + start_fit = time.time() + if y_partial_train is None: + partial_fit_func(X_partial_train) + else: + partial_fit_func(X_partial_train, y_partial_train) + fit_time = time.time() - start_fit + fit_times.append(fit_time) + + start_score = time.time() + + test_scores.append( + _score( + estimator, + X_test, + y_test, + scorer, + score_params=score_params_test, + error_score=error_score, + ) + ) + train_scores.append( + _score( + estimator, + X_train, + y_train, + scorer, + score_params=score_params_train, + error_score=error_score, + ) + ) + score_time = time.time() - start_score + score_times.append(score_time) + + ret = ( + (train_scores, test_scores, fit_times, score_times) + if return_times + else (train_scores, test_scores) + ) + + return np.array(ret).T + + +@validate_params( + { + "estimator": [HasMethods(["fit"])], + "X": ["array-like", "sparse matrix"], + "y": ["array-like", None], + "param_name": [str], + "param_range": ["array-like"], + "groups": ["array-like", None], + "cv": ["cv_object"], + "scoring": [StrOptions(set(get_scorer_names())), callable, None], + "n_jobs": [Integral, None], + "pre_dispatch": [Integral, str], + "verbose": ["verbose"], + "error_score": [StrOptions({"raise"}), Real], + "fit_params": [dict, None], + "params": [dict, None], + }, + prefer_skip_nested_validation=False, # estimator is not validated yet +) +def validation_curve( + estimator, + X, + y, + *, + param_name, + param_range, + groups=None, + cv=None, + scoring=None, + 
n_jobs=None, + pre_dispatch="all", + verbose=0, + error_score=np.nan, + fit_params=None, + params=None, +): + """Validation curve. + + Determine training and test scores for varying parameter values. + + Compute scores for an estimator with different values of a specified + parameter. This is similar to grid search with one parameter. However, this + will also compute training scores and is merely a utility for plotting the + results. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : object type that implements the "fit" method + An object of that type which is cloned for each validation. It must + also implement "predict" unless `scoring` is a callable that doesn't + rely on "predict" to compute a score. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None + Target relative to X for classification or regression; + None for unsupervised learning. + + param_name : str + Name of the parameter that will be varied. + + param_range : array-like of shape (n_values,) + The values of the parameter that will be evaluated. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. Only used in conjunction with a "Group" :term:`cv` + instance (e.g., :class:`GroupKFold`). + + .. versionchanged:: 1.6 + ``groups`` can only be passed if metadata routing is not enabled + via ``sklearn.set_config(enable_metadata_routing=True)``. When routing + is enabled, pass ``groups`` alongside other metadata via the ``params`` + argument instead. E.g.: + ``validation_curve(..., params={'groups': groups})``. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - int, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For int/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with `shuffle=False` so the splits will be the same across calls. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + + scoring : str or callable, default=None + A str (see :ref:`scoring_parameter`) or a scorer callable object / function with + signature ``scorer(estimator, X, y)``. + + n_jobs : int, default=None + Number of jobs to run in parallel. Training the estimator and computing + the score are parallelized over the combinations of each parameter + value and each cross-validation split. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + pre_dispatch : int or str, default='all' + Number of predispatched jobs for parallel execution (default is + all). The option can reduce the allocated memory. The str can + be an expression like '2*n_jobs'. + + verbose : int, default=0 + Controls the verbosity: the higher, the more messages. 
+ + error_score : 'raise' or numeric, default=np.nan + Value to assign to the score if an error occurs in estimator fitting. + If set to 'raise', the error is raised. + If a numeric value is given, FitFailedWarning is raised. + + .. versionadded:: 0.20 + + fit_params : dict, default=None + Parameters to pass to the fit method of the estimator. + + .. deprecated:: 1.6 + This parameter is deprecated and will be removed in version 1.8. Use + ``params`` instead. + + params : dict, default=None + Parameters to pass to the estimator, scorer and cross-validation object. + + - If `enable_metadata_routing=False` (default): Parameters directly passed to + the `fit` method of the estimator. + + - If `enable_metadata_routing=True`: Parameters safely routed to the `fit` + method of the estimator, to the scorer and to the cross-validation object. + See :ref:`Metadata Routing User Guide ` for more details. + + .. versionadded:: 1.6 + + Returns + ------- + train_scores : array of shape (n_ticks, n_cv_folds) + Scores on training sets. + + test_scores : array of shape (n_ticks, n_cv_folds) + Scores on test set. + + Notes + ----- + See :ref:`sphx_glr_auto_examples_model_selection_plot_train_error_vs_test_error.py` + + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import validation_curve + >>> from sklearn.linear_model import LogisticRegression + >>> X, y = make_classification(n_samples=1_000, random_state=0) + >>> logistic_regression = LogisticRegression() + >>> param_name, param_range = "C", np.logspace(-8, 3, 10) + >>> train_scores, test_scores = validation_curve( + ... logistic_regression, X, y, param_name=param_name, param_range=param_range + ... ) + >>> print(f"The average train accuracy is {train_scores.mean():.2f}") + The average train accuracy is 0.81 + >>> print(f"The average test accuracy is {test_scores.mean():.2f}") + The average test accuracy is 0.81 + """ + params = _check_params_groups_deprecation(fit_params, params, groups, "1.8") + X, y, groups = indexable(X, y, groups) + + cv = check_cv(cv, y, classifier=is_classifier(estimator)) + scorer = check_scoring(estimator, scoring=scoring) + + if _routing_enabled(): + router = ( + MetadataRouter(owner="validation_curve") + .add( + estimator=estimator, + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + .add( + splitter=cv, + method_mapping=MethodMapping().add(caller="fit", callee="split"), + ) + .add( + scorer=scorer, + method_mapping=MethodMapping().add(caller="fit", callee="score"), + ) + ) + + try: + routed_params = process_routing(router, "fit", **params) + except UnsetMetadataPassedError as e: + # The default exception would mention `fit` since in the above + # `process_routing` code, we pass `fit` as the caller. However, + # the user is not calling `fit` directly, so we change the message + # to make it more suitable for this case. + unrequested_params = sorted(e.unrequested_params) + raise UnsetMetadataPassedError( + message=( + f"{unrequested_params} are passed to `validation_curve` but are not" + " explicitly set as requested or not requested for" + f" validation_curve's estimator: {estimator.__class__.__name__}." + " Call `.set_fit_request({{metadata}}=True)` on the estimator for" + f" each metadata in {unrequested_params} that you" + " want to use and `metadata=False` for not using it. See the" + " Metadata Routing User guide" + " for more" + " information." 
+                ),
+                unrequested_params=e.unrequested_params,
+                routed_params=e.routed_params,
+            )
+
+    else:
+        routed_params = Bunch()
+        routed_params.estimator = Bunch(fit=params)
+        routed_params.splitter = Bunch(split={"groups": groups})
+        routed_params.scorer = Bunch(score={})
+
+    parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose)
+    results = parallel(
+        delayed(_fit_and_score)(
+            clone(estimator),
+            X,
+            y,
+            scorer=scorer,
+            train=train,
+            test=test,
+            verbose=verbose,
+            parameters={param_name: v},
+            fit_params=routed_params.estimator.fit,
+            score_params=routed_params.scorer.score,
+            return_train_score=True,
+            error_score=error_score,
+        )
+        # NOTE: do not change the iteration order, so that single-use cv
+        # splitters keep working.
+        for train, test in cv.split(X, y, **routed_params.splitter.split)
+        for v in param_range
+    )
+    n_params = len(param_range)
+
+    results = _aggregate_score_dicts(results)
+    train_scores = results["train_scores"].reshape(-1, n_params).T
+    test_scores = results["test_scores"].reshape(-1, n_params).T
+
+    return train_scores, test_scores
+
+
+def _aggregate_score_dicts(scores):
+    """Aggregate a list of dicts into a dict of np.ndarray.
+
+    The input of _aggregate_score_dicts is a flat list of dicts of the form
+    [{'prec': 0.1, 'acc': 1.0}, {'prec': 0.1, 'acc': 1.0}, ...], one dict per
+    evaluation; it is converted to a dict of arrays of the form
+    {'prec': np.array([0.1, ...]), 'acc': np.array([1.0, ...])}.
+
+    Parameters
+    ----------
+    scores : list of dict
+        List of dicts of the scores for all scorers. This is a flat list,
+        assumed originally to be of row major order.
+
+    Examples
+    --------
+    >>> scores = [{'a': 1, 'b': 10}, {'a': 2, 'b': 2}, {'a': 3, 'b': 3},
+    ...           {'a': 10, 'b': 10}]  # doctest: +SKIP
+    >>> _aggregate_score_dicts(scores)  # doctest: +SKIP
+    {'a': array([1, 2, 3, 10]),
+     'b': array([10, 2, 3, 10])}
+    """
+    return {
+        key: (
+            np.asarray([score[key] for score in scores])
+            if isinstance(scores[0][key], numbers.Number)
+            else [score[key] for score in scores]
+        )
+        for key in scores[0]
+    }
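+
+
+# The sketch below is an illustrative addition, not part of scikit-learn's
+# module: `_demo_row_major_reshape` is a hypothetical helper demonstrating the
+# row-major ordering assumed by `_aggregate_score_dicts` and the
+# `reshape(-1, n_params).T` step in `validation_curve` above. It only runs
+# when called explicitly.
+def _demo_row_major_reshape():
+    """Show how flat, row-major per-evaluation scores become per-parameter rows."""
+    # Two CV splits x two parameter values, ordered split by split, i.e. the
+    # iteration order used in `validation_curve`.
+    flat = [
+        {"train_scores": 0.9, "test_scores": 0.8},  # split 0, param 0
+        {"train_scores": 0.7, "test_scores": 0.6},  # split 0, param 1
+        {"train_scores": 0.5, "test_scores": 0.4},  # split 1, param 0
+        {"train_scores": 0.3, "test_scores": 0.2},  # split 1, param 1
+    ]
+    agg = _aggregate_score_dicts(flat)  # -> {"train_scores": array([...]), ...}
+    n_params = 2
+    # reshape(-1, n_params) recovers one row per split; transposing gives the
+    # documented (n_params, n_splits) layout.
+    train_scores = agg["train_scores"].reshape(-1, n_params).T
+    assert train_scores.shape == (n_params, 2)
+    assert list(train_scores[0]) == [0.9, 0.5]  # param 0 across both splits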