diff --git a/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfe21c720a6a6f97d6857de1d0cf268ab20dda53 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f899edc59cb41fdd671b256a228e5e06dfc5e24c92712e75005b251b000865 +size 1108 diff --git a/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..7b7718d29ecb2075088f54c5f2c5fc0d01d9404b --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec0955788914fa81f698e97a4d1aff773d7a125ed6e769c6271a0b48fc4011d +size 363 diff --git a/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..3265a7d933efe836193228b86e84c6c7a8b45afd --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7cbcb58c2edcfea45c058b751faf7783e710462a924e9aacad8d47a7e9f94b +size 1549 diff --git a/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f610044b5cc550df4d4ef18cd2131306dba05be --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6931af256195fcdd2e47dd8b0f9edf16fbf03b198e77b70e3dfd9877cdf09515 +size 1190 diff --git a/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..08e36a9fb7d7eb1d95b74eebf7c1b870d4a052c1 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46f6c5f221d877de604b906403b20cbdf674f1225bcdbb3e15bd1882a69a471 +size 1501 diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/__init__.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0cea88a8108e7c0c65504c7c20ee9648c7a7f38 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/__init__.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_data.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_data.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..92b5a2ae00e06f5e34b8f0f928cfb6a54c1f581a Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_data.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_discretization.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_discretization.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8f1bf8e07e48854f71219e8d57e65e04421adb4 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_discretization.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_encoders.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_encoders.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..673bfab9c9680e0a09239382e3959966d9ac86ec Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_encoders.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_function_transformer.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_function_transformer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93fe7febf23b2fb5238eaa513caa8e0cb4c36c9b Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_function_transformer.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_label.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_label.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36fc1c01e52563baa131527cec48817da42e23ae Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_label.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_polynomial.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_polynomial.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ff486e5112680c566941d3b47e35942e43204d0 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_polynomial.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_target_encoder.cpython-39.pyc b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_target_encoder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5cbff0038b957aac33a47b7dddffef44be6a520 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/preprocessing/__pycache__/_target_encoder.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/__init__.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_discretization.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_discretization.py new file mode 100644 index 0000000000000000000000000000000000000000..69a0fc5ad9df1f908bcbe46bd8f20af7bca83d86 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_discretization.py @@ -0,0 +1,500 @@ +import warnings + +import numpy as np +import pytest +import scipy.sparse as sp + +from sklearn import clone +from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder +from 
sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, +) + +X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]] + + +@pytest.mark.parametrize( + "strategy, expected, sample_weight", + [ + ("uniform", [[0, 0, 0, 0], [1, 1, 1, 0], [2, 2, 2, 1], [2, 2, 2, 2]], None), + ("kmeans", [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]], None), + ("quantile", [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]], None), + ( + "quantile", + [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]], + [1, 1, 2, 1], + ), + ( + "quantile", + [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]], + [1, 1, 1, 1], + ), + ( + "quantile", + [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]], + [0, 1, 1, 1], + ), + ( + "kmeans", + [[0, 0, 0, 0], [1, 1, 1, 0], [1, 1, 1, 1], [2, 2, 2, 2]], + [1, 0, 3, 1], + ), + ( + "kmeans", + [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]], + [1, 1, 1, 1], + ), + ], +) +def test_fit_transform(strategy, expected, sample_weight): + est = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy=strategy) + est.fit(X, sample_weight=sample_weight) + assert_array_equal(expected, est.transform(X)) + + +def test_valid_n_bins(): + KBinsDiscretizer(n_bins=2).fit_transform(X) + KBinsDiscretizer(n_bins=np.array([2])[0]).fit_transform(X) + assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int) + + +@pytest.mark.parametrize("strategy", ["uniform"]) +def test_kbinsdiscretizer_wrong_strategy_with_weights(strategy): + """Check that we raise an error when the wrong strategy is used.""" + sample_weight = np.ones(shape=(len(X))) + est = KBinsDiscretizer(n_bins=3, strategy=strategy) + err_msg = ( + "`sample_weight` was provided but it cannot be used with strategy='uniform'." + ) + with pytest.raises(ValueError, match=err_msg): + est.fit(X, sample_weight=sample_weight) + + +def test_invalid_n_bins_array(): + # Bad shape + n_bins = np.full((2, 4), 2.0) + est = KBinsDiscretizer(n_bins=n_bins) + err_msg = r"n_bins must be a scalar or array of shape \(n_features,\)." + with pytest.raises(ValueError, match=err_msg): + est.fit_transform(X) + + # Incorrect number of features + n_bins = [1, 2, 2] + est = KBinsDiscretizer(n_bins=n_bins) + err_msg = r"n_bins must be a scalar or array of shape \(n_features,\)." + with pytest.raises(ValueError, match=err_msg): + est.fit_transform(X) + + # Bad bin values + n_bins = [1, 2, 2, 1] + est = KBinsDiscretizer(n_bins=n_bins) + err_msg = ( + "KBinsDiscretizer received an invalid number of bins " + "at indices 0, 3. Number of bins must be at least 2, " + "and must be an int." + ) + with pytest.raises(ValueError, match=err_msg): + est.fit_transform(X) + + # Float bin values + n_bins = [2.1, 2, 2.1, 2] + est = KBinsDiscretizer(n_bins=n_bins) + err_msg = ( + "KBinsDiscretizer received an invalid number of bins " + "at indices 0, 2. Number of bins must be at least 2, " + "and must be an int." 
+ ) + with pytest.raises(ValueError, match=err_msg): + est.fit_transform(X) + + +@pytest.mark.parametrize( + "strategy, expected, sample_weight", + [ + ("uniform", [[0, 0, 0, 0], [0, 1, 1, 0], [1, 2, 2, 1], [1, 2, 2, 2]], None), + ("kmeans", [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 2, 2, 2]], None), + ("quantile", [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]], None), + ( + "quantile", + [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]], + [1, 1, 3, 1], + ), + ( + "quantile", + [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]], + [0, 1, 3, 1], + ), + # ( + # "quantile", + # [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]], + # [1, 1, 1, 1], + # ), + # + # TODO: This test case above aims to test if the case where an array of + # ones passed in sample_weight parameter is equal to the case when + # sample_weight is None. + # Unfortunately, the behavior of `_weighted_percentile` when + # `sample_weight = [1, 1, 1, 1]` are currently not equivalent. + # This problem has been addressed in issue : + # https://github.com/scikit-learn/scikit-learn/issues/17370 + ( + "kmeans", + [[0, 0, 0, 0], [0, 1, 1, 0], [1, 1, 1, 1], [1, 2, 2, 2]], + [1, 0, 3, 1], + ), + ], +) +def test_fit_transform_n_bins_array(strategy, expected, sample_weight): + est = KBinsDiscretizer( + n_bins=[2, 3, 3, 3], encode="ordinal", strategy=strategy + ).fit(X, sample_weight=sample_weight) + assert_array_equal(expected, est.transform(X)) + + # test the shape of bin_edges_ + n_features = np.array(X).shape[1] + assert est.bin_edges_.shape == (n_features,) + for bin_edges, n_bins in zip(est.bin_edges_, est.n_bins_): + assert bin_edges.shape == (n_bins + 1,) + + +@pytest.mark.filterwarnings("ignore: Bins whose width are too small") +def test_kbinsdiscretizer_effect_sample_weight(): + """Check the impact of `sample_weight` one computed quantiles.""" + X = np.array([[-2], [-1], [1], [3], [500], [1000]]) + # add a large number of bins such that each sample with a non-null weight + # will be used as bin edge + est = KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="quantile") + est.fit(X, sample_weight=[1, 1, 1, 1, 0, 0]) + assert_allclose(est.bin_edges_[0], [-2, -1, 1, 3]) + assert_allclose(est.transform(X), [[0.0], [1.0], [2.0], [2.0], [2.0], [2.0]]) + + +@pytest.mark.parametrize("strategy", ["kmeans", "quantile"]) +def test_kbinsdiscretizer_no_mutating_sample_weight(strategy): + """Make sure that `sample_weight` is not changed in place.""" + est = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy=strategy) + sample_weight = np.array([1, 3, 1, 2], dtype=np.float64) + sample_weight_copy = np.copy(sample_weight) + est.fit(X, sample_weight=sample_weight) + assert_allclose(sample_weight, sample_weight_copy) + + +@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"]) +def test_same_min_max(strategy): + warnings.simplefilter("always") + X = np.array([[1, -2], [1, -1], [1, 0], [1, 1]]) + est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode="ordinal") + warning_message = "Feature 0 is constant and will be replaced with 0." 
+ with pytest.warns(UserWarning, match=warning_message): + est.fit(X) + assert est.n_bins_[0] == 1 + # replace the feature with zeros + Xt = est.transform(X) + assert_array_equal(Xt[:, 0], np.zeros(X.shape[0])) + + +def test_transform_1d_behavior(): + X = np.arange(4) + est = KBinsDiscretizer(n_bins=2) + with pytest.raises(ValueError): + est.fit(X) + + est = KBinsDiscretizer(n_bins=2) + est.fit(X.reshape(-1, 1)) + with pytest.raises(ValueError): + est.transform(X) + + +@pytest.mark.parametrize("i", range(1, 9)) +def test_numeric_stability(i): + X_init = np.array([2.0, 4.0, 6.0, 8.0, 10.0]).reshape(-1, 1) + Xt_expected = np.array([0, 0, 1, 1, 1]).reshape(-1, 1) + + # Test up to discretizing nano units + X = X_init / 10**i + Xt = KBinsDiscretizer(n_bins=2, encode="ordinal").fit_transform(X) + assert_array_equal(Xt_expected, Xt) + + +def test_encode_options(): + est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode="ordinal").fit(X) + Xt_1 = est.transform(X) + est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode="onehot-dense").fit(X) + Xt_2 = est.transform(X) + assert not sp.issparse(Xt_2) + assert_array_equal( + OneHotEncoder( + categories=[np.arange(i) for i in [2, 3, 3, 3]], sparse_output=False + ).fit_transform(Xt_1), + Xt_2, + ) + est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode="onehot").fit(X) + Xt_3 = est.transform(X) + assert sp.issparse(Xt_3) + assert_array_equal( + OneHotEncoder( + categories=[np.arange(i) for i in [2, 3, 3, 3]], sparse_output=True + ) + .fit_transform(Xt_1) + .toarray(), + Xt_3.toarray(), + ) + + +@pytest.mark.parametrize( + "strategy, expected_2bins, expected_3bins, expected_5bins", + [ + ("uniform", [0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 2, 2], [0, 0, 1, 1, 4, 4]), + ("kmeans", [0, 0, 0, 0, 1, 1], [0, 0, 1, 1, 2, 2], [0, 0, 1, 2, 3, 4]), + ("quantile", [0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2], [0, 1, 2, 3, 4, 4]), + ], +) +def test_nonuniform_strategies( + strategy, expected_2bins, expected_3bins, expected_5bins +): + X = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1) + + # with 2 bins + est = KBinsDiscretizer(n_bins=2, strategy=strategy, encode="ordinal") + Xt = est.fit_transform(X) + assert_array_equal(expected_2bins, Xt.ravel()) + + # with 3 bins + est = KBinsDiscretizer(n_bins=3, strategy=strategy, encode="ordinal") + Xt = est.fit_transform(X) + assert_array_equal(expected_3bins, Xt.ravel()) + + # with 5 bins + est = KBinsDiscretizer(n_bins=5, strategy=strategy, encode="ordinal") + Xt = est.fit_transform(X) + assert_array_equal(expected_5bins, Xt.ravel()) + + +@pytest.mark.parametrize( + "strategy, expected_inv", + [ + ( + "uniform", + [ + [-1.5, 2.0, -3.5, -0.5], + [-0.5, 3.0, -2.5, -0.5], + [0.5, 4.0, -1.5, 0.5], + [0.5, 4.0, -1.5, 1.5], + ], + ), + ( + "kmeans", + [ + [-1.375, 2.125, -3.375, -0.5625], + [-1.375, 2.125, -3.375, -0.5625], + [-0.125, 3.375, -2.125, 0.5625], + [0.75, 4.25, -1.25, 1.625], + ], + ), + ( + "quantile", + [ + [-1.5, 2.0, -3.5, -0.75], + [-0.5, 3.0, -2.5, 0.0], + [0.5, 4.0, -1.5, 1.25], + [0.5, 4.0, -1.5, 1.25], + ], + ), + ], +) +@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"]) +def test_inverse_transform(strategy, encode, expected_inv): + kbd = KBinsDiscretizer(n_bins=3, strategy=strategy, encode=encode) + Xt = kbd.fit_transform(X) + Xinv = kbd.inverse_transform(Xt) + assert_array_almost_equal(expected_inv, Xinv) + + +@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"]) +def test_transform_outside_fit_range(strategy): + X = np.array([0, 1, 2, 3])[:, None] + kbd = KBinsDiscretizer(n_bins=4, 
strategy=strategy, encode="ordinal") + kbd.fit(X) + + X2 = np.array([-2, 5])[:, None] + X2t = kbd.transform(X2) + assert_array_equal(X2t.max(axis=0) + 1, kbd.n_bins_) + assert_array_equal(X2t.min(axis=0), [0]) + + +def test_overwrite(): + X = np.array([0, 1, 2, 3])[:, None] + X_before = X.copy() + + est = KBinsDiscretizer(n_bins=3, encode="ordinal") + Xt = est.fit_transform(X) + assert_array_equal(X, X_before) + + Xt_before = Xt.copy() + Xinv = est.inverse_transform(Xt) + assert_array_equal(Xt, Xt_before) + assert_array_equal(Xinv, np.array([[0.5], [1.5], [2.5], [2.5]])) + + +@pytest.mark.parametrize( + "strategy, expected_bin_edges", [("quantile", [0, 1, 3]), ("kmeans", [0, 1.5, 3])] +) +def test_redundant_bins(strategy, expected_bin_edges): + X = [[0], [0], [0], [0], [3], [3]] + kbd = KBinsDiscretizer(n_bins=3, strategy=strategy, subsample=None) + warning_message = "Consider decreasing the number of bins." + with pytest.warns(UserWarning, match=warning_message): + kbd.fit(X) + assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges) + + +def test_percentile_numeric_stability(): + X = np.array([0.05, 0.05, 0.95]).reshape(-1, 1) + bin_edges = np.array([0.05, 0.23, 0.41, 0.59, 0.77, 0.95]) + Xt = np.array([0, 0, 4]).reshape(-1, 1) + kbd = KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="quantile") + warning_message = "Consider decreasing the number of bins." + with pytest.warns(UserWarning, match=warning_message): + kbd.fit(X) + + assert_array_almost_equal(kbd.bin_edges_[0], bin_edges) + assert_array_almost_equal(kbd.transform(X), Xt) + + +@pytest.mark.parametrize("in_dtype", [np.float16, np.float32, np.float64]) +@pytest.mark.parametrize("out_dtype", [None, np.float32, np.float64]) +@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"]) +def test_consistent_dtype(in_dtype, out_dtype, encode): + X_input = np.array(X, dtype=in_dtype) + kbd = KBinsDiscretizer(n_bins=3, encode=encode, dtype=out_dtype) + kbd.fit(X_input) + + # test output dtype + if out_dtype is not None: + expected_dtype = out_dtype + elif out_dtype is None and X_input.dtype == np.float16: + # wrong numeric input dtype are cast in np.float64 + expected_dtype = np.float64 + else: + expected_dtype = X_input.dtype + Xt = kbd.transform(X_input) + assert Xt.dtype == expected_dtype + + +@pytest.mark.parametrize("input_dtype", [np.float16, np.float32, np.float64]) +@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"]) +def test_32_equal_64(input_dtype, encode): + # TODO this check is redundant with common checks and can be removed + # once #16290 is merged + X_input = np.array(X, dtype=input_dtype) + + # 32 bit output + kbd_32 = KBinsDiscretizer(n_bins=3, encode=encode, dtype=np.float32) + kbd_32.fit(X_input) + Xt_32 = kbd_32.transform(X_input) + + # 64 bit output + kbd_64 = KBinsDiscretizer(n_bins=3, encode=encode, dtype=np.float64) + kbd_64.fit(X_input) + Xt_64 = kbd_64.transform(X_input) + + assert_allclose_dense_sparse(Xt_32, Xt_64) + + +def test_kbinsdiscretizer_subsample_default(): + # Since the size of X is small (< 2e5), subsampling will not take place. 
+ X = np.array([-2, 1.5, -4, -1]).reshape(-1, 1) + kbd_default = KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="quantile") + kbd_default.fit(X) + + kbd_without_subsampling = clone(kbd_default) + kbd_without_subsampling.set_params(subsample=None) + kbd_without_subsampling.fit(X) + + for bin_kbd_default, bin_kbd_with_subsampling in zip( + kbd_default.bin_edges_[0], kbd_without_subsampling.bin_edges_[0] + ): + np.testing.assert_allclose(bin_kbd_default, bin_kbd_with_subsampling) + assert kbd_default.bin_edges_.shape == kbd_without_subsampling.bin_edges_.shape + + +@pytest.mark.parametrize( + "encode, expected_names", + [ + ( + "onehot", + [ + f"feat{col_id}_{float(bin_id)}" + for col_id in range(3) + for bin_id in range(4) + ], + ), + ( + "onehot-dense", + [ + f"feat{col_id}_{float(bin_id)}" + for col_id in range(3) + for bin_id in range(4) + ], + ), + ("ordinal", [f"feat{col_id}" for col_id in range(3)]), + ], +) +def test_kbinsdiscrtizer_get_feature_names_out(encode, expected_names): + """Check get_feature_names_out for different settings. + Non-regression test for #22731 + """ + X = [[-2, 1, -4], [-1, 2, -3], [0, 3, -2], [1, 4, -1]] + + kbd = KBinsDiscretizer(n_bins=4, encode=encode).fit(X) + Xt = kbd.transform(X) + + input_features = [f"feat{i}" for i in range(3)] + output_names = kbd.get_feature_names_out(input_features) + assert Xt.shape[1] == output_names.shape[0] + + assert_array_equal(output_names, expected_names) + + +@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"]) +def test_kbinsdiscretizer_subsample(strategy, global_random_seed): + # Check that the bin edges are almost the same when subsampling is used. + X = np.random.RandomState(global_random_seed).random_sample((100000, 1)) + 1 + + kbd_subsampling = KBinsDiscretizer( + strategy=strategy, subsample=50000, random_state=global_random_seed + ) + kbd_subsampling.fit(X) + + kbd_no_subsampling = clone(kbd_subsampling) + kbd_no_subsampling.set_params(subsample=None) + kbd_no_subsampling.fit(X) + + # We use a large tolerance because we can't expect the bin edges to be exactly the + # same when subsampling is used. + assert_allclose( + kbd_subsampling.bin_edges_[0], kbd_no_subsampling.bin_edges_[0], rtol=1e-2 + ) + + +# TODO(1.7): remove this test +def test_KBD_inverse_transform_Xt_deprecation(): + X = np.arange(10)[:, None] + kbd = KBinsDiscretizer() + X = kbd.fit_transform(X) + + with pytest.raises(TypeError, match="Missing required positional argument"): + kbd.inverse_transform() + + with pytest.raises(TypeError, match="Cannot use both X and Xt. 
Use X only"): + kbd.inverse_transform(X=X, Xt=X) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("error") + kbd.inverse_transform(X) + + with pytest.warns(FutureWarning, match="Xt was renamed X in version 1.5"): + kbd.inverse_transform(Xt=X) diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_encoders.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..04c45cb7b6aa5e79a0806f9c497a43397ec6a28d --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_encoders.py @@ -0,0 +1,2367 @@ +import re +import warnings + +import numpy as np +import pytest +from scipy import sparse + +from sklearn.exceptions import NotFittedError +from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder +from sklearn.utils._missing import is_scalar_nan +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_array_equal, +) +from sklearn.utils.fixes import CSR_CONTAINERS + + +def test_one_hot_encoder_sparse_dense(): + # check that sparse and dense will give the same results + + X = np.array([[3, 2, 1], [0, 1, 1]]) + enc_sparse = OneHotEncoder() + enc_dense = OneHotEncoder(sparse_output=False) + + X_trans_sparse = enc_sparse.fit_transform(X) + X_trans_dense = enc_dense.fit_transform(X) + + assert X_trans_sparse.shape == (2, 5) + assert X_trans_dense.shape == (2, 5) + + assert sparse.issparse(X_trans_sparse) + assert not sparse.issparse(X_trans_dense) + + # check outcome + assert_array_equal( + X_trans_sparse.toarray(), [[0.0, 1.0, 0.0, 1.0, 1.0], [1.0, 0.0, 1.0, 0.0, 1.0]] + ) + assert_array_equal(X_trans_sparse.toarray(), X_trans_dense) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +def test_one_hot_encoder_handle_unknown(handle_unknown): + X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]]) + X2 = np.array([[4, 1, 1]]) + + # Test that one hot encoder raises error for unknown features + # present during transform. 
+ oh = OneHotEncoder(handle_unknown="error") + oh.fit(X) + with pytest.raises(ValueError, match="Found unknown categories"): + oh.transform(X2) + + # Test the ignore option, ignores unknown features (giving all 0's) + oh = OneHotEncoder(handle_unknown=handle_unknown) + oh.fit(X) + X2_passed = X2.copy() + assert_array_equal( + oh.transform(X2_passed).toarray(), + np.array([[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]), + ) + # ensure transformed data was not modified in place + assert_allclose(X2, X2_passed) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +def test_one_hot_encoder_handle_unknown_strings(handle_unknown): + X = np.array(["11111111", "22", "333", "4444"]).reshape((-1, 1)) + X2 = np.array(["55555", "22"]).reshape((-1, 1)) + # Non Regression test for the issue #12470 + # Test the ignore option, when categories are numpy string dtype + # particularly when the known category strings are larger + # than the unknown category strings + oh = OneHotEncoder(handle_unknown=handle_unknown) + oh.fit(X) + X2_passed = X2.copy() + assert_array_equal( + oh.transform(X2_passed).toarray(), + np.array([[0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]]), + ) + # ensure transformed data was not modified in place + assert_array_equal(X2, X2_passed) + + +@pytest.mark.parametrize("output_dtype", [np.int32, np.float32, np.float64]) +@pytest.mark.parametrize("input_dtype", [np.int32, np.float32, np.float64]) +def test_one_hot_encoder_dtype(input_dtype, output_dtype): + X = np.asarray([[0, 1]], dtype=input_dtype).T + X_expected = np.asarray([[1, 0], [0, 1]], dtype=output_dtype) + + oh = OneHotEncoder(categories="auto", dtype=output_dtype) + assert_array_equal(oh.fit_transform(X).toarray(), X_expected) + assert_array_equal(oh.fit(X).transform(X).toarray(), X_expected) + + oh = OneHotEncoder(categories="auto", dtype=output_dtype, sparse_output=False) + assert_array_equal(oh.fit_transform(X), X_expected) + assert_array_equal(oh.fit(X).transform(X), X_expected) + + +@pytest.mark.parametrize("output_dtype", [np.int32, np.float32, np.float64]) +def test_one_hot_encoder_dtype_pandas(output_dtype): + pd = pytest.importorskip("pandas") + + X_df = pd.DataFrame({"A": ["a", "b"], "B": [1, 2]}) + X_expected = np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=output_dtype) + + oh = OneHotEncoder(dtype=output_dtype) + assert_array_equal(oh.fit_transform(X_df).toarray(), X_expected) + assert_array_equal(oh.fit(X_df).transform(X_df).toarray(), X_expected) + + oh = OneHotEncoder(dtype=output_dtype, sparse_output=False) + assert_array_equal(oh.fit_transform(X_df), X_expected) + assert_array_equal(oh.fit(X_df).transform(X_df), X_expected) + + +def test_one_hot_encoder_feature_names(): + enc = OneHotEncoder() + X = [ + ["Male", 1, "girl", 2, 3], + ["Female", 41, "girl", 1, 10], + ["Male", 51, "boy", 12, 3], + ["Male", 91, "girl", 21, 30], + ] + + enc.fit(X) + feature_names = enc.get_feature_names_out() + + assert_array_equal( + [ + "x0_Female", + "x0_Male", + "x1_1", + "x1_41", + "x1_51", + "x1_91", + "x2_boy", + "x2_girl", + "x3_1", + "x3_2", + "x3_12", + "x3_21", + "x4_3", + "x4_10", + "x4_30", + ], + feature_names, + ) + + feature_names2 = enc.get_feature_names_out(["one", "two", "three", "four", "five"]) + + assert_array_equal( + [ + "one_Female", + "one_Male", + "two_1", + "two_41", + "two_51", + "two_91", + "three_boy", + "three_girl", + "four_1", + "four_2", + "four_12", + "four_21", + "five_3", + "five_10", + "five_30", + ], + feature_names2, + ) + + with pytest.raises(ValueError, 
match="input_features should have length"): + enc.get_feature_names_out(["one", "two"]) + + +def test_one_hot_encoder_feature_names_unicode(): + enc = OneHotEncoder() + X = np.array([["c❤t1", "dat2"]], dtype=object).T + enc.fit(X) + feature_names = enc.get_feature_names_out() + assert_array_equal(["x0_c❤t1", "x0_dat2"], feature_names) + feature_names = enc.get_feature_names_out(input_features=["n👍me"]) + assert_array_equal(["n👍me_c❤t1", "n👍me_dat2"], feature_names) + + +def test_one_hot_encoder_custom_feature_name_combiner(): + """Check the behaviour of `feature_name_combiner` as a callable.""" + + def name_combiner(feature, category): + return feature + "_" + repr(category) + + enc = OneHotEncoder(feature_name_combiner=name_combiner) + X = np.array([["None", None]], dtype=object).T + enc.fit(X) + feature_names = enc.get_feature_names_out() + assert_array_equal(["x0_'None'", "x0_None"], feature_names) + feature_names = enc.get_feature_names_out(input_features=["a"]) + assert_array_equal(["a_'None'", "a_None"], feature_names) + + def wrong_combiner(feature, category): + # we should be returning a Python string + return 0 + + enc = OneHotEncoder(feature_name_combiner=wrong_combiner).fit(X) + err_msg = ( + "When `feature_name_combiner` is a callable, it should return a Python string." + ) + with pytest.raises(TypeError, match=err_msg): + enc.get_feature_names_out() + + +def test_one_hot_encoder_set_params(): + X = np.array([[1, 2]]).T + oh = OneHotEncoder() + # set params on not yet fitted object + oh.set_params(categories=[[0, 1, 2, 3]]) + assert oh.get_params()["categories"] == [[0, 1, 2, 3]] + assert oh.fit_transform(X).toarray().shape == (2, 4) + # set params on already fitted object + oh.set_params(categories=[[0, 1, 2, 3, 4]]) + assert oh.fit_transform(X).toarray().shape == (2, 5) + + +def check_categorical_onehot(X): + enc = OneHotEncoder(categories="auto") + Xtr1 = enc.fit_transform(X) + + enc = OneHotEncoder(categories="auto", sparse_output=False) + Xtr2 = enc.fit_transform(X) + + assert_allclose(Xtr1.toarray(), Xtr2) + + assert sparse.issparse(Xtr1) and Xtr1.format == "csr" + return Xtr1.toarray() + + +@pytest.mark.parametrize( + "X", + [ + [["def", 1, 55], ["abc", 2, 55]], + np.array([[10, 1, 55], [5, 2, 55]]), + np.array([["b", "A", "cat"], ["a", "B", "cat"]], dtype=object), + np.array([["b", 1, "cat"], ["a", np.nan, "cat"]], dtype=object), + np.array([["b", 1, "cat"], ["a", float("nan"), "cat"]], dtype=object), + np.array([[None, 1, "cat"], ["a", 2, "cat"]], dtype=object), + np.array([[None, 1, None], ["a", np.nan, None]], dtype=object), + np.array([[None, 1, None], ["a", float("nan"), None]], dtype=object), + ], + ids=[ + "mixed", + "numeric", + "object", + "mixed-nan", + "mixed-float-nan", + "mixed-None", + "mixed-None-nan", + "mixed-None-float-nan", + ], +) +def test_one_hot_encoder(X): + Xtr = check_categorical_onehot(np.array(X)[:, [0]]) + assert_allclose(Xtr, [[0, 1], [1, 0]]) + + Xtr = check_categorical_onehot(np.array(X)[:, [0, 1]]) + assert_allclose(Xtr, [[0, 1, 1, 0], [1, 0, 0, 1]]) + + Xtr = OneHotEncoder(categories="auto").fit_transform(X) + assert_allclose(Xtr.toarray(), [[0, 1, 1, 0, 1], [1, 0, 0, 1, 1]]) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +@pytest.mark.parametrize("sparse_", [False, True]) +@pytest.mark.parametrize("drop", [None, "first"]) +def test_one_hot_encoder_inverse(handle_unknown, sparse_, drop): + X = [["abc", 2, 55], ["def", 1, 55], ["abc", 3, 55]] + enc = OneHotEncoder(sparse_output=sparse_, 
drop=drop) + X_tr = enc.fit_transform(X) + exp = np.array(X, dtype=object) + assert_array_equal(enc.inverse_transform(X_tr), exp) + + X = [[2, 55], [1, 55], [3, 55]] + enc = OneHotEncoder(sparse_output=sparse_, categories="auto", drop=drop) + X_tr = enc.fit_transform(X) + exp = np.array(X) + assert_array_equal(enc.inverse_transform(X_tr), exp) + + if drop is None: + # with unknown categories + # drop is incompatible with handle_unknown=ignore + X = [["abc", 2, 55], ["def", 1, 55], ["abc", 3, 55]] + enc = OneHotEncoder( + sparse_output=sparse_, + handle_unknown=handle_unknown, + categories=[["abc", "def"], [1, 2], [54, 55, 56]], + ) + X_tr = enc.fit_transform(X) + exp = np.array(X, dtype=object) + exp[2, 1] = None + assert_array_equal(enc.inverse_transform(X_tr), exp) + + # with an otherwise numerical output, still object if unknown + X = [[2, 55], [1, 55], [3, 55]] + enc = OneHotEncoder( + sparse_output=sparse_, + categories=[[1, 2], [54, 56]], + handle_unknown=handle_unknown, + ) + X_tr = enc.fit_transform(X) + exp = np.array(X, dtype=object) + exp[2, 0] = None + exp[:, 1] = None + assert_array_equal(enc.inverse_transform(X_tr), exp) + + # incorrect shape raises + X_tr = np.array([[0, 1, 1], [1, 0, 1]]) + msg = re.escape("Shape of the passed X data is not correct") + with pytest.raises(ValueError, match=msg): + enc.inverse_transform(X_tr) + + +@pytest.mark.parametrize("sparse_", [False, True]) +@pytest.mark.parametrize( + "X, X_trans", + [ + ([[2, 55], [1, 55], [2, 55]], [[0, 1, 1], [0, 0, 0], [0, 1, 1]]), + ( + [["one", "a"], ["two", "a"], ["three", "b"], ["two", "a"]], + [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 1, 0, 0, 0]], + ), + ], +) +def test_one_hot_encoder_inverse_transform_raise_error_with_unknown( + X, X_trans, sparse_ +): + """Check that `inverse_transform` raise an error with unknown samples, no + dropped feature, and `handle_unknow="error`. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/14934 + """ + enc = OneHotEncoder(sparse_output=sparse_).fit(X) + msg = ( + r"Samples \[(\d )*\d\] can not be inverted when drop=None and " + r"handle_unknown='error' because they contain all zeros" + ) + + if sparse_: + # emulate sparse data transform by a one-hot encoder sparse. 
+ X_trans = _convert_container(X_trans, "sparse") + with pytest.raises(ValueError, match=msg): + enc.inverse_transform(X_trans) + + +def test_one_hot_encoder_inverse_if_binary(): + X = np.array([["Male", 1], ["Female", 3], ["Female", 2]], dtype=object) + ohe = OneHotEncoder(drop="if_binary", sparse_output=False) + X_tr = ohe.fit_transform(X) + assert_array_equal(ohe.inverse_transform(X_tr), X) + + +@pytest.mark.parametrize("drop", ["if_binary", "first", None]) +@pytest.mark.parametrize("reset_drop", ["if_binary", "first", None]) +def test_one_hot_encoder_drop_reset(drop, reset_drop): + # check that resetting drop option without refitting does not throw an error + X = np.array([["Male", 1], ["Female", 3], ["Female", 2]], dtype=object) + ohe = OneHotEncoder(drop=drop, sparse_output=False) + ohe.fit(X) + X_tr = ohe.transform(X) + feature_names = ohe.get_feature_names_out() + ohe.set_params(drop=reset_drop) + assert_array_equal(ohe.inverse_transform(X_tr), X) + assert_allclose(ohe.transform(X), X_tr) + assert_array_equal(ohe.get_feature_names_out(), feature_names) + + +@pytest.mark.parametrize("method", ["fit", "fit_transform"]) +@pytest.mark.parametrize("X", [[1, 2], np.array([3.0, 4.0])]) +def test_X_is_not_1D(X, method): + oh = OneHotEncoder() + + msg = "Expected 2D array, got 1D array instead" + with pytest.raises(ValueError, match=msg): + getattr(oh, method)(X) + + +@pytest.mark.parametrize("method", ["fit", "fit_transform"]) +def test_X_is_not_1D_pandas(method): + pd = pytest.importorskip("pandas") + X = pd.Series([6, 3, 4, 6]) + oh = OneHotEncoder() + + msg = f"Expected a 2-dimensional container but got {type(X)} instead." + with pytest.raises(ValueError, match=msg): + getattr(oh, method)(X) + + +@pytest.mark.parametrize( + "X, cat_exp, cat_dtype", + [ + ([["abc", 55], ["def", 55]], [["abc", "def"], [55]], np.object_), + (np.array([[1, 2], [3, 2]]), [[1, 3], [2]], np.integer), + ( + np.array([["A", "cat"], ["B", "cat"]], dtype=object), + [["A", "B"], ["cat"]], + np.object_, + ), + (np.array([["A", "cat"], ["B", "cat"]]), [["A", "B"], ["cat"]], np.str_), + (np.array([[1, 2], [np.nan, 2]]), [[1, np.nan], [2]], np.float64), + ( + np.array([["A", np.nan], [None, np.nan]], dtype=object), + [["A", None], [np.nan]], + np.object_, + ), + ( + np.array([["A", float("nan")], [None, float("nan")]], dtype=object), + [["A", None], [float("nan")]], + np.object_, + ), + ], + ids=[ + "mixed", + "numeric", + "object", + "string", + "missing-float", + "missing-np.nan-object", + "missing-float-nan-object", + ], +) +def test_one_hot_encoder_categories(X, cat_exp, cat_dtype): + # order of categories should not depend on order of samples + for Xi in [X, X[::-1]]: + enc = OneHotEncoder(categories="auto") + enc.fit(Xi) + # assert enc.categories == 'auto' + assert isinstance(enc.categories_, list) + for res, exp in zip(enc.categories_, cat_exp): + res_list = res.tolist() + if is_scalar_nan(exp[-1]): + assert is_scalar_nan(res_list[-1]) + assert res_list[:-1] == exp[:-1] + else: + assert res.tolist() == exp + assert np.issubdtype(res.dtype, cat_dtype) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +@pytest.mark.parametrize( + "X, X2, cats, cat_dtype", + [ + ( + np.array([["a", "b"]], dtype=object).T, + np.array([["a", "d"]], dtype=object).T, + [["a", "b", "c"]], + np.object_, + ), + ( + np.array([[1, 2]], dtype="int64").T, + np.array([[1, 4]], dtype="int64").T, + [[1, 2, 3]], + np.int64, + ), + ( + np.array([["a", "b"]], dtype=object).T, + np.array([["a", "d"]], 
dtype=object).T, + [np.array(["a", "b", "c"])], + np.object_, + ), + ( + np.array([[None, "a"]], dtype=object).T, + np.array([[None, "b"]], dtype=object).T, + [[None, "a", "z"]], + object, + ), + ( + np.array([["a", "b"]], dtype=object).T, + np.array([["a", np.nan]], dtype=object).T, + [["a", "b", "z"]], + object, + ), + ( + np.array([["a", None]], dtype=object).T, + np.array([["a", np.nan]], dtype=object).T, + [["a", None, "z"]], + object, + ), + ], + ids=[ + "object", + "numeric", + "object-string", + "object-string-none", + "object-string-nan", + "object-None-and-nan", + ], +) +def test_one_hot_encoder_specified_categories(X, X2, cats, cat_dtype, handle_unknown): + enc = OneHotEncoder(categories=cats) + exp = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]) + assert_array_equal(enc.fit_transform(X).toarray(), exp) + assert list(enc.categories[0]) == list(cats[0]) + assert enc.categories_[0].tolist() == list(cats[0]) + # manually specified categories should have same dtype as + # the data when coerced from lists + assert enc.categories_[0].dtype == cat_dtype + + # when specifying categories manually, unknown categories should already + # raise when fitting + enc = OneHotEncoder(categories=cats) + with pytest.raises(ValueError, match="Found unknown categories"): + enc.fit(X2) + enc = OneHotEncoder(categories=cats, handle_unknown=handle_unknown) + exp = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]) + assert_array_equal(enc.fit(X2).transform(X2).toarray(), exp) + + +def test_one_hot_encoder_unsorted_categories(): + X = np.array([["a", "b"]], dtype=object).T + + enc = OneHotEncoder(categories=[["b", "a", "c"]]) + exp = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]) + assert_array_equal(enc.fit(X).transform(X).toarray(), exp) + assert_array_equal(enc.fit_transform(X).toarray(), exp) + assert enc.categories_[0].tolist() == ["b", "a", "c"] + assert np.issubdtype(enc.categories_[0].dtype, np.object_) + + # unsorted passed categories still raise for numerical values + X = np.array([[1, 2]]).T + enc = OneHotEncoder(categories=[[2, 1, 3]]) + msg = "Unsorted categories are not supported" + with pytest.raises(ValueError, match=msg): + enc.fit_transform(X) + + +@pytest.mark.parametrize("Encoder", [OneHotEncoder, OrdinalEncoder]) +def test_encoder_nan_ending_specified_categories(Encoder): + """Test encoder for specified categories that nan is at the end. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27088 + """ + cats = [np.array([0, np.nan, 1])] + enc = Encoder(categories=cats) + X = np.array([[0, 1]], dtype=object).T + with pytest.raises(ValueError, match="Nan should be the last element"): + enc.fit(X) + + +def test_one_hot_encoder_specified_categories_mixed_columns(): + # multiple columns + X = np.array([["a", "b"], [0, 2]], dtype=object).T + enc = OneHotEncoder(categories=[["a", "b", "c"], [0, 1, 2]]) + exp = np.array([[1.0, 0.0, 0.0, 1.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 1.0]]) + assert_array_equal(enc.fit_transform(X).toarray(), exp) + assert enc.categories_[0].tolist() == ["a", "b", "c"] + assert np.issubdtype(enc.categories_[0].dtype, np.object_) + assert enc.categories_[1].tolist() == [0, 1, 2] + # integer categories but from object dtype data + assert np.issubdtype(enc.categories_[1].dtype, np.object_) + + +def test_one_hot_encoder_pandas(): + pd = pytest.importorskip("pandas") + + X_df = pd.DataFrame({"A": ["a", "b"], "B": [1, 2]}) + + Xtr = check_categorical_onehot(X_df) + assert_allclose(Xtr, [[1, 0, 1, 0], [0, 1, 0, 1]]) + + +@pytest.mark.parametrize( + "drop, expected_names", + [ + ("first", ["x0_c", "x2_b"]), + ("if_binary", ["x0_c", "x1_2", "x2_b"]), + (["c", 2, "b"], ["x0_b", "x2_a"]), + ], + ids=["first", "binary", "manual"], +) +def test_one_hot_encoder_feature_names_drop(drop, expected_names): + X = [["c", 2, "a"], ["b", 2, "b"]] + + ohe = OneHotEncoder(drop=drop) + ohe.fit(X) + feature_names = ohe.get_feature_names_out() + assert_array_equal(expected_names, feature_names) + + +def test_one_hot_encoder_drop_equals_if_binary(): + # Canonical case + X = [[10, "yes"], [20, "no"], [30, "yes"]] + expected = np.array( + [[1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]] + ) + expected_drop_idx = np.array([None, 0]) + + ohe = OneHotEncoder(drop="if_binary", sparse_output=False) + result = ohe.fit_transform(X) + assert_array_equal(ohe.drop_idx_, expected_drop_idx) + assert_allclose(result, expected) + + # with only one cat, the behaviour is equivalent to drop=None + X = [["true", "a"], ["false", "a"], ["false", "a"]] + expected = np.array([[1.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) + expected_drop_idx = np.array([0, None]) + + ohe = OneHotEncoder(drop="if_binary", sparse_output=False) + result = ohe.fit_transform(X) + assert_array_equal(ohe.drop_idx_, expected_drop_idx) + assert_allclose(result, expected) + + +@pytest.mark.parametrize( + "X", + [ + [["abc", 2, 55], ["def", 1, 55]], + np.array([[10, 2, 55], [20, 1, 55]]), + np.array([["a", "B", "cat"], ["b", "A", "cat"]], dtype=object), + ], + ids=["mixed", "numeric", "object"], +) +def test_ordinal_encoder(X): + enc = OrdinalEncoder() + exp = np.array([[0, 1, 0], [1, 0, 0]], dtype="int64") + assert_array_equal(enc.fit_transform(X), exp.astype("float64")) + enc = OrdinalEncoder(dtype="int64") + assert_array_equal(enc.fit_transform(X), exp) + + +@pytest.mark.parametrize( + "X, X2, cats, cat_dtype", + [ + ( + np.array([["a", "b"]], dtype=object).T, + np.array([["a", "d"]], dtype=object).T, + [["a", "b", "c"]], + np.object_, + ), + ( + np.array([[1, 2]], dtype="int64").T, + np.array([[1, 4]], dtype="int64").T, + [[1, 2, 3]], + np.int64, + ), + ( + np.array([["a", "b"]], dtype=object).T, + np.array([["a", "d"]], dtype=object).T, + [np.array(["a", "b", "c"])], + np.object_, + ), + ], + ids=["object", "numeric", "object-string-cat"], +) +def test_ordinal_encoder_specified_categories(X, X2, cats, cat_dtype): + enc = 
OrdinalEncoder(categories=cats) + exp = np.array([[0.0], [1.0]]) + assert_array_equal(enc.fit_transform(X), exp) + assert list(enc.categories[0]) == list(cats[0]) + assert enc.categories_[0].tolist() == list(cats[0]) + # manually specified categories should have same dtype as + # the data when coerced from lists + assert enc.categories_[0].dtype == cat_dtype + + # when specifying categories manually, unknown categories should already + # raise when fitting + enc = OrdinalEncoder(categories=cats) + with pytest.raises(ValueError, match="Found unknown categories"): + enc.fit(X2) + + +def test_ordinal_encoder_inverse(): + X = [["abc", 2, 55], ["def", 1, 55]] + enc = OrdinalEncoder() + X_tr = enc.fit_transform(X) + exp = np.array(X, dtype=object) + assert_array_equal(enc.inverse_transform(X_tr), exp) + + # incorrect shape raises + X_tr = np.array([[0, 1, 1, 2], [1, 0, 1, 0]]) + msg = re.escape("Shape of the passed X data is not correct") + with pytest.raises(ValueError, match=msg): + enc.inverse_transform(X_tr) + + +def test_ordinal_encoder_handle_unknowns_string(): + enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-2) + X_fit = np.array([["a", "x"], ["b", "y"], ["c", "z"]], dtype=object) + X_trans = np.array([["c", "xy"], ["bla", "y"], ["a", "x"]], dtype=object) + enc.fit(X_fit) + + X_trans_enc = enc.transform(X_trans) + exp = np.array([[2, -2], [-2, 1], [0, 0]], dtype="int64") + assert_array_equal(X_trans_enc, exp) + + X_trans_inv = enc.inverse_transform(X_trans_enc) + inv_exp = np.array([["c", None], [None, "y"], ["a", "x"]], dtype=object) + assert_array_equal(X_trans_inv, inv_exp) + + +@pytest.mark.parametrize("dtype", [float, int]) +def test_ordinal_encoder_handle_unknowns_numeric(dtype): + enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-999) + X_fit = np.array([[1, 7], [2, 8], [3, 9]], dtype=dtype) + X_trans = np.array([[3, 12], [23, 8], [1, 7]], dtype=dtype) + enc.fit(X_fit) + + X_trans_enc = enc.transform(X_trans) + exp = np.array([[2, -999], [-999, 1], [0, 0]], dtype="int64") + assert_array_equal(X_trans_enc, exp) + + X_trans_inv = enc.inverse_transform(X_trans_enc) + inv_exp = np.array([[3, None], [None, 8], [1, 7]], dtype=object) + assert_array_equal(X_trans_inv, inv_exp) + + +def test_ordinal_encoder_handle_unknowns_nan(): + # Make sure unknown_value=np.nan properly works + + enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan) + + X_fit = np.array([[1], [2], [3]]) + enc.fit(X_fit) + X_trans = enc.transform([[1], [2], [4]]) + assert_array_equal(X_trans, [[0], [1], [np.nan]]) + + +def test_ordinal_encoder_handle_unknowns_nan_non_float_dtype(): + # Make sure an error is raised when unknown_value=np.nan and the dtype + # isn't a float dtype + enc = OrdinalEncoder( + handle_unknown="use_encoded_value", unknown_value=np.nan, dtype=int + ) + + X_fit = np.array([[1], [2], [3]]) + with pytest.raises(ValueError, match="dtype parameter should be a float dtype"): + enc.fit(X_fit) + + +def test_ordinal_encoder_raise_categories_shape(): + X = np.array([["Low", "Medium", "High", "Medium", "Low"]], dtype=object).T + cats = ["Low", "Medium", "High"] + enc = OrdinalEncoder(categories=cats) + msg = "Shape mismatch: if categories is an array," + + with pytest.raises(ValueError, match=msg): + enc.fit(X) + + +def test_encoder_dtypes(): + # check that dtypes are preserved when determining categories + enc = OneHotEncoder(categories="auto") + exp = np.array([[1.0, 0.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0]], dtype="float64") + + for X in [ 
+ np.array([[1, 2], [3, 4]], dtype="int64"), + np.array([[1, 2], [3, 4]], dtype="float64"), + np.array([["a", "b"], ["c", "d"]]), # str dtype + np.array([[b"a", b"b"], [b"c", b"d"]]), # bytes dtype + np.array([[1, "a"], [3, "b"]], dtype="object"), + ]: + enc.fit(X) + assert all([enc.categories_[i].dtype == X.dtype for i in range(2)]) + assert_array_equal(enc.transform(X).toarray(), exp) + + X = [[1, 2], [3, 4]] + enc.fit(X) + assert all([np.issubdtype(enc.categories_[i].dtype, np.integer) for i in range(2)]) + assert_array_equal(enc.transform(X).toarray(), exp) + + X = [[1, "a"], [3, "b"]] + enc.fit(X) + assert all([enc.categories_[i].dtype == "object" for i in range(2)]) + assert_array_equal(enc.transform(X).toarray(), exp) + + +def test_encoder_dtypes_pandas(): + # check dtype (similar to test_categorical_encoder_dtypes for dataframes) + pd = pytest.importorskip("pandas") + + enc = OneHotEncoder(categories="auto") + exp = np.array( + [[1.0, 0.0, 1.0, 0.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0, 0.0, 1.0]], + dtype="float64", + ) + + X = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}, dtype="int64") + enc.fit(X) + assert all([enc.categories_[i].dtype == "int64" for i in range(2)]) + assert_array_equal(enc.transform(X).toarray(), exp) + + X = pd.DataFrame({"A": [1, 2], "B": ["a", "b"], "C": [3.0, 4.0]}) + X_type = [X["A"].dtype, X["B"].dtype, X["C"].dtype] + enc.fit(X) + assert all([enc.categories_[i].dtype == X_type[i] for i in range(3)]) + assert_array_equal(enc.transform(X).toarray(), exp) + + +def test_one_hot_encoder_warning(): + enc = OneHotEncoder() + X = [["Male", 1], ["Female", 3]] + with warnings.catch_warnings(): + warnings.simplefilter("error") + enc.fit_transform(X) + + +@pytest.mark.parametrize("drop", ["if_binary", "first"]) +def test_ohe_handle_unknown_warn(drop): + """Check handle_unknown='warn' works correctly.""" + + X = [["a", 0], ["b", 2], ["b", 1]] + + ohe = OneHotEncoder( + drop=drop, + sparse_output=False, + handle_unknown="warn", + categories=[["b", "a"], [1, 2]], + ) + ohe.fit(X) + + X_test = [["c", 1]] + X_expected = np.array([[0, 0]]) + + warn_msg = ( + r"Found unknown categories in columns \[0\] during transform. 
" + r"These unknown categories will be encoded as all zeros" + ) + with pytest.warns(UserWarning, match=warn_msg): + X_trans = ohe.transform(X_test) + assert_allclose(X_trans, X_expected) + + +@pytest.mark.parametrize("missing_value", [np.nan, None, float("nan")]) +def test_one_hot_encoder_drop_manual(missing_value): + cats_to_drop = ["def", 12, 3, 56, missing_value] + enc = OneHotEncoder(drop=cats_to_drop) + X = [ + ["abc", 12, 2, 55, "a"], + ["def", 12, 1, 55, "a"], + ["def", 12, 3, 56, missing_value], + ] + trans = enc.fit_transform(X).toarray() + exp = [[1, 0, 1, 1, 1], [0, 1, 0, 1, 1], [0, 0, 0, 0, 0]] + assert_array_equal(trans, exp) + assert enc.drop is cats_to_drop + + dropped_cats = [ + cat[feature] for cat, feature in zip(enc.categories_, enc.drop_idx_) + ] + X_inv_trans = enc.inverse_transform(trans) + X_array = np.array(X, dtype=object) + + # last value is np.nan + if is_scalar_nan(cats_to_drop[-1]): + assert_array_equal(dropped_cats[:-1], cats_to_drop[:-1]) + assert is_scalar_nan(dropped_cats[-1]) + assert is_scalar_nan(cats_to_drop[-1]) + # do not include the last column which includes missing values + assert_array_equal(X_array[:, :-1], X_inv_trans[:, :-1]) + + # check last column is the missing value + assert_array_equal(X_array[-1, :-1], X_inv_trans[-1, :-1]) + assert is_scalar_nan(X_array[-1, -1]) + assert is_scalar_nan(X_inv_trans[-1, -1]) + else: + assert_array_equal(dropped_cats, cats_to_drop) + assert_array_equal(X_array, X_inv_trans) + + +@pytest.mark.parametrize("drop", [["abc", 3], ["abc", 3, 41, "a"]]) +def test_invalid_drop_length(drop): + enc = OneHotEncoder(drop=drop) + err_msg = "`drop` should have length equal to the number" + with pytest.raises(ValueError, match=err_msg): + enc.fit([["abc", 2, 55], ["def", 1, 55], ["def", 3, 59]]) + + +@pytest.mark.parametrize("density", [True, False], ids=["sparse", "dense"]) +@pytest.mark.parametrize("drop", ["first", ["a", 2, "b"]], ids=["first", "manual"]) +def test_categories(density, drop): + ohe_base = OneHotEncoder(sparse_output=density) + ohe_test = OneHotEncoder(sparse_output=density, drop=drop) + X = [["c", 1, "a"], ["a", 2, "b"]] + ohe_base.fit(X) + ohe_test.fit(X) + assert_array_equal(ohe_base.categories_, ohe_test.categories_) + if drop == "first": + assert_array_equal(ohe_test.drop_idx_, 0) + else: + for drop_cat, drop_idx, cat_list in zip( + drop, ohe_test.drop_idx_, ohe_test.categories_ + ): + assert cat_list[int(drop_idx)] == drop_cat + assert isinstance(ohe_test.drop_idx_, np.ndarray) + assert ohe_test.drop_idx_.dtype == object + + +@pytest.mark.parametrize("Encoder", [OneHotEncoder, OrdinalEncoder]) +def test_encoders_has_categorical_tags(Encoder): + assert Encoder().__sklearn_tags__().input_tags.categorical + + +@pytest.mark.parametrize( + "kwargs", + [ + {"max_categories": 2}, + {"min_frequency": 11}, + {"min_frequency": 0.29}, + {"max_categories": 2, "min_frequency": 6}, + {"max_categories": 4, "min_frequency": 12}, + ], +) +@pytest.mark.parametrize("categories", ["auto", [["a", "b", "c", "d"]]]) +def test_ohe_infrequent_two_levels(kwargs, categories): + """Test that different parameters for combine 'a', 'c', and 'd' into + the infrequent category works as expected.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + categories=categories, + handle_unknown="infrequent_if_exist", + sparse_output=False, + **kwargs, + ).fit(X_train) + assert_array_equal(ohe.infrequent_categories_, [["a", "c", "d"]]) + + X_test = [["b"], ["a"], ["c"], ["d"], ["e"]] + 
expected = np.array([[1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + expected_inv = [[col] for col in ["b"] + ["infrequent_sklearn"] * 4] + X_inv = ohe.inverse_transform(X_trans) + assert_array_equal(expected_inv, X_inv) + + feature_names = ohe.get_feature_names_out() + assert_array_equal(["x0_b", "x0_infrequent_sklearn"], feature_names) + + +@pytest.mark.parametrize("drop", ["if_binary", "first", ["b"]]) +def test_ohe_infrequent_two_levels_drop_frequent(drop): + """Test two levels and dropping the frequent category.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", + sparse_output=False, + max_categories=2, + drop=drop, + ).fit(X_train) + assert ohe.categories_[0][ohe.drop_idx_[0]] == "b" + + X_test = np.array([["b"], ["c"]]) + X_trans = ohe.transform(X_test) + assert_allclose([[0], [1]], X_trans) + + feature_names = ohe.get_feature_names_out() + assert_array_equal(["x0_infrequent_sklearn"], feature_names) + + X_inverse = ohe.inverse_transform(X_trans) + assert_array_equal([["b"], ["infrequent_sklearn"]], X_inverse) + + +@pytest.mark.parametrize("drop", [["a"], ["d"]]) +def test_ohe_infrequent_two_levels_drop_infrequent_errors(drop): + """Test two levels and dropping any infrequent category removes the + whole infrequent category.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", + sparse_output=False, + max_categories=2, + drop=drop, + ) + + msg = f"Unable to drop category {drop[0]!r} from feature 0 because it is infrequent" + with pytest.raises(ValueError, match=msg): + ohe.fit(X_train) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"max_categories": 3}, + {"min_frequency": 6}, + {"min_frequency": 9}, + {"min_frequency": 0.24}, + {"min_frequency": 0.16}, + {"max_categories": 3, "min_frequency": 8}, + {"max_categories": 4, "min_frequency": 6}, + ], +) +def test_ohe_infrequent_three_levels(kwargs): + """Test that different parameters for combing 'a', and 'd' into + the infrequent category works as expected.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", sparse_output=False, **kwargs + ).fit(X_train) + assert_array_equal(ohe.infrequent_categories_, [["a", "d"]]) + + X_test = [["b"], ["a"], ["c"], ["d"], ["e"]] + expected = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + expected_inv = [ + ["b"], + ["infrequent_sklearn"], + ["c"], + ["infrequent_sklearn"], + ["infrequent_sklearn"], + ] + X_inv = ohe.inverse_transform(X_trans) + assert_array_equal(expected_inv, X_inv) + + feature_names = ohe.get_feature_names_out() + assert_array_equal(["x0_b", "x0_c", "x0_infrequent_sklearn"], feature_names) + + +@pytest.mark.parametrize("drop", ["first", ["b"]]) +def test_ohe_infrequent_three_levels_drop_frequent(drop): + """Test three levels and dropping the frequent category.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", + sparse_output=False, + max_categories=3, + drop=drop, + ).fit(X_train) + + X_test = np.array([["b"], ["c"], ["d"]]) + assert_allclose([[0, 0], [1, 0], [0, 1]], ohe.transform(X_test)) + + # Check handle_unknown="ignore" + 
ohe.set_params(handle_unknown="ignore").fit(X_train) + msg = "Found unknown categories" + with pytest.warns(UserWarning, match=msg): + X_trans = ohe.transform([["b"], ["e"]]) + + assert_allclose([[0, 0], [0, 0]], X_trans) + + +@pytest.mark.parametrize("drop", [["a"], ["d"]]) +def test_ohe_infrequent_three_levels_drop_infrequent_errors(drop): + """Test three levels and dropping the infrequent category.""" + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", + sparse_output=False, + max_categories=3, + drop=drop, + ) + + msg = f"Unable to drop category {drop[0]!r} from feature 0 because it is infrequent" + with pytest.raises(ValueError, match=msg): + ohe.fit(X_train) + + +def test_ohe_infrequent_handle_unknown_error(): + """Test that different parameters for combining 'a', and 'd' into + the infrequent category works as expected.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ohe = OneHotEncoder( + handle_unknown="error", sparse_output=False, max_categories=3 + ).fit(X_train) + assert_array_equal(ohe.infrequent_categories_, [["a", "d"]]) + + # all categories are known + X_test = [["b"], ["a"], ["c"], ["d"]] + expected = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + # 'bad' is not known and will error + X_test = [["bad"]] + msg = r"Found unknown categories \['bad'\] in column 0" + with pytest.raises(ValueError, match=msg): + ohe.transform(X_test) + + +@pytest.mark.parametrize( + "kwargs", [{"max_categories": 3, "min_frequency": 1}, {"min_frequency": 4}] +) +def test_ohe_infrequent_two_levels_user_cats_one_frequent(kwargs): + """'a' is the only frequent category, all other categories are infrequent.""" + + X_train = np.array([["a"] * 5 + ["e"] * 30], dtype=object).T + ohe = OneHotEncoder( + categories=[["c", "d", "a", "b"]], + sparse_output=False, + handle_unknown="infrequent_if_exist", + **kwargs, + ).fit(X_train) + + X_test = [["a"], ["b"], ["c"], ["d"], ["e"]] + expected = np.array([[1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + # 'a' is dropped + drops = ["first", "if_binary", ["a"]] + X_test = [["a"], ["c"]] + for drop in drops: + ohe.set_params(drop=drop).fit(X_train) + assert_allclose([[0], [1]], ohe.transform(X_test)) + + +def test_ohe_infrequent_two_levels_user_cats(): + """Test that the order of the categories provided by a user is respected.""" + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object + ).T + ohe = OneHotEncoder( + categories=[["c", "d", "a", "b"]], + sparse_output=False, + handle_unknown="infrequent_if_exist", + max_categories=2, + ).fit(X_train) + + assert_array_equal(ohe.infrequent_categories_, [["c", "d", "a"]]) + + X_test = [["b"], ["a"], ["c"], ["d"], ["e"]] + expected = np.array([[1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + # 'infrequent' is used to denote the infrequent categories for + # `inverse_transform` + expected_inv = [[col] for col in ["b"] + ["infrequent_sklearn"] * 4] + X_inv = ohe.inverse_transform(X_trans) + assert_array_equal(expected_inv, X_inv) + + +def test_ohe_infrequent_three_levels_user_cats(): + """Test that the order of the categories provided by a user is respected. 
+ In this case 'c' is encoded as the first category and 'b' is encoded + as the second one.""" + + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object + ).T + ohe = OneHotEncoder( + categories=[["c", "d", "b", "a"]], + sparse_output=False, + handle_unknown="infrequent_if_exist", + max_categories=3, + ).fit(X_train) + + assert_array_equal(ohe.infrequent_categories_, [["d", "a"]]) + + X_test = [["b"], ["a"], ["c"], ["d"], ["e"]] + expected = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 0, 1]]) + + X_trans = ohe.transform(X_test) + assert_allclose(expected, X_trans) + + # 'infrequent' is used to denote the infrequent categories for + # `inverse_transform` + expected_inv = [ + ["b"], + ["infrequent_sklearn"], + ["c"], + ["infrequent_sklearn"], + ["infrequent_sklearn"], + ] + X_inv = ohe.inverse_transform(X_trans) + assert_array_equal(expected_inv, X_inv) + + +def test_ohe_infrequent_mixed(): + """Test infrequent categories where feature 0 has infrequent categories, + and feature 1 does not.""" + + # X[:, 0] 1 and 2 are infrequent + # X[:, 1] nothing is infrequent + X = np.c_[[0, 1, 3, 3, 3, 3, 2, 0, 3], [0, 0, 0, 0, 1, 1, 1, 1, 1]] + + ohe = OneHotEncoder(max_categories=3, drop="if_binary", sparse_output=False) + ohe.fit(X) + + X_test = [[3, 0], [1, 1]] + X_trans = ohe.transform(X_test) + + # feature 1 is binary so it drops a category 0 + assert_allclose(X_trans, [[0, 1, 0, 0], [0, 0, 1, 1]]) + + +def test_ohe_infrequent_multiple_categories(): + """Test infrequent categories with feature matrix with 3 features.""" + + X = np.c_[ + [0, 1, 3, 3, 3, 3, 2, 0, 3], + [0, 0, 5, 1, 1, 10, 5, 5, 0], + [1, 0, 1, 0, 1, 0, 1, 0, 1], + ] + + ohe = OneHotEncoder( + categories="auto", max_categories=3, handle_unknown="infrequent_if_exist" + ) + # X[:, 0] 1 and 2 are infrequent + # X[:, 1] 1 and 10 are infrequent + # X[:, 2] nothing is infrequent + + X_trans = ohe.fit_transform(X).toarray() + assert_array_equal(ohe.infrequent_categories_[0], [1, 2]) + assert_array_equal(ohe.infrequent_categories_[1], [1, 10]) + assert_array_equal(ohe.infrequent_categories_[2], None) + + # 'infrequent' is used to denote the infrequent categories + # For the first column, 1 and 2 have the same frequency. 
In this case, + 1 will be chosen to be the feature name because it is smaller lexicographically + feature_names = ohe.get_feature_names_out() + assert_array_equal( + [ + "x0_0", + "x0_3", + "x0_infrequent_sklearn", + "x1_0", + "x1_5", + "x1_infrequent_sklearn", + "x2_0", + "x2_1", + ], + feature_names, + ) + + expected = [ + [1, 0, 0, 1, 0, 0, 0, 1], + [0, 0, 1, 1, 0, 0, 1, 0], + [0, 1, 0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 0, 1, 1, 0], + [0, 1, 0, 0, 0, 1, 0, 1], + [0, 1, 0, 0, 0, 1, 1, 0], + [0, 0, 1, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 1, 0, 1, 0], + [0, 1, 0, 1, 0, 0, 0, 1], + ] + + assert_allclose(expected, X_trans) + + X_test = [[3, 1, 2], [4, 0, 3]] + + X_test_trans = ohe.transform(X_test) + + # X[:, 2] does not have an infrequent category, thus it is encoded as all + # zeros + expected = [[0, 1, 0, 0, 0, 1, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0]] + assert_allclose(expected, X_test_trans.toarray()) + + X_inv = ohe.inverse_transform(X_test_trans) + expected_inv = np.array( + [[3, "infrequent_sklearn", None], ["infrequent_sklearn", 0, None]], dtype=object + ) + assert_array_equal(expected_inv, X_inv) + + # error for unknown categories + ohe = OneHotEncoder( + categories="auto", max_categories=3, handle_unknown="error" + ).fit(X) + with pytest.raises(ValueError, match="Found unknown categories"): + ohe.transform(X_test) + + # only infrequent or known categories + X_test = [[1, 1, 1], [3, 10, 0]] + X_test_trans = ohe.transform(X_test) + + expected = [[0, 0, 1, 0, 0, 1, 0, 1], [0, 1, 0, 0, 0, 1, 1, 0]] + assert_allclose(expected, X_test_trans.toarray()) + + X_inv = ohe.inverse_transform(X_test_trans) + + expected_inv = np.array( + [["infrequent_sklearn", "infrequent_sklearn", 1], [3, "infrequent_sklearn", 0]], + dtype=object, + ) + assert_array_equal(expected_inv, X_inv) + + +def test_ohe_infrequent_multiple_categories_dtypes(): + """Test infrequent categories with a pandas dataframe with multiple dtypes.""" + + pd = pytest.importorskip("pandas") + X = pd.DataFrame( + { + "str": ["a", "f", "c", "f", "f", "a", "c", "b", "b"], + "int": [5, 3, 0, 10, 10, 12, 0, 3, 5], + }, + columns=["str", "int"], + ) + + ohe = OneHotEncoder( + categories="auto", max_categories=3, handle_unknown="infrequent_if_exist" + ) + # X[:, 0] 'a', 'b', 'c' have the same frequency. 'a' and 'b' will be + # considered infrequent because they are greater + + # X[:, 1] 0, 3, 5, 10 have frequency 2 and 12 has frequency 1.
+ # 0, 3, 12 will be considered infrequent + + X_trans = ohe.fit_transform(X).toarray() + assert_array_equal(ohe.infrequent_categories_[0], ["a", "b"]) + assert_array_equal(ohe.infrequent_categories_[1], [0, 3, 12]) + + expected = [ + [0, 0, 1, 1, 0, 0], + [0, 1, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1], + [0, 1, 0, 0, 1, 0], + [0, 1, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 0, 1], + [0, 0, 1, 0, 0, 1], + [0, 0, 1, 1, 0, 0], + ] + + assert_allclose(expected, X_trans) + + X_test = pd.DataFrame({"str": ["b", "f"], "int": [14, 12]}, columns=["str", "int"]) + + expected = [[0, 0, 1, 0, 0, 1], [0, 1, 0, 0, 0, 1]] + X_test_trans = ohe.transform(X_test) + assert_allclose(expected, X_test_trans.toarray()) + + X_inv = ohe.inverse_transform(X_test_trans) + expected_inv = np.array( + [["infrequent_sklearn", "infrequent_sklearn"], ["f", "infrequent_sklearn"]], + dtype=object, + ) + assert_array_equal(expected_inv, X_inv) + + # only infrequent or known categories + X_test = pd.DataFrame({"str": ["c", "b"], "int": [12, 5]}, columns=["str", "int"]) + X_test_trans = ohe.transform(X_test).toarray() + expected = [[1, 0, 0, 0, 0, 1], [0, 0, 1, 1, 0, 0]] + assert_allclose(expected, X_test_trans) + + X_inv = ohe.inverse_transform(X_test_trans) + expected_inv = np.array( + [["c", "infrequent_sklearn"], ["infrequent_sklearn", 5]], dtype=object + ) + assert_array_equal(expected_inv, X_inv) + + +@pytest.mark.parametrize("kwargs", [{"min_frequency": 21, "max_categories": 1}]) +def test_ohe_infrequent_one_level_errors(kwargs): + """All user provided categories are infrequent.""" + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 2]).T + + ohe = OneHotEncoder( + handle_unknown="infrequent_if_exist", sparse_output=False, **kwargs + ) + ohe.fit(X_train) + + X_trans = ohe.transform([["a"]]) + assert_allclose(X_trans, [[1]]) + + +@pytest.mark.parametrize("kwargs", [{"min_frequency": 2, "max_categories": 3}]) +def test_ohe_infrequent_user_cats_unknown_training_errors(kwargs): + """All user provided categories are infrequent.""" + + X_train = np.array([["e"] * 3], dtype=object).T + ohe = OneHotEncoder( + categories=[["c", "d", "a", "b"]], + sparse_output=False, + handle_unknown="infrequent_if_exist", + **kwargs, + ).fit(X_train) + + X_trans = ohe.transform([["a"], ["e"]]) + assert_allclose(X_trans, [[1], [1]]) + + +# deliberately omit 'OS' as an invalid combo +@pytest.mark.parametrize( + "input_dtype, category_dtype", ["OO", "OU", "UO", "UU", "SO", "SU", "SS"] +) +@pytest.mark.parametrize("array_type", ["list", "array", "dataframe"]) +def test_encoders_string_categories(input_dtype, category_dtype, array_type): + """Check that encoding work with object, unicode, and byte string dtypes. 
+ Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/15616 + https://github.com/scikit-learn/scikit-learn/issues/15726 + https://github.com/scikit-learn/scikit-learn/issues/19677 + """ + + X = np.array([["b"], ["a"]], dtype=input_dtype) + categories = [np.array(["b", "a"], dtype=category_dtype)] + ohe = OneHotEncoder(categories=categories, sparse_output=False).fit(X) + + X_test = _convert_container( + [["a"], ["a"], ["b"], ["a"]], array_type, dtype=input_dtype + ) + X_trans = ohe.transform(X_test) + + expected = np.array([[0, 1], [0, 1], [1, 0], [0, 1]]) + assert_allclose(X_trans, expected) + + oe = OrdinalEncoder(categories=categories).fit(X) + X_trans = oe.transform(X_test) + + expected = np.array([[1], [1], [0], [1]]) + assert_array_equal(X_trans, expected) + + +def test_mixed_string_bytes_categoricals(): + """Check that this mixture of predefined categories and X raises an error. + + Categories defined as bytes can not easily be compared to data that is + a string. + """ + # data as unicode + X = np.array([["b"], ["a"]], dtype="U") + # predefined categories as bytes + categories = [np.array(["b", "a"], dtype="S")] + ohe = OneHotEncoder(categories=categories, sparse_output=False) + + msg = re.escape( + "In column 0, the predefined categories have type 'bytes' which is incompatible" + " with values of type 'str_'." + ) + + with pytest.raises(ValueError, match=msg): + ohe.fit(X) + + +@pytest.mark.parametrize("missing_value", [np.nan, None]) +def test_ohe_missing_values_get_feature_names(missing_value): + # encoder with missing values with object dtypes + X = np.array([["a", "b", missing_value, "a", missing_value]], dtype=object).T + ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore").fit(X) + names = ohe.get_feature_names_out() + assert_array_equal(names, ["x0_a", "x0_b", f"x0_{missing_value}"]) + + +def test_ohe_missing_value_support_pandas(): + # check support for pandas with mixed dtypes and missing values + pd = pytest.importorskip("pandas") + df = pd.DataFrame( + { + "col1": ["dog", "cat", None, "cat"], + "col2": np.array([3, 0, 4, np.nan], dtype=float), + }, + columns=["col1", "col2"], + ) + expected_df_trans = np.array( + [ + [0, 1, 0, 0, 1, 0, 0], + [1, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 0, 1, 0], + [1, 0, 0, 0, 0, 0, 1], + ] + ) + + Xtr = check_categorical_onehot(df) + assert_allclose(Xtr, expected_df_trans) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +@pytest.mark.parametrize("pd_nan_type", ["pd.NA", "np.nan"]) +def test_ohe_missing_value_support_pandas_categorical(pd_nan_type, handle_unknown): + # checks pandas dataframe with categorical features + pd = pytest.importorskip("pandas") + + pd_missing_value = pd.NA if pd_nan_type == "pd.NA" else np.nan + + df = pd.DataFrame( + { + "col1": pd.Series(["c", "a", pd_missing_value, "b", "a"], dtype="category"), + } + ) + expected_df_trans = np.array( + [ + [0, 0, 1, 0], + [1, 0, 0, 0], + [0, 0, 0, 1], + [0, 1, 0, 0], + [1, 0, 0, 0], + ] + ) + + ohe = OneHotEncoder(sparse_output=False, handle_unknown=handle_unknown) + df_trans = ohe.fit_transform(df) + assert_allclose(expected_df_trans, df_trans) + + assert len(ohe.categories_) == 1 + assert_array_equal(ohe.categories_[0][:-1], ["a", "b", "c"]) + assert np.isnan(ohe.categories_[0][-1]) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +def test_ohe_drop_first_handle_unknown_ignore_warns(handle_unknown): + """Check drop='first' and 
handle_unknown='ignore'/'infrequent_if_exist' + during transform.""" + X = [["a", 0], ["b", 2], ["b", 1]] + + ohe = OneHotEncoder( + drop="first", sparse_output=False, handle_unknown=handle_unknown + ) + X_trans = ohe.fit_transform(X) + + X_expected = np.array( + [ + [0, 0, 0], + [1, 0, 1], + [1, 1, 0], + ] + ) + assert_allclose(X_trans, X_expected) + + # Both categories are unknown + X_test = [["c", 3]] + X_expected = np.array([[0, 0, 0]]) + + warn_msg = ( + r"Found unknown categories in columns \[0, 1\] during " + "transform. These unknown categories will be encoded as all " + "zeros" + ) + with pytest.warns(UserWarning, match=warn_msg): + X_trans = ohe.transform(X_test) + assert_allclose(X_trans, X_expected) + + # inverse_transform maps to None + X_inv = ohe.inverse_transform(X_expected) + assert_array_equal(X_inv, np.array([["a", 0]], dtype=object)) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +def test_ohe_drop_if_binary_handle_unknown_ignore_warns(handle_unknown): + """Check drop='if_binary' and handle_unknown='ignore' during transform.""" + X = [["a", 0], ["b", 2], ["b", 1]] + + ohe = OneHotEncoder( + drop="if_binary", sparse_output=False, handle_unknown=handle_unknown + ) + X_trans = ohe.fit_transform(X) + + X_expected = np.array( + [ + [0, 1, 0, 0], + [1, 0, 0, 1], + [1, 0, 1, 0], + ] + ) + assert_allclose(X_trans, X_expected) + + # Both categories are unknown + X_test = [["c", 3]] + X_expected = np.array([[0, 0, 0, 0]]) + + warn_msg = ( + r"Found unknown categories in columns \[0, 1\] during " + "transform. These unknown categories will be encoded as all " + "zeros" + ) + with pytest.warns(UserWarning, match=warn_msg): + X_trans = ohe.transform(X_test) + assert_allclose(X_trans, X_expected) + + # inverse_transform maps to None + X_inv = ohe.inverse_transform(X_expected) + assert_array_equal(X_inv, np.array([["a", None]], dtype=object)) + + +@pytest.mark.parametrize("handle_unknown", ["ignore", "infrequent_if_exist", "warn"]) +def test_ohe_drop_first_explicit_categories(handle_unknown): + """Check drop='first' and handle_unknown='ignore'/'infrequent_if_exist' + during fit with categories passed in.""" + + X = [["a", 0], ["b", 2], ["b", 1]] + + ohe = OneHotEncoder( + drop="first", + sparse_output=False, + handle_unknown=handle_unknown, + categories=[["b", "a"], [1, 2]], + ) + ohe.fit(X) + + X_test = [["c", 1]] + X_expected = np.array([[0, 0]]) + + warn_msg = ( + r"Found unknown categories in columns \[0\] during transform. " + r"These unknown categories will be encoded as all zeros" + ) + with pytest.warns(UserWarning, match=warn_msg): + X_trans = ohe.transform(X_test) + assert_allclose(X_trans, X_expected) + + +def test_ohe_more_informative_error_message(): + """Raise informative error message when pandas output and sparse_output=True.""" + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"a": [1, 2, 3], "b": ["z", "b", "b"]}, columns=["a", "b"]) + + ohe = OneHotEncoder(sparse_output=True) + ohe.set_output(transform="pandas") + + msg = ( + "Pandas output does not support sparse data. 
Set " + "sparse_output=False to output pandas dataframes or disable Pandas output" + ) + with pytest.raises(ValueError, match=msg): + ohe.fit_transform(df) + + ohe.fit(df) + with pytest.raises(ValueError, match=msg): + ohe.transform(df) + + +def test_ordinal_encoder_passthrough_missing_values_float_errors_dtype(): + """Test ordinal encoder with nan passthrough fails when dtype=np.int32.""" + + X = np.array([[np.nan, 3.0, 1.0, 3.0]]).T + oe = OrdinalEncoder(dtype=np.int32) + + msg = ( + r"There are missing values in features \[0\]. For OrdinalEncoder " + f"to encode missing values with dtype: {np.int32}" + ) + with pytest.raises(ValueError, match=msg): + oe.fit(X) + + +@pytest.mark.parametrize("encoded_missing_value", [np.nan, -2]) +def test_ordinal_encoder_passthrough_missing_values_float(encoded_missing_value): + """Test ordinal encoder with nan on float dtypes.""" + + X = np.array([[np.nan, 3.0, 1.0, 3.0]], dtype=np.float64).T + oe = OrdinalEncoder(encoded_missing_value=encoded_missing_value).fit(X) + + assert len(oe.categories_) == 1 + + assert_allclose(oe.categories_[0], [1.0, 3.0, np.nan]) + + X_trans = oe.transform(X) + assert_allclose(X_trans, [[encoded_missing_value], [1.0], [0.0], [1.0]]) + + X_inverse = oe.inverse_transform(X_trans) + assert_allclose(X_inverse, X) + + +@pytest.mark.parametrize("pd_nan_type", ["pd.NA", "np.nan"]) +@pytest.mark.parametrize("encoded_missing_value", [np.nan, -2]) +def test_ordinal_encoder_missing_value_support_pandas_categorical( + pd_nan_type, encoded_missing_value +): + """Check ordinal encoder is compatible with pandas.""" + # checks pandas dataframe with categorical features + pd = pytest.importorskip("pandas") + + pd_missing_value = pd.NA if pd_nan_type == "pd.NA" else np.nan + + df = pd.DataFrame( + { + "col1": pd.Series(["c", "a", pd_missing_value, "b", "a"], dtype="category"), + } + ) + + oe = OrdinalEncoder(encoded_missing_value=encoded_missing_value).fit(df) + assert len(oe.categories_) == 1 + assert_array_equal(oe.categories_[0][:3], ["a", "b", "c"]) + assert np.isnan(oe.categories_[0][-1]) + + df_trans = oe.transform(df) + + assert_allclose(df_trans, [[2.0], [0.0], [encoded_missing_value], [1.0], [0.0]]) + + X_inverse = oe.inverse_transform(df_trans) + assert X_inverse.shape == (5, 1) + assert_array_equal(X_inverse[:2, 0], ["c", "a"]) + assert_array_equal(X_inverse[3:, 0], ["b", "a"]) + assert np.isnan(X_inverse[2, 0]) + + +@pytest.mark.parametrize( + "X, X2, cats, cat_dtype", + [ + ( + ( + np.array([["a", np.nan]], dtype=object).T, + np.array([["a", "b"]], dtype=object).T, + [np.array(["a", "d", np.nan], dtype=object)], + np.object_, + ) + ), + ( + ( + np.array([["a", np.nan]], dtype=object).T, + np.array([["a", "b"]], dtype=object).T, + [np.array(["a", "d", np.nan], dtype=object)], + np.object_, + ) + ), + ( + ( + np.array([[2.0, np.nan]], dtype=np.float64).T, + np.array([[3.0]], dtype=np.float64).T, + [np.array([2.0, 4.0, np.nan])], + np.float64, + ) + ), + ], + ids=[ + "object-None-missing-value", + "object-nan-missing_value", + "numeric-missing-value", + ], +) +def test_ordinal_encoder_specified_categories_missing_passthrough( + X, X2, cats, cat_dtype +): + """Test ordinal encoder for specified categories.""" + oe = OrdinalEncoder(categories=cats) + exp = np.array([[0.0], [np.nan]]) + assert_array_equal(oe.fit_transform(X), exp) + # manually specified categories should have same dtype as + # the data when coerced from lists + assert oe.categories_[0].dtype == cat_dtype + + # when specifying categories manually, unknown categories 
should already + # raise when fitting + oe = OrdinalEncoder(categories=cats) + with pytest.raises(ValueError, match="Found unknown categories"): + oe.fit(X2) + + +@pytest.mark.parametrize("Encoder", [OneHotEncoder, OrdinalEncoder]) +def test_encoder_duplicate_specified_categories(Encoder): + """Test encoder for specified categories have duplicate values. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27088 + """ + cats = [np.array(["a", "b", "a"], dtype=object)] + enc = Encoder(categories=cats) + X = np.array([["a", "b"]], dtype=object).T + with pytest.raises( + ValueError, match="the predefined categories contain duplicate elements." + ): + enc.fit(X) + + +@pytest.mark.parametrize( + "X, expected_X_trans, X_test", + [ + ( + np.array([[1.0, np.nan, 3.0]]).T, + np.array([[0.0, np.nan, 1.0]]).T, + np.array([[4.0]]), + ), + ( + np.array([[1.0, 4.0, 3.0]]).T, + np.array([[0.0, 2.0, 1.0]]).T, + np.array([[np.nan]]), + ), + ( + np.array([["c", np.nan, "b"]], dtype=object).T, + np.array([[1.0, np.nan, 0.0]]).T, + np.array([["d"]], dtype=object), + ), + ( + np.array([["c", "a", "b"]], dtype=object).T, + np.array([[2.0, 0.0, 1.0]]).T, + np.array([[np.nan]], dtype=object), + ), + ], +) +def test_ordinal_encoder_handle_missing_and_unknown(X, expected_X_trans, X_test): + """Test the interaction between missing values and handle_unknown""" + + oe = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1) + + X_trans = oe.fit_transform(X) + assert_allclose(X_trans, expected_X_trans) + + assert_allclose(oe.transform(X_test), [[-1.0]]) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_ordinal_encoder_sparse(csr_container): + """Check that we raise proper error with sparse input in OrdinalEncoder. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/19878 + """ + X = np.array([[3, 2, 1], [0, 1, 1]]) + X_sparse = csr_container(X) + + encoder = OrdinalEncoder() + + err_msg = "Sparse data was passed, but dense data is required" + with pytest.raises(TypeError, match=err_msg): + encoder.fit(X_sparse) + with pytest.raises(TypeError, match=err_msg): + encoder.fit_transform(X_sparse) + + X_trans = encoder.fit_transform(X) + X_trans_sparse = csr_container(X_trans) + with pytest.raises(TypeError, match=err_msg): + encoder.inverse_transform(X_trans_sparse) + + +def test_ordinal_encoder_fit_with_unseen_category(): + """Check OrdinalEncoder.fit works with unseen category when + `handle_unknown="use_encoded_value"`. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/19872 + """ + X = np.array([0, 0, 1, 0, 2, 5])[:, np.newaxis] + oe = OrdinalEncoder( + categories=[[-1, 0, 1]], handle_unknown="use_encoded_value", unknown_value=-999 + ) + oe.fit(X) + + oe = OrdinalEncoder(categories=[[-1, 0, 1]], handle_unknown="error") + with pytest.raises(ValueError, match="Found unknown categories"): + oe.fit(X) + + +@pytest.mark.parametrize( + "X_train", + [ + [["AA", "B"]], + np.array([["AA", "B"]], dtype="O"), + np.array([["AA", "B"]], dtype="U"), + ], +) +@pytest.mark.parametrize( + "X_test", + [ + [["A", "B"]], + np.array([["A", "B"]], dtype="O"), + np.array([["A", "B"]], dtype="U"), + ], +) +def test_ordinal_encoder_handle_unknown_string_dtypes(X_train, X_test): + """Checks that `OrdinalEncoder` transforms string dtypes. 
+ Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/19872 + """ + enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-9) + enc.fit(X_train) + + X_trans = enc.transform(X_test) + assert_allclose(X_trans, [[-9, 0]]) + + +def test_ordinal_encoder_python_integer(): + """Check that `OrdinalEncoder` accepts Python integers that are potentially + larger than 64 bits. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/20721 + """ + X = np.array( + [ + 44253463435747313673, + 9867966753463435747313673, + 44253462342215747313673, + 442534634357764313673, + ] + ).reshape(-1, 1) + encoder = OrdinalEncoder().fit(X) + assert_array_equal(encoder.categories_, np.sort(X, axis=0).T) + X_trans = encoder.transform(X) + assert_array_equal(X_trans, [[0], [3], [2], [1]]) + + +def test_ordinal_encoder_features_names_out_pandas(): + """Check feature names out is same as the input.""" + pd = pytest.importorskip("pandas") + + names = ["b", "c", "a"] + X = pd.DataFrame([[1, 2, 3]], columns=names) + enc = OrdinalEncoder().fit(X) + + feature_names_out = enc.get_feature_names_out() + assert_array_equal(names, feature_names_out) + + +def test_ordinal_encoder_unknown_missing_interaction(): + """Check interactions between encode_unknown and missing value encoding.""" + + X = np.array([["a"], ["b"], [np.nan]], dtype=object) + + oe = OrdinalEncoder( + handle_unknown="use_encoded_value", + unknown_value=np.nan, + encoded_missing_value=-3, + ).fit(X) + + X_trans = oe.transform(X) + assert_allclose(X_trans, [[0], [1], [-3]]) + + # "c" is unknown and is mapped to np.nan + # "None" is a missing value and is set to -3 + X_test = np.array([["c"], [np.nan]], dtype=object) + X_test_trans = oe.transform(X_test) + assert_allclose(X_test_trans, [[np.nan], [-3]]) + + # Non-regression test for #24082 + X_roundtrip = oe.inverse_transform(X_test_trans) + + # np.nan is unknown so it maps to None + assert X_roundtrip[0][0] is None + + # -3 is the encoded missing value so it maps back to nan + assert np.isnan(X_roundtrip[1][0]) + + +@pytest.mark.parametrize("with_pandas", [True, False]) +def test_ordinal_encoder_encoded_missing_value_error(with_pandas): + """Check OrdinalEncoder errors when encoded_missing_value is used by + an known category.""" + X = np.array([["a", "dog"], ["b", "cat"], ["c", np.nan]], dtype=object) + + # The 0-th feature has no missing values so it is not included in the list of + # features + error_msg = ( + r"encoded_missing_value \(1\) is already used to encode a known category " + r"in features: " + ) + + if with_pandas: + pd = pytest.importorskip("pandas") + X = pd.DataFrame(X, columns=["letter", "pet"]) + error_msg = error_msg + r"\['pet'\]" + else: + error_msg = error_msg + r"\[1\]" + + oe = OrdinalEncoder(encoded_missing_value=1) + + with pytest.raises(ValueError, match=error_msg): + oe.fit(X) + + +@pytest.mark.parametrize( + "X_train, X_test_trans_expected, X_roundtrip_expected", + [ + ( + # missing value is not in training set + # inverse transform will considering encoded nan as unknown + np.array([["a"], ["1"]], dtype=object), + [[0], [np.nan], [np.nan]], + np.asarray([["1"], [None], [None]], dtype=object), + ), + ( + # missing value in training set, + # inverse transform will considering encoded nan as missing + np.array([[np.nan], ["1"], ["a"]], dtype=object), + [[0], [np.nan], [np.nan]], + np.asarray([["1"], [np.nan], [np.nan]], dtype=object), + ), + ], +) +def test_ordinal_encoder_unknown_missing_interaction_both_nan( + X_train, 
X_test_trans_expected, X_roundtrip_expected +): + """Check transform when unknown_value and encoded_missing_value is nan. + + Non-regression test for #24082. + """ + oe = OrdinalEncoder( + handle_unknown="use_encoded_value", + unknown_value=np.nan, + encoded_missing_value=np.nan, + ).fit(X_train) + + X_test = np.array([["1"], [np.nan], ["b"]]) + X_test_trans = oe.transform(X_test) + + # both nan and unknown are encoded as nan + assert_allclose(X_test_trans, X_test_trans_expected) + X_roundtrip = oe.inverse_transform(X_test_trans) + + n_samples = X_roundtrip_expected.shape[0] + for i in range(n_samples): + expected_val = X_roundtrip_expected[i, 0] + val = X_roundtrip[i, 0] + + if expected_val is None: + assert val is None + elif is_scalar_nan(expected_val): + assert np.isnan(val) + else: + assert val == expected_val + + +def test_one_hot_encoder_set_output(): + """Check OneHotEncoder works with set_output.""" + pd = pytest.importorskip("pandas") + + X_df = pd.DataFrame({"A": ["a", "b"], "B": [1, 2]}) + ohe = OneHotEncoder() + + ohe.set_output(transform="pandas") + + match = "Pandas output does not support sparse data. Set sparse_output=False" + with pytest.raises(ValueError, match=match): + ohe.fit_transform(X_df) + + ohe_default = OneHotEncoder(sparse_output=False).set_output(transform="default") + ohe_pandas = OneHotEncoder(sparse_output=False).set_output(transform="pandas") + + X_default = ohe_default.fit_transform(X_df) + X_pandas = ohe_pandas.fit_transform(X_df) + + assert_allclose(X_pandas.to_numpy(), X_default) + assert_array_equal(ohe_pandas.get_feature_names_out(), X_pandas.columns) + + +def test_ordinal_set_output(): + """Check OrdinalEncoder works with set_output.""" + pd = pytest.importorskip("pandas") + + X_df = pd.DataFrame({"A": ["a", "b"], "B": [1, 2]}) + + ord_default = OrdinalEncoder().set_output(transform="default") + ord_pandas = OrdinalEncoder().set_output(transform="pandas") + + X_default = ord_default.fit_transform(X_df) + X_pandas = ord_pandas.fit_transform(X_df) + + assert_allclose(X_pandas.to_numpy(), X_default) + assert_array_equal(ord_pandas.get_feature_names_out(), X_pandas.columns) + + +def test_predefined_categories_dtype(): + """Check that the categories_ dtype is `object` for string categories + + Regression test for gh-25171. + """ + categories = [["as", "mmas", "eas", "ras", "acs"], ["1", "2"]] + + enc = OneHotEncoder(categories=categories) + + enc.fit([["as", "1"]]) + + assert len(categories) == len(enc.categories_) + for n, cat in enumerate(enc.categories_): + assert cat.dtype == object + assert_array_equal(categories[n], cat) + + +def test_ordinal_encoder_missing_unknown_encoding_max(): + """Check missing value or unknown encoding can equal the cardinality.""" + X = np.array([["dog"], ["cat"], [np.nan]], dtype=object) + X_trans = OrdinalEncoder(encoded_missing_value=2).fit_transform(X) + assert_allclose(X_trans, [[1], [0], [2]]) + + enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=2).fit(X) + X_test = np.array([["snake"]]) + X_trans = enc.transform(X_test) + assert_allclose(X_trans, [[2]]) + + +def test_drop_idx_infrequent_categories(): + """Check drop_idx is defined correctly with infrequent categories. + + Non-regression test for gh-25550. 
+ """ + X = np.array( + [["a"] * 2 + ["b"] * 4 + ["c"] * 4 + ["d"] * 4 + ["e"] * 4], dtype=object + ).T + ohe = OneHotEncoder(min_frequency=4, sparse_output=False, drop="first").fit(X) + assert_array_equal( + ohe.get_feature_names_out(), ["x0_c", "x0_d", "x0_e", "x0_infrequent_sklearn"] + ) + assert ohe.categories_[0][ohe.drop_idx_[0]] == "b" + + X = np.array([["a"] * 2 + ["b"] * 2 + ["c"] * 10], dtype=object).T + ohe = OneHotEncoder(min_frequency=4, sparse_output=False, drop="if_binary").fit(X) + assert_array_equal(ohe.get_feature_names_out(), ["x0_infrequent_sklearn"]) + assert ohe.categories_[0][ohe.drop_idx_[0]] == "c" + + X = np.array( + [["a"] * 2 + ["b"] * 4 + ["c"] * 4 + ["d"] * 4 + ["e"] * 4], dtype=object + ).T + ohe = OneHotEncoder(min_frequency=4, sparse_output=False, drop=["d"]).fit(X) + assert_array_equal( + ohe.get_feature_names_out(), ["x0_b", "x0_c", "x0_e", "x0_infrequent_sklearn"] + ) + assert ohe.categories_[0][ohe.drop_idx_[0]] == "d" + + ohe = OneHotEncoder(min_frequency=4, sparse_output=False, drop=None).fit(X) + assert_array_equal( + ohe.get_feature_names_out(), + ["x0_b", "x0_c", "x0_d", "x0_e", "x0_infrequent_sklearn"], + ) + assert ohe.drop_idx_ is None + + +@pytest.mark.parametrize( + "kwargs", + [ + {"max_categories": 3}, + {"min_frequency": 6}, + {"min_frequency": 9}, + {"min_frequency": 0.24}, + {"min_frequency": 0.16}, + {"max_categories": 3, "min_frequency": 8}, + {"max_categories": 4, "min_frequency": 6}, + ], +) +def test_ordinal_encoder_infrequent_three_levels(kwargs): + """Test parameters for grouping 'a', and 'd' into the infrequent category.""" + + X_train = np.array([["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3]).T + ordinal = OrdinalEncoder( + handle_unknown="use_encoded_value", unknown_value=-1, **kwargs + ).fit(X_train) + assert_array_equal(ordinal.categories_, [["a", "b", "c", "d"]]) + assert_array_equal(ordinal.infrequent_categories_, [["a", "d"]]) + + X_test = [["a"], ["b"], ["c"], ["d"], ["z"]] + expected_trans = [[2], [0], [1], [2], [-1]] + + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, expected_trans) + + X_inverse = ordinal.inverse_transform(X_trans) + expected_inverse = [ + ["infrequent_sklearn"], + ["b"], + ["c"], + ["infrequent_sklearn"], + [None], + ] + assert_array_equal(X_inverse, expected_inverse) + + +def test_ordinal_encoder_infrequent_three_levels_user_cats(): + """Test that the order of the categories provided by a user is respected. + + In this case 'c' is encoded as the first category and 'b' is encoded + as the second one. 
+ """ + + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object + ).T + ordinal = OrdinalEncoder( + categories=[["c", "d", "b", "a"]], + max_categories=3, + handle_unknown="use_encoded_value", + unknown_value=-1, + ).fit(X_train) + assert_array_equal(ordinal.categories_, [["c", "d", "b", "a"]]) + assert_array_equal(ordinal.infrequent_categories_, [["d", "a"]]) + + X_test = [["a"], ["b"], ["c"], ["d"], ["z"]] + expected_trans = [[2], [1], [0], [2], [-1]] + + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, expected_trans) + + X_inverse = ordinal.inverse_transform(X_trans) + expected_inverse = [ + ["infrequent_sklearn"], + ["b"], + ["c"], + ["infrequent_sklearn"], + [None], + ] + assert_array_equal(X_inverse, expected_inverse) + + +def test_ordinal_encoder_infrequent_mixed(): + """Test when feature 0 has infrequent categories and feature 1 does not.""" + + X = np.column_stack(([0, 1, 3, 3, 3, 3, 2, 0, 3], [0, 0, 0, 0, 1, 1, 1, 1, 1])) + + ordinal = OrdinalEncoder(max_categories=3).fit(X) + + assert_array_equal(ordinal.infrequent_categories_[0], [1, 2]) + assert ordinal.infrequent_categories_[1] is None + + X_test = [[3, 0], [1, 1]] + expected_trans = [[1, 0], [2, 1]] + + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, expected_trans) + + X_inverse = ordinal.inverse_transform(X_trans) + expected_inverse = np.array([[3, 0], ["infrequent_sklearn", 1]], dtype=object) + assert_array_equal(X_inverse, expected_inverse) + + +def test_ordinal_encoder_infrequent_multiple_categories_dtypes(): + """Test infrequent categories with a pandas DataFrame with multiple dtypes.""" + + pd = pytest.importorskip("pandas") + categorical_dtype = pd.CategoricalDtype(["bird", "cat", "dog", "snake"]) + X = pd.DataFrame( + { + "str": ["a", "f", "c", "f", "f", "a", "c", "b", "b"], + "int": [5, 3, 0, 10, 10, 12, 0, 3, 5], + "categorical": pd.Series( + ["dog"] * 4 + ["cat"] * 3 + ["snake"] + ["bird"], + dtype=categorical_dtype, + ), + }, + columns=["str", "int", "categorical"], + ) + + ordinal = OrdinalEncoder(max_categories=3).fit(X) + # X[:, 0] 'a', 'b', 'c' have the same frequency. 'a' and 'b' will be + # considered infrequent because they appear first when sorted + + # X[:, 1] 0, 3, 5, 10 has frequency 2 and 12 has frequency 1. + # 0, 3, 12 will be considered infrequent because they appear first when + # sorted. 
+ + # X[:, 2] "snake" and "bird" are infrequent + + assert_array_equal(ordinal.infrequent_categories_[0], ["a", "b"]) + assert_array_equal(ordinal.infrequent_categories_[1], [0, 3, 12]) + assert_array_equal(ordinal.infrequent_categories_[2], ["bird", "snake"]) + + X_test = pd.DataFrame( + { + "str": ["a", "b", "f", "c"], + "int": [12, 0, 10, 5], + "categorical": pd.Series( + ["cat"] + ["snake"] + ["bird"] + ["dog"], + dtype=categorical_dtype, + ), + }, + columns=["str", "int", "categorical"], + ) + expected_trans = [[2, 2, 0], [2, 2, 2], [1, 1, 2], [0, 0, 1]] + + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, expected_trans) + + +def test_ordinal_encoder_infrequent_custom_mapping(): + """Check behavior of unknown_value and encoded_missing_value with infrequent.""" + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3 + [np.nan]], dtype=object + ).T + + ordinal = OrdinalEncoder( + handle_unknown="use_encoded_value", + unknown_value=2, + max_categories=2, + encoded_missing_value=3, + ).fit(X_train) + assert_array_equal(ordinal.infrequent_categories_, [["a", "c", "d"]]) + + X_test = np.array([["a"], ["b"], ["c"], ["d"], ["e"], [np.nan]], dtype=object) + expected_trans = [[1], [0], [1], [1], [2], [3]] + + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, expected_trans) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"max_categories": 6}, + {"min_frequency": 2}, + ], +) +def test_ordinal_encoder_all_frequent(kwargs): + """All categories are considered frequent and have the same encoding as the default encoder.""" + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object + ).T + + adjusted_encoder = OrdinalEncoder( + **kwargs, handle_unknown="use_encoded_value", unknown_value=-1 + ).fit(X_train) + default_encoder = OrdinalEncoder( + handle_unknown="use_encoded_value", unknown_value=-1 + ).fit(X_train) + + X_test = [["a"], ["b"], ["c"], ["d"], ["e"]] + + assert_allclose( + adjusted_encoder.transform(X_test), default_encoder.transform(X_test) + ) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"max_categories": 1}, + {"min_frequency": 100}, + ], +) +def test_ordinal_encoder_all_infrequent(kwargs): + """When all categories are infrequent, they are all encoded as zero.""" + X_train = np.array( + [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3], dtype=object + ).T + encoder = OrdinalEncoder( + **kwargs, handle_unknown="use_encoded_value", unknown_value=-1 + ).fit(X_train) + + X_test = [["a"], ["b"], ["c"], ["d"], ["e"]] + assert_allclose(encoder.transform(X_test), [[0], [0], [0], [0], [-1]]) + + +def test_ordinal_encoder_missing_appears_frequent(): + """Check behavior when missing value appears frequently.""" + X = np.array( + [[np.nan] * 20 + ["dog"] * 10 + ["cat"] * 5 + ["snake"] + ["deer"]], + dtype=object, + ).T + ordinal = OrdinalEncoder(max_categories=3).fit(X) + + X_test = np.array([["snake", "cat", "dog", np.nan]], dtype=object).T + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, [[2], [0], [1], [np.nan]]) + + +def test_ordinal_encoder_missing_appears_infrequent(): + """Check behavior when missing value appears infrequently.""" + + # feature 0 has infrequent categories + # feature 1 has no infrequent categories + X = np.array( + [ + [np.nan] + ["dog"] * 10 + ["cat"] * 5 + ["snake"] + ["deer"], + ["red"] * 9 + ["green"] * 9, + ], + dtype=object, + ).T + ordinal = OrdinalEncoder(min_frequency=4).fit(X) + + X_test = np.array( + [ + ["snake", "red"], + ["deer", "green"], + [np.nan, "green"], + ["dog", "green"],
+ ["cat", "red"], + ], + dtype=object, + ) + X_trans = ordinal.transform(X_test) + assert_allclose(X_trans, [[2, 1], [2, 0], [np.nan, 0], [1, 0], [0, 1]]) + + +@pytest.mark.parametrize("Encoder", [OneHotEncoder, OrdinalEncoder]) +def test_encoder_not_fitted(Encoder): + """Check that we raise a `NotFittedError` by calling transform before fit with + the encoders. + + One could expect that the passing the `categories` argument to the encoder + would make it stateless. However, `fit` is making a couple of check, such as the + position of `np.nan`. + """ + X = np.array([["A"], ["B"], ["C"]], dtype=object) + encoder = Encoder(categories=[["A", "B", "C"]]) + with pytest.raises(NotFittedError): + encoder.transform(X) diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_function_transformer.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_function_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..42ec20087900ad7fffabf7a675c0affaa897b96b --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_function_transformer.py @@ -0,0 +1,579 @@ +import warnings + +import numpy as np +import pytest + +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import FunctionTransformer, StandardScaler +from sklearn.utils._testing import ( + _convert_container, + assert_allclose_dense_sparse, + assert_array_equal, +) +from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS + + +def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X): + def _func(X, *args, **kwargs): + args_store.append(X) + args_store.extend(args) + kwargs_store.update(kwargs) + return func(X) + + return _func + + +def test_delegate_to_func(): + # (args|kwargs)_store will hold the positional and keyword arguments + # passed to the function inside the FunctionTransformer. + args_store = [] + kwargs_store = {} + X = np.arange(10).reshape((5, 2)) + assert_array_equal( + FunctionTransformer(_make_func(args_store, kwargs_store)).transform(X), + X, + "transform should have returned X unchanged", + ) + + # The function should only have received X. + assert args_store == [ + X + ], "Incorrect positional arguments passed to func: {args}".format(args=args_store) + + assert ( + not kwargs_store + ), "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + + # reset the argument stores. + args_store[:] = [] + kwargs_store.clear() + transformed = FunctionTransformer( + _make_func(args_store, kwargs_store), + ).transform(X) + + assert_array_equal( + transformed, X, err_msg="transform should have returned X unchanged" + ) + + # The function should have received X + assert args_store == [ + X + ], "Incorrect positional arguments passed to func: {args}".format(args=args_store) + + assert ( + not kwargs_store + ), "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + + +def test_np_log(): + X = np.arange(10).reshape((5, 2)) + + # Test that the numpy.log example still works. 
+ assert_array_equal( + FunctionTransformer(np.log1p).transform(X), + np.log1p(X), + ) + + +def test_kw_arg(): + X = np.linspace(0, 1, num=10).reshape((5, 2)) + + F = FunctionTransformer(np.around, kw_args=dict(decimals=3)) + + # Test that rounding is correct + assert_array_equal(F.transform(X), np.around(X, decimals=3)) + + +def test_kw_arg_update(): + X = np.linspace(0, 1, num=10).reshape((5, 2)) + + F = FunctionTransformer(np.around, kw_args=dict(decimals=3)) + + F.kw_args["decimals"] = 1 + + # Test that rounding is correct + assert_array_equal(F.transform(X), np.around(X, decimals=1)) + + +def test_kw_arg_reset(): + X = np.linspace(0, 1, num=10).reshape((5, 2)) + + F = FunctionTransformer(np.around, kw_args=dict(decimals=3)) + + F.kw_args = dict(decimals=1) + + # Test that rounding is correct + assert_array_equal(F.transform(X), np.around(X, decimals=1)) + + +def test_inverse_transform(): + X = np.array([1, 4, 9, 16]).reshape((2, 2)) + + # Test that inverse_transform works correctly + F = FunctionTransformer( + func=np.sqrt, + inverse_func=np.around, + inv_kw_args=dict(decimals=3), + ) + assert_array_equal( + F.inverse_transform(F.transform(X)), + np.around(np.sqrt(X), decimals=3), + ) + + +@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS + CSR_CONTAINERS) +def test_check_inverse(sparse_container): + X = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2)) + if sparse_container is not None: + X = sparse_container(X) + + trans = FunctionTransformer( + func=np.sqrt, + inverse_func=np.around, + accept_sparse=sparse_container is not None, + check_inverse=True, + validate=True, + ) + warning_message = ( + "The provided functions are not strictly" + " inverse of each other. If you are sure you" + " want to proceed regardless, set" + " 'check_inverse=False'." + ) + with pytest.warns(UserWarning, match=warning_message): + trans.fit(X) + + trans = FunctionTransformer( + func=np.expm1, + inverse_func=np.log1p, + accept_sparse=sparse_container is not None, + check_inverse=True, + validate=True, + ) + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + Xt = trans.fit_transform(X) + + assert_allclose_dense_sparse(X, trans.inverse_transform(Xt)) + + +def test_check_inverse_func_or_inverse_not_provided(): + # check that we don't check inverse when one of the func or inverse is not + # provided. 
+ X = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2)) + + trans = FunctionTransformer( + func=np.expm1, inverse_func=None, check_inverse=True, validate=True + ) + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + trans.fit(X) + trans = FunctionTransformer( + func=None, inverse_func=np.expm1, check_inverse=True, validate=True + ) + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + trans.fit(X) + + +def test_function_transformer_frame(): + pd = pytest.importorskip("pandas") + X_df = pd.DataFrame(np.random.randn(100, 10)) + transformer = FunctionTransformer() + X_df_trans = transformer.fit_transform(X_df) + assert hasattr(X_df_trans, "loc") + + +@pytest.mark.parametrize("X_type", ["array", "series"]) +def test_function_transformer_raise_error_with_mixed_dtype(X_type): + """Check that `FunctionTransformer.check_inverse` raises error on mixed dtype.""" + mapping = {"one": 1, "two": 2, "three": 3, 5: "five", 6: "six"} + inverse_mapping = {value: key for key, value in mapping.items()} + dtype = "object" + + data = ["one", "two", "three", "one", "one", 5, 6] + data = _convert_container(data, X_type, columns_name=["value"], dtype=dtype) + + def func(X): + return np.array([mapping[X[i]] for i in range(X.size)], dtype=object) + + def inverse_func(X): + return _convert_container( + [inverse_mapping[x] for x in X], + X_type, + columns_name=["value"], + dtype=dtype, + ) + + transformer = FunctionTransformer( + func=func, inverse_func=inverse_func, validate=False, check_inverse=True + ) + + msg = "'check_inverse' is only supported when all the elements in `X` is numerical." + with pytest.raises(ValueError, match=msg): + transformer.fit(data) + + +def test_function_transformer_support_all_nummerical_dataframes_check_inverse_True(): + """Check support for dataframes with only numerical values.""" + pd = pytest.importorskip("pandas") + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + transformer = FunctionTransformer( + func=lambda x: x + 2, inverse_func=lambda x: x - 2, check_inverse=True + ) + + # Does not raise an error + df_out = transformer.fit_transform(df) + assert_allclose_dense_sparse(df_out, df + 2) + + +def test_function_transformer_with_dataframe_and_check_inverse_True(): + """Check error is raised when check_inverse=True. + + Non-regresion test for gh-25261. + """ + pd = pytest.importorskip("pandas") + transformer = FunctionTransformer( + func=lambda x: x, inverse_func=lambda x: x, check_inverse=True + ) + + df_mixed = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + msg = "'check_inverse' is only supported when all the elements in `X` is numerical." 
+ with pytest.raises(ValueError, match=msg): + transformer.fit(df_mixed) + + +@pytest.mark.parametrize( + "X, feature_names_out, input_features, expected", + [ + ( + # NumPy inputs, default behavior: generate names + np.random.rand(100, 3), + "one-to-one", + None, + ("x0", "x1", "x2"), + ), + ( + # Pandas input, default behavior: use input feature names + {"a": np.random.rand(100), "b": np.random.rand(100)}, + "one-to-one", + None, + ("a", "b"), + ), + ( + # NumPy input, feature_names_out=callable + np.random.rand(100, 3), + lambda transformer, input_features: ("a", "b"), + None, + ("a", "b"), + ), + ( + # Pandas input, feature_names_out=callable + {"a": np.random.rand(100), "b": np.random.rand(100)}, + lambda transformer, input_features: ("c", "d", "e"), + None, + ("c", "d", "e"), + ), + ( + # NumPy input, feature_names_out=callable – default input_features + np.random.rand(100, 3), + lambda transformer, input_features: tuple(input_features) + ("a",), + None, + ("x0", "x1", "x2", "a"), + ), + ( + # Pandas input, feature_names_out=callable – default input_features + {"a": np.random.rand(100), "b": np.random.rand(100)}, + lambda transformer, input_features: tuple(input_features) + ("c",), + None, + ("a", "b", "c"), + ), + ( + # NumPy input, input_features=list of names + np.random.rand(100, 3), + "one-to-one", + ("a", "b", "c"), + ("a", "b", "c"), + ), + ( + # Pandas input, input_features=list of names + {"a": np.random.rand(100), "b": np.random.rand(100)}, + "one-to-one", + ("a", "b"), # must match feature_names_in_ + ("a", "b"), + ), + ( + # NumPy input, feature_names_out=callable, input_features=list + np.random.rand(100, 3), + lambda transformer, input_features: tuple(input_features) + ("d",), + ("a", "b", "c"), + ("a", "b", "c", "d"), + ), + ( + # Pandas input, feature_names_out=callable, input_features=list + {"a": np.random.rand(100), "b": np.random.rand(100)}, + lambda transformer, input_features: tuple(input_features) + ("c",), + ("a", "b"), # must match feature_names_in_ + ("a", "b", "c"), + ), + ], +) +@pytest.mark.parametrize("validate", [True, False]) +def test_function_transformer_get_feature_names_out( + X, feature_names_out, input_features, expected, validate +): + if isinstance(X, dict): + pd = pytest.importorskip("pandas") + X = pd.DataFrame(X) + + transformer = FunctionTransformer( + feature_names_out=feature_names_out, validate=validate + ) + transformer.fit(X) + names = transformer.get_feature_names_out(input_features) + assert isinstance(names, np.ndarray) + assert names.dtype == object + assert_array_equal(names, expected) + + +def test_function_transformer_get_feature_names_out_without_validation(): + transformer = FunctionTransformer(feature_names_out="one-to-one", validate=False) + X = np.random.rand(100, 2) + transformer.fit_transform(X) + + names = transformer.get_feature_names_out(("a", "b")) + assert isinstance(names, np.ndarray) + assert names.dtype == object + assert_array_equal(names, ("a", "b")) + + +def test_function_transformer_feature_names_out_is_None(): + transformer = FunctionTransformer() + X = np.random.rand(100, 2) + transformer.fit_transform(X) + + msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'" + with pytest.raises(AttributeError, match=msg): + transformer.get_feature_names_out() + + +def test_function_transformer_feature_names_out_uses_estimator(): + def add_n_random_features(X, n): + return np.concatenate([X, np.random.rand(len(X), n)], axis=1) + + def feature_names_out(transformer, input_features): + n = 
transformer.kw_args["n"] + return list(input_features) + [f"rnd{i}" for i in range(n)] + + transformer = FunctionTransformer( + func=add_n_random_features, + feature_names_out=feature_names_out, + kw_args=dict(n=3), + validate=True, + ) + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"a": np.random.rand(100), "b": np.random.rand(100)}) + transformer.fit_transform(df) + names = transformer.get_feature_names_out() + + assert isinstance(names, np.ndarray) + assert names.dtype == object + assert_array_equal(names, ("a", "b", "rnd0", "rnd1", "rnd2")) + + +def test_function_transformer_validate_inverse(): + """Test that function transformer does not reset estimator in + `inverse_transform`.""" + + def add_constant_feature(X): + X_one = np.ones((X.shape[0], 1)) + return np.concatenate((X, X_one), axis=1) + + def inverse_add_constant(X): + return X[:, :-1] + + X = np.array([[1, 2], [3, 4], [3, 4]]) + trans = FunctionTransformer( + func=add_constant_feature, + inverse_func=inverse_add_constant, + validate=True, + ) + X_trans = trans.fit_transform(X) + assert trans.n_features_in_ == X.shape[1] + + trans.inverse_transform(X_trans) + assert trans.n_features_in_ == X.shape[1] + + +@pytest.mark.parametrize( + "feature_names_out, expected", + [ + ("one-to-one", ["pet", "color"]), + [lambda est, names: [f"{n}_out" for n in names], ["pet_out", "color_out"]], + ], +) +@pytest.mark.parametrize("in_pipeline", [True, False]) +def test_get_feature_names_out_dataframe_with_string_data( + feature_names_out, expected, in_pipeline +): + """Check that get_feature_names_out works with DataFrames with string data.""" + pd = pytest.importorskip("pandas") + X = pd.DataFrame({"pet": ["dog", "cat"], "color": ["red", "green"]}) + + def func(X): + if feature_names_out == "one-to-one": + return X + else: + name = feature_names_out(None, X.columns) + return X.rename(columns=dict(zip(X.columns, name))) + + transformer = FunctionTransformer(func=func, feature_names_out=feature_names_out) + if in_pipeline: + transformer = make_pipeline(transformer) + + X_trans = transformer.fit_transform(X) + assert isinstance(X_trans, pd.DataFrame) + + names = transformer.get_feature_names_out() + assert isinstance(names, np.ndarray) + assert names.dtype == object + assert_array_equal(names, expected) + + +def test_set_output_func(): + """Check behavior of set_output with different settings.""" + pd = pytest.importorskip("pandas") + + X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]}) + + ft = FunctionTransformer(np.log, feature_names_out="one-to-one") + + # no warning is raised when feature_names_out is defined + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + ft.set_output(transform="pandas") + + X_trans = ft.fit_transform(X) + assert isinstance(X_trans, pd.DataFrame) + assert_array_equal(X_trans.columns, ["a", "b"]) + + ft = FunctionTransformer(lambda x: 2 * x) + ft.set_output(transform="pandas") + + # no warning is raised when func returns a panda dataframe + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + X_trans = ft.fit_transform(X) + assert isinstance(X_trans, pd.DataFrame) + assert_array_equal(X_trans.columns, ["a", "b"]) + + # Warning is raised when func returns a ndarray + ft_np = FunctionTransformer(lambda x: np.asarray(x)) + + for transform in ("pandas", "polars"): + ft_np.set_output(transform=transform) + msg = ( + f"When `set_output` is configured to be '{transform}'.*{transform} " + "DataFrame.*" + ) + with pytest.warns(UserWarning, match=msg): + 
ft_np.fit_transform(X) + + # default transform does not warn + ft_np.set_output(transform="default") + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + ft_np.fit_transform(X) + + +def test_consistence_column_name_between_steps(): + """Check that we have a consistence between the feature names out of + `FunctionTransformer` and the feature names in of the next step in the pipeline. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27695 + """ + pd = pytest.importorskip("pandas") + + def with_suffix(_, names): + return [name + "__log" for name in names] + + pipeline = make_pipeline( + FunctionTransformer(np.log1p, feature_names_out=with_suffix), StandardScaler() + ) + + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["a", "b"]) + X_trans = pipeline.fit_transform(df) + assert pipeline.get_feature_names_out().tolist() == ["a__log", "b__log"] + # StandardScaler will convert to a numpy array + assert isinstance(X_trans, np.ndarray) + + +@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"]) +@pytest.mark.parametrize("transform_output", ["default", "pandas", "polars"]) +def test_function_transformer_overwrite_column_names(dataframe_lib, transform_output): + """Check that we overwrite the column names when we should.""" + lib = pytest.importorskip(dataframe_lib) + if transform_output != "numpy": + pytest.importorskip(transform_output) + + df = lib.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]}) + + def with_suffix(_, names): + return [name + "__log" for name in names] + + transformer = FunctionTransformer(feature_names_out=with_suffix).set_output( + transform=transform_output + ) + X_trans = transformer.fit_transform(df) + assert_array_equal(np.asarray(X_trans), np.asarray(df)) + + feature_names = transformer.get_feature_names_out() + assert list(X_trans.columns) == with_suffix(None, df.columns) + assert feature_names.tolist() == with_suffix(None, df.columns) + + +@pytest.mark.parametrize( + "feature_names_out", + ["one-to-one", lambda _, names: [f"{name}_log" for name in names]], +) +def test_function_transformer_overwrite_column_names_numerical(feature_names_out): + """Check the same as `test_function_transformer_overwrite_column_names` + but for the specific case of pandas where column names can be numerical.""" + pd = pytest.importorskip("pandas") + + df = pd.DataFrame({0: [1, 2, 3], 1: [10, 20, 100]}) + + transformer = FunctionTransformer(feature_names_out=feature_names_out) + X_trans = transformer.fit_transform(df) + assert_array_equal(np.asarray(X_trans), np.asarray(df)) + + feature_names = transformer.get_feature_names_out() + assert list(X_trans.columns) == list(feature_names) + + +@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"]) +@pytest.mark.parametrize( + "feature_names_out", + ["one-to-one", lambda _, names: [f"{name}_log" for name in names]], +) +def test_function_transformer_error_column_inconsistent( + dataframe_lib, feature_names_out +): + """Check that we raise an error when `func` returns a dataframe with new + column names that become inconsistent with `get_feature_names_out`.""" + lib = pytest.importorskip(dataframe_lib) + + df = lib.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]}) + + def func(df): + if dataframe_lib == "pandas": + return df.rename(columns={"a": "c"}) + else: + return df.rename({"a": "c"}) + + transformer = FunctionTransformer(func=func, feature_names_out=feature_names_out) + err_msg = "The output generated by `func` have different column names" + with 
pytest.raises(ValueError, match=err_msg): + transformer.fit_transform(df).columns diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_label.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_label.py new file mode 100644 index 0000000000000000000000000000000000000000..c0aa85b2cfe70bcd2fb85c54aca977450c0e32ec --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_label.py @@ -0,0 +1,745 @@ +import numpy as np +import pytest +from scipy.sparse import issparse + +from sklearn import config_context, datasets +from sklearn.preprocessing._label import ( + LabelBinarizer, + LabelEncoder, + MultiLabelBinarizer, + _inverse_binarize_multiclass, + _inverse_binarize_thresholding, + label_binarize, +) +from sklearn.utils._array_api import ( + _convert_to_numpy, + get_namespace, + yield_namespace_device_dtype_combinations, +) +from sklearn.utils._testing import ( + _array_api_for_tests, + assert_array_equal, +) +from sklearn.utils.fixes import ( + COO_CONTAINERS, + CSC_CONTAINERS, + CSR_CONTAINERS, + DOK_CONTAINERS, + LIL_CONTAINERS, +) +from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _to_object_array + +iris = datasets.load_iris() + + +def toarray(a): + if hasattr(a, "toarray"): + a = a.toarray() + return a + + +def test_label_binarizer(): + # one-class case defaults to negative label + # For dense case: + inp = ["pos", "pos", "pos", "pos"] + lb = LabelBinarizer(sparse_output=False) + expected = np.array([[0, 0, 0, 0]]).T + got = lb.fit_transform(inp) + assert_array_equal(lb.classes_, ["pos"]) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + + # For sparse case: + lb = LabelBinarizer(sparse_output=True) + got = lb.fit_transform(inp) + assert issparse(got) + assert_array_equal(lb.classes_, ["pos"]) + assert_array_equal(expected, got.toarray()) + assert_array_equal(lb.inverse_transform(got.toarray()), inp) + + lb = LabelBinarizer(sparse_output=False) + # two-class case + inp = ["neg", "pos", "pos", "neg"] + expected = np.array([[0, 1, 1, 0]]).T + got = lb.fit_transform(inp) + assert_array_equal(lb.classes_, ["neg", "pos"]) + assert_array_equal(expected, got) + + to_invert = np.array([[1, 0], [0, 1], [0, 1], [1, 0]]) + assert_array_equal(lb.inverse_transform(to_invert), inp) + + # multi-class case + inp = ["spam", "ham", "eggs", "ham", "0"] + expected = np.array( + [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]] + ) + got = lb.fit_transform(inp) + assert_array_equal(lb.classes_, ["0", "eggs", "ham", "spam"]) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + + +def test_label_binarizer_unseen_labels(): + lb = LabelBinarizer() + + expected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + got = lb.fit_transform(["b", "d", "e"]) + assert_array_equal(expected, got) + + expected = np.array( + [[0, 0, 0], [1, 0, 0], [0, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]] + ) + got = lb.transform(["a", "b", "c", "d", "e", "f"]) + assert_array_equal(expected, got) + + +def test_label_binarizer_set_label_encoding(): + lb = LabelBinarizer(neg_label=-2, pos_label=0) + + # two-class case with pos_label=0 + inp = np.array([0, 1, 1, 0]) + expected = np.array([[-2, 0, 0, -2]]).T + got = lb.fit_transform(inp) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + + lb = LabelBinarizer(neg_label=-2, pos_label=2) + + # multi-class case + inp = np.array([3, 2, 1, 2, 0]) + expected = np.array( + [ + [-2, -2, 
-2, +2], + [-2, -2, +2, -2], + [-2, +2, -2, -2], + [-2, -2, +2, -2], + [+2, -2, -2, -2], + ] + ) + got = lb.fit_transform(inp) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + + +@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"]) +@pytest.mark.parametrize("unique_first", [True, False]) +def test_label_binarizer_pandas_nullable(dtype, unique_first): + """Checks that LabelBinarizer works with pandas nullable dtypes. + + Non-regression test for gh-25637. + """ + pd = pytest.importorskip("pandas") + + y_true = pd.Series([1, 0, 0, 1, 0, 1, 1, 0, 1], dtype=dtype) + if unique_first: + # Calling unique creates a pandas array which has a different interface + # compared to a pandas Series. Specifically, pandas arrays do not have "iloc". + y_true = y_true.unique() + lb = LabelBinarizer().fit(y_true) + y_out = lb.transform([1, 0]) + + assert_array_equal(y_out, [[1], [0]]) + + +def test_label_binarizer_errors(): + # Check that invalid arguments yield ValueError + one_class = np.array([0, 0, 0, 0]) + lb = LabelBinarizer().fit(one_class) + + multi_label = [(2, 3), (0,), (0, 2)] + err_msg = "You appear to be using a legacy multi-label data representation." + with pytest.raises(ValueError, match=err_msg): + lb.transform(multi_label) + + lb = LabelBinarizer() + err_msg = "This LabelBinarizer instance is not fitted yet" + with pytest.raises(ValueError, match=err_msg): + lb.transform([]) + with pytest.raises(ValueError, match=err_msg): + lb.inverse_transform([]) + + input_labels = [0, 1, 0, 1] + err_msg = "neg_label=2 must be strictly less than pos_label=1." + lb = LabelBinarizer(neg_label=2, pos_label=1) + with pytest.raises(ValueError, match=err_msg): + lb.fit(input_labels) + err_msg = "neg_label=2 must be strictly less than pos_label=2." 
+ lb = LabelBinarizer(neg_label=2, pos_label=2) + with pytest.raises(ValueError, match=err_msg): + lb.fit(input_labels) + err_msg = ( + "Sparse binarization is only supported with non zero pos_label and zero " + "neg_label, got pos_label=2 and neg_label=1" + ) + lb = LabelBinarizer(neg_label=1, pos_label=2, sparse_output=True) + with pytest.raises(ValueError, match=err_msg): + lb.fit(input_labels) + + # Sequence of seq type should raise ValueError + y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]] + err_msg = "You appear to be using a legacy multi-label data representation" + with pytest.raises(ValueError, match=err_msg): + LabelBinarizer().fit_transform(y_seq_of_seqs) + + # Fail on the dimension of 'binary' + err_msg = "output_type='binary', but y.shape" + with pytest.raises(ValueError, match=err_msg): + _inverse_binarize_thresholding( + y=np.array([[1, 2, 3], [2, 1, 3]]), + output_type="binary", + classes=[1, 2, 3], + threshold=0, + ) + + # Fail on multioutput data + err_msg = "Multioutput target data is not supported with label binarization" + with pytest.raises(ValueError, match=err_msg): + LabelBinarizer().fit(np.array([[1, 3], [2, 1]])) + with pytest.raises(ValueError, match=err_msg): + label_binarize(np.array([[1, 3], [2, 1]]), classes=[1, 2, 3]) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_label_binarizer_sparse_errors(csr_container): + # Fail on y_type + err_msg = "foo format is not supported" + with pytest.raises(ValueError, match=err_msg): + _inverse_binarize_thresholding( + y=csr_container([[1, 2], [2, 1]]), + output_type="foo", + classes=[1, 2], + threshold=0, + ) + + # Fail on the number of classes + err_msg = "The number of class is not equal to the number of dimension of y." + with pytest.raises(ValueError, match=err_msg): + _inverse_binarize_thresholding( + y=csr_container([[1, 2], [2, 1]]), + output_type="foo", + classes=[1, 2, 3], + threshold=0, + ) + + +@pytest.mark.parametrize( + "values, classes, unknown", + [ + ( + np.array([2, 1, 3, 1, 3], dtype="int64"), + np.array([1, 2, 3], dtype="int64"), + np.array([4], dtype="int64"), + ), + ( + np.array(["b", "a", "c", "a", "c"], dtype=object), + np.array(["a", "b", "c"], dtype=object), + np.array(["d"], dtype=object), + ), + ( + np.array(["b", "a", "c", "a", "c"]), + np.array(["a", "b", "c"]), + np.array(["d"]), + ), + ], + ids=["int64", "object", "str"], +) +def test_label_encoder(values, classes, unknown): + # Test LabelEncoder's transform, fit_transform and + # inverse_transform methods + le = LabelEncoder() + le.fit(values) + assert_array_equal(le.classes_, classes) + assert_array_equal(le.transform(values), [1, 0, 2, 0, 2]) + assert_array_equal(le.inverse_transform([1, 0, 2, 0, 2]), values) + le = LabelEncoder() + ret = le.fit_transform(values) + assert_array_equal(ret, [1, 0, 2, 0, 2]) + + with pytest.raises(ValueError, match="unseen labels"): + le.transform(unknown) + + +def test_label_encoder_negative_ints(): + le = LabelEncoder() + le.fit([1, 1, 4, 5, -1, 0]) + assert_array_equal(le.classes_, [-1, 0, 1, 4, 5]) + assert_array_equal(le.transform([0, 1, 4, 4, 5, -1, -1]), [1, 2, 3, 3, 4, 0, 0]) + assert_array_equal( + le.inverse_transform([1, 2, 3, 3, 4, 0, 0]), [0, 1, 4, 4, 5, -1, -1] + ) + with pytest.raises(ValueError): + le.transform([0, 6]) + + +@pytest.mark.parametrize("dtype", ["str", "object"]) +def test_label_encoder_str_bad_shape(dtype): + le = LabelEncoder() + le.fit(np.array(["apple", "orange"], dtype=dtype)) + msg = "should be a 1d array" + with pytest.raises(ValueError, 
match=msg): + le.transform("apple") + + +def test_label_encoder_errors(): + # Check that invalid arguments yield ValueError + le = LabelEncoder() + with pytest.raises(ValueError): + le.transform([]) + with pytest.raises(ValueError): + le.inverse_transform([]) + + # Fail on unseen labels + le = LabelEncoder() + le.fit([1, 2, 3, -1, 1]) + msg = "contains previously unseen labels" + with pytest.raises(ValueError, match=msg): + le.inverse_transform([-2]) + with pytest.raises(ValueError, match=msg): + le.inverse_transform([-2, -3, -4]) + + # Fail on inverse_transform("") + msg = r"should be a 1d array.+shape \(\)" + with pytest.raises(ValueError, match=msg): + le.inverse_transform("") + + +@pytest.mark.parametrize( + "values", + [ + np.array([2, 1, 3, 1, 3], dtype="int64"), + np.array(["b", "a", "c", "a", "c"], dtype=object), + np.array(["b", "a", "c", "a", "c"]), + ], + ids=["int64", "object", "str"], +) +def test_label_encoder_empty_array(values): + le = LabelEncoder() + le.fit(values) + # test empty transform + transformed = le.transform([]) + assert_array_equal(np.array([]), transformed) + # test empty inverse transform + inverse_transformed = le.inverse_transform([]) + assert_array_equal(np.array([]), inverse_transformed) + + +def test_sparse_output_multilabel_binarizer(): + # test input as iterable of iterables + inputs = [ + lambda: [(2, 3), (1,), (1, 2)], + lambda: ({2, 3}, {1}, {1, 2}), + lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]), + ] + indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) + + inverse = inputs[0]() + for sparse_output in [True, False]: + for inp in inputs: + # With fit_transform + mlb = MultiLabelBinarizer(sparse_output=sparse_output) + got = mlb.fit_transform(inp()) + assert issparse(got) == sparse_output + if sparse_output: + # verify CSR assumption that indices and indptr have same dtype + assert got.indices.dtype == got.indptr.dtype + got = got.toarray() + assert_array_equal(indicator_mat, got) + assert_array_equal([1, 2, 3], mlb.classes_) + assert mlb.inverse_transform(got) == inverse + + # With fit + mlb = MultiLabelBinarizer(sparse_output=sparse_output) + got = mlb.fit(inp()).transform(inp()) + assert issparse(got) == sparse_output + if sparse_output: + # verify CSR assumption that indices and indptr have same dtype + assert got.indices.dtype == got.indptr.dtype + got = got.toarray() + assert_array_equal(indicator_mat, got) + assert_array_equal([1, 2, 3], mlb.classes_) + assert mlb.inverse_transform(got) == inverse + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_output_multilabel_binarizer_errors(csr_container): + inp = iter([iter((2, 3)), iter((1,)), {1, 2}]) + mlb = MultiLabelBinarizer(sparse_output=False) + mlb.fit(inp) + with pytest.raises(ValueError): + mlb.inverse_transform( + csr_container(np.array([[0, 1, 1], [2, 0, 0], [1, 1, 0]])) + ) + + +def test_multilabel_binarizer(): + # test input as iterable of iterables + inputs = [ + lambda: [(2, 3), (1,), (1, 2)], + lambda: ({2, 3}, {1}, {1, 2}), + lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]), + ] + indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) + inverse = inputs[0]() + for inp in inputs: + # With fit_transform + mlb = MultiLabelBinarizer() + got = mlb.fit_transform(inp()) + assert_array_equal(indicator_mat, got) + assert_array_equal([1, 2, 3], mlb.classes_) + assert mlb.inverse_transform(got) == inverse + + # With fit + mlb = MultiLabelBinarizer() + got = mlb.fit(inp()).transform(inp()) + assert_array_equal(indicator_mat, got) + 
assert_array_equal([1, 2, 3], mlb.classes_) + assert mlb.inverse_transform(got) == inverse + + +def test_multilabel_binarizer_empty_sample(): + mlb = MultiLabelBinarizer() + y = [[1, 2], [1], []] + Y = np.array([[1, 1], [1, 0], [0, 0]]) + assert_array_equal(mlb.fit_transform(y), Y) + + +def test_multilabel_binarizer_unknown_class(): + mlb = MultiLabelBinarizer() + y = [[1, 2]] + Y = np.array([[1, 0], [0, 1]]) + warning_message = "unknown class.* will be ignored" + with pytest.warns(UserWarning, match=warning_message): + matrix = mlb.fit(y).transform([[4, 1], [2, 0]]) + + Y = np.array([[1, 0, 0], [0, 1, 0]]) + mlb = MultiLabelBinarizer(classes=[1, 2, 3]) + with pytest.warns(UserWarning, match=warning_message): + matrix = mlb.fit(y).transform([[4, 1], [2, 0]]) + assert_array_equal(matrix, Y) + + +def test_multilabel_binarizer_given_classes(): + inp = [(2, 3), (1,), (1, 2)] + indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) + # fit_transform() + mlb = MultiLabelBinarizer(classes=[1, 3, 2]) + assert_array_equal(mlb.fit_transform(inp), indicator_mat) + assert_array_equal(mlb.classes_, [1, 3, 2]) + + # fit().transform() + mlb = MultiLabelBinarizer(classes=[1, 3, 2]) + assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) + assert_array_equal(mlb.classes_, [1, 3, 2]) + + # ensure works with extra class + mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2]) + assert_array_equal( + mlb.fit_transform(inp), np.hstack(([[0], [0], [0]], indicator_mat)) + ) + assert_array_equal(mlb.classes_, [4, 1, 3, 2]) + + # ensure fit is no-op as iterable is not consumed + inp = iter(inp) + mlb = MultiLabelBinarizer(classes=[1, 3, 2]) + assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) + + # ensure a ValueError is thrown if given duplicate classes + err_msg = ( + "The classes argument contains duplicate classes. Remove " + "these duplicates before passing them to MultiLabelBinarizer." 
+ ) + mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3]) + with pytest.raises(ValueError, match=err_msg): + mlb.fit(inp) + + +def test_multilabel_binarizer_multiple_calls(): + inp = [(2, 3), (1,), (1, 2)] + indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) + + indicator_mat2 = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) + + # first call + mlb = MultiLabelBinarizer(classes=[1, 3, 2]) + assert_array_equal(mlb.fit_transform(inp), indicator_mat) + # second call change class + mlb.classes = [1, 2, 3] + assert_array_equal(mlb.fit_transform(inp), indicator_mat2) + + +def test_multilabel_binarizer_same_length_sequence(): + # Ensure sequences of the same length are not interpreted as a 2-d array + inp = [[1], [0], [2]] + indicator_mat = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]]) + # fit_transform() + mlb = MultiLabelBinarizer() + assert_array_equal(mlb.fit_transform(inp), indicator_mat) + assert_array_equal(mlb.inverse_transform(indicator_mat), inp) + + # fit().transform() + mlb = MultiLabelBinarizer() + assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) + assert_array_equal(mlb.inverse_transform(indicator_mat), inp) + + +def test_multilabel_binarizer_non_integer_labels(): + tuple_classes = _to_object_array([(1,), (2,), (3,)]) + inputs = [ + ([("2", "3"), ("1",), ("1", "2")], ["1", "2", "3"]), + ([("b", "c"), ("a",), ("a", "b")], ["a", "b", "c"]), + ([((2,), (3,)), ((1,),), ((1,), (2,))], tuple_classes), + ] + indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) + for inp, classes in inputs: + # fit_transform() + mlb = MultiLabelBinarizer() + inp = np.array(inp, dtype=object) + assert_array_equal(mlb.fit_transform(inp), indicator_mat) + assert_array_equal(mlb.classes_, classes) + indicator_mat_inv = np.array(mlb.inverse_transform(indicator_mat), dtype=object) + assert_array_equal(indicator_mat_inv, inp) + + # fit().transform() + mlb = MultiLabelBinarizer() + assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) + assert_array_equal(mlb.classes_, classes) + indicator_mat_inv = np.array(mlb.inverse_transform(indicator_mat), dtype=object) + assert_array_equal(indicator_mat_inv, inp) + + mlb = MultiLabelBinarizer() + with pytest.raises(TypeError): + mlb.fit_transform([({}), ({}, {"a": "b"})]) + + +def test_multilabel_binarizer_non_unique(): + inp = [(1, 1, 1, 0)] + indicator_mat = np.array([[1, 1]]) + mlb = MultiLabelBinarizer() + assert_array_equal(mlb.fit_transform(inp), indicator_mat) + + +def test_multilabel_binarizer_inverse_validation(): + inp = [(1, 1, 1, 0)] + mlb = MultiLabelBinarizer() + mlb.fit_transform(inp) + # Not binary + with pytest.raises(ValueError): + mlb.inverse_transform(np.array([[1, 3]])) + # The following binary cases are fine, however + mlb.inverse_transform(np.array([[0, 0]])) + mlb.inverse_transform(np.array([[1, 1]])) + mlb.inverse_transform(np.array([[1, 0]])) + + # Wrong shape + with pytest.raises(ValueError): + mlb.inverse_transform(np.array([[1]])) + with pytest.raises(ValueError): + mlb.inverse_transform(np.array([[1, 1, 1]])) + + +def test_label_binarize_with_class_order(): + out = label_binarize([1, 6], classes=[1, 2, 4, 6]) + expected = np.array([[1, 0, 0, 0], [0, 0, 0, 1]]) + assert_array_equal(out, expected) + + # Modified class order + out = label_binarize([1, 6], classes=[1, 6, 4, 2]) + expected = np.array([[1, 0, 0, 0], [0, 1, 0, 0]]) + assert_array_equal(out, expected) + + out = label_binarize([0, 1, 2, 3], classes=[3, 2, 0, 1]) + expected = np.array([[0, 0, 1, 0], [0, 0, 0, 1], [0, 1, 0, 0], [1, 0, 0, 0]]) + 
assert_array_equal(out, expected) + + +def check_binarized_results(y, classes, pos_label, neg_label, expected): + for sparse_output in [True, False]: + if (pos_label == 0 or neg_label != 0) and sparse_output: + with pytest.raises(ValueError): + label_binarize( + y, + classes=classes, + neg_label=neg_label, + pos_label=pos_label, + sparse_output=sparse_output, + ) + continue + + # check label_binarize + binarized = label_binarize( + y, + classes=classes, + neg_label=neg_label, + pos_label=pos_label, + sparse_output=sparse_output, + ) + assert_array_equal(toarray(binarized), expected) + assert issparse(binarized) == sparse_output + + # check inverse + y_type = type_of_target(y) + if y_type == "multiclass": + inversed = _inverse_binarize_multiclass(binarized, classes=classes) + + else: + inversed = _inverse_binarize_thresholding( + binarized, + output_type=y_type, + classes=classes, + threshold=((neg_label + pos_label) / 2.0), + ) + + assert_array_equal(toarray(inversed), toarray(y)) + + # Check label binarizer + lb = LabelBinarizer( + neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output + ) + binarized = lb.fit_transform(y) + assert_array_equal(toarray(binarized), expected) + assert issparse(binarized) == sparse_output + inverse_output = lb.inverse_transform(binarized) + assert_array_equal(toarray(inverse_output), toarray(y)) + assert issparse(inverse_output) == issparse(y) + + +def test_label_binarize_binary(): + y = [0, 1, 0] + classes = [0, 1] + pos_label = 2 + neg_label = -1 + expected = np.array([[2, -1], [-1, 2], [2, -1]])[:, 1].reshape((-1, 1)) + + check_binarized_results(y, classes, pos_label, neg_label, expected) + + # Binary case where sparse_output = True will not result in a ValueError + y = [0, 1, 0] + classes = [0, 1] + pos_label = 3 + neg_label = 0 + expected = np.array([[3, 0], [0, 3], [3, 0]])[:, 1].reshape((-1, 1)) + + check_binarized_results(y, classes, pos_label, neg_label, expected) + + +def test_label_binarize_multiclass(): + y = [0, 1, 2] + classes = [0, 1, 2] + pos_label = 2 + neg_label = 0 + expected = 2 * np.eye(3) + + check_binarized_results(y, classes, pos_label, neg_label, expected) + + with pytest.raises(ValueError): + label_binarize( + y, classes=classes, neg_label=-1, pos_label=pos_label, sparse_output=True + ) + + +@pytest.mark.parametrize( + "arr_type", + [np.array] + + COO_CONTAINERS + + CSC_CONTAINERS + + CSR_CONTAINERS + + DOK_CONTAINERS + + LIL_CONTAINERS, +) +def test_label_binarize_multilabel(arr_type): + y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]]) + classes = [0, 1, 2] + pos_label = 2 + neg_label = 0 + expected = pos_label * y_ind + y = arr_type(y_ind) + + check_binarized_results(y, classes, pos_label, neg_label, expected) + + with pytest.raises(ValueError): + label_binarize( + y, classes=classes, neg_label=-1, pos_label=pos_label, sparse_output=True + ) + + +def test_invalid_input_label_binarize(): + with pytest.raises(ValueError): + label_binarize([0, 2], classes=[0, 2], pos_label=0, neg_label=1) + with pytest.raises(ValueError, match="continuous target data is not "): + label_binarize([1.2, 2.7], classes=[0, 1]) + with pytest.raises(ValueError, match="mismatch with the labels"): + label_binarize([[1, 3]], classes=[1, 2, 3]) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_inverse_binarize_multiclass(csr_container): + got = _inverse_binarize_multiclass( + csr_container([[0, 1, 0], [-1, 0, -1], [0, 0, 0]]), np.arange(3) + ) + assert_array_equal(got, np.array([1, 1, 0])) + + +def 
test_nan_label_encoder(): + """Check that label encoder encodes nans in transform. + + Non-regression test for #22628. + """ + le = LabelEncoder() + le.fit(["a", "a", "b", np.nan]) + + y_trans = le.transform([np.nan]) + assert_array_equal(y_trans, [2]) + + +@pytest.mark.parametrize( + "encoder", [LabelEncoder(), LabelBinarizer(), MultiLabelBinarizer()] +) +def test_label_encoders_do_not_have_set_output(encoder): + """Check that label encoders do not define set_output and work with y as a kwarg. + + Non-regression test for #26854. + """ + assert not hasattr(encoder, "set_output") + y_encoded_with_kwarg = encoder.fit_transform(y=["a", "b", "c"]) + y_encoded_positional = encoder.fit_transform(["a", "b", "c"]) + assert_array_equal(y_encoded_with_kwarg, y_encoded_positional) + + +@pytest.mark.parametrize( + "array_namespace, device, dtype", yield_namespace_device_dtype_combinations() +) +@pytest.mark.parametrize( + "y", + [ + np.array([2, 1, 3, 1, 3]), + np.array([1, 1, 4, 5, -1, 0]), + np.array([3, 5, 9, 5, 9, 3]), + ], +) +def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype): + xp = _array_api_for_tests(array_namespace, device) + xp_y = xp.asarray(y, device=device) + with config_context(array_api_dispatch=True): + xp_label = LabelEncoder() + np_label = LabelEncoder() + xp_label = xp_label.fit(xp_y) + xp_transformed = xp_label.transform(xp_y) + xp_inv_transformed = xp_label.inverse_transform(xp_transformed) + np_label = np_label.fit(y) + np_transformed = np_label.transform(y) + assert get_namespace(xp_transformed)[0].__name__ == xp.__name__ + assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__ + assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__ + assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed) + assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y) + assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_) + + xp_label = LabelEncoder() + np_label = LabelEncoder() + xp_transformed = xp_label.fit_transform(xp_y) + np_transformed = np_label.fit_transform(y) + assert get_namespace(xp_transformed)[0].__name__ == xp.__name__ + assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__ + assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed) + assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_) diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_polynomial.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_polynomial.py new file mode 100644 index 0000000000000000000000000000000000000000..c83e5e35232c894839a047a16aaf3adc36a2f633 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_polynomial.py @@ -0,0 +1,1260 @@ +import sys + +import numpy as np +import pytest +from numpy.testing import assert_allclose, assert_array_equal +from scipy import sparse +from scipy.interpolate import BSpline +from scipy.sparse import random as sparse_random + +from sklearn.linear_model import LinearRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import ( + KBinsDiscretizer, + PolynomialFeatures, + SplineTransformer, +) +from sklearn.preprocessing._csr_polynomial_expansion import ( + _calc_expanded_nnz, + _calc_total_nnz, + _get_sizeof_LARGEST_INT_t, +) +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils.fixes import ( + CSC_CONTAINERS, + CSR_CONTAINERS, + parse_version, + sp_version, +) + + +@pytest.mark.parametrize("est", 
(PolynomialFeatures, SplineTransformer)) +def test_polynomial_and_spline_array_order(est): + """Test that output array has the given order.""" + X = np.arange(10).reshape(5, 2) + + def is_c_contiguous(a): + return np.isfortran(a.T) + + assert is_c_contiguous(est().fit_transform(X)) + assert is_c_contiguous(est(order="C").fit_transform(X)) + assert np.isfortran(est(order="F").fit_transform(X)) + + +@pytest.mark.parametrize( + "params, err_msg", + [ + ({"knots": [[1]]}, r"Number of knots, knots.shape\[0\], must be >= 2."), + ({"knots": [[1, 1], [2, 2]]}, r"knots.shape\[1\] == n_features is violated"), + ({"knots": [[1], [0]]}, "knots must be sorted without duplicates."), + ], +) +def test_spline_transformer_input_validation(params, err_msg): + """Test that we raise errors for invalid input in SplineTransformer.""" + X = [[1], [2]] + + with pytest.raises(ValueError, match=err_msg): + SplineTransformer(**params).fit(X) + + +@pytest.mark.parametrize("extrapolation", ["continue", "periodic"]) +def test_spline_transformer_integer_knots(extrapolation): + """Test that SplineTransformer accepts integer value knot positions.""" + X = np.arange(20).reshape(10, 2) + knots = [[0, 1], [1, 2], [5, 5], [11, 10], [12, 11]] + _ = SplineTransformer( + degree=3, knots=knots, extrapolation=extrapolation + ).fit_transform(X) + + +def test_spline_transformer_feature_names(): + """Test that SplineTransformer generates correct features name.""" + X = np.arange(20).reshape(10, 2) + splt = SplineTransformer(n_knots=3, degree=3, include_bias=True).fit(X) + feature_names = splt.get_feature_names_out() + assert_array_equal( + feature_names, + [ + "x0_sp_0", + "x0_sp_1", + "x0_sp_2", + "x0_sp_3", + "x0_sp_4", + "x1_sp_0", + "x1_sp_1", + "x1_sp_2", + "x1_sp_3", + "x1_sp_4", + ], + ) + + splt = SplineTransformer(n_knots=3, degree=3, include_bias=False).fit(X) + feature_names = splt.get_feature_names_out(["a", "b"]) + assert_array_equal( + feature_names, + [ + "a_sp_0", + "a_sp_1", + "a_sp_2", + "a_sp_3", + "b_sp_0", + "b_sp_1", + "b_sp_2", + "b_sp_3", + ], + ) + + +@pytest.mark.parametrize( + "extrapolation", + ["constant", "linear", "continue", "periodic"], +) +@pytest.mark.parametrize("degree", [2, 3]) +def test_split_transform_feature_names_extrapolation_degree(extrapolation, degree): + """Test feature names are correct for different extrapolations and degree. + + Non-regression test for gh-25292. + """ + X = np.arange(20).reshape(10, 2) + splt = SplineTransformer(degree=degree, extrapolation=extrapolation).fit(X) + feature_names = splt.get_feature_names_out(["a", "b"]) + assert len(feature_names) == splt.n_features_out_ + + X_trans = splt.transform(X) + assert X_trans.shape[1] == len(feature_names) + + +@pytest.mark.parametrize("degree", range(1, 5)) +@pytest.mark.parametrize("n_knots", range(3, 5)) +@pytest.mark.parametrize("knots", ["uniform", "quantile"]) +@pytest.mark.parametrize("extrapolation", ["constant", "periodic"]) +def test_spline_transformer_unity_decomposition(degree, n_knots, knots, extrapolation): + """Test that B-splines are indeed a decomposition of unity. + + Splines basis functions must sum up to 1 per row, if we stay in between boundaries. + """ + X = np.linspace(0, 1, 100)[:, None] + # make the boundaries 0 and 1 part of X_train, for sure. 
+ X_train = np.r_[[[0]], X[::2, :], [[1]]] + X_test = X[1::2, :] + + if extrapolation == "periodic": + n_knots = n_knots + degree # periodic splines require degree < n_knots + + splt = SplineTransformer( + n_knots=n_knots, + degree=degree, + knots=knots, + include_bias=True, + extrapolation=extrapolation, + ) + splt.fit(X_train) + for X in [X_train, X_test]: + assert_allclose(np.sum(splt.transform(X), axis=1), 1) + + +@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) +def test_spline_transformer_linear_regression(bias, intercept): + """Test that B-splines fit a sinusodial curve pretty well.""" + X = np.linspace(0, 10, 100)[:, None] + y = np.sin(X[:, 0]) + 2 # +2 to avoid the value 0 in assert_allclose + pipe = Pipeline( + steps=[ + ( + "spline", + SplineTransformer( + n_knots=15, + degree=3, + include_bias=bias, + extrapolation="constant", + ), + ), + ("ols", LinearRegression(fit_intercept=intercept)), + ] + ) + pipe.fit(X, y) + assert_allclose(pipe.predict(X), y, rtol=1e-3) + + +@pytest.mark.parametrize( + ["knots", "n_knots", "sample_weight", "expected_knots"], + [ + ("uniform", 3, None, np.array([[0, 2], [3, 8], [6, 14]])), + ( + "uniform", + 3, + np.array([0, 0, 1, 1, 0, 3, 1]), + np.array([[2, 2], [4, 8], [6, 14]]), + ), + ("uniform", 4, None, np.array([[0, 2], [2, 6], [4, 10], [6, 14]])), + ("quantile", 3, None, np.array([[0, 2], [3, 3], [6, 14]])), + ( + "quantile", + 3, + np.array([0, 0, 1, 1, 0, 3, 1]), + np.array([[2, 2], [5, 8], [6, 14]]), + ), + ], +) +def test_spline_transformer_get_base_knot_positions( + knots, n_knots, sample_weight, expected_knots +): + """Check the behaviour to find knot positions with and without sample_weight.""" + X = np.array([[0, 2], [0, 2], [2, 2], [3, 3], [4, 6], [5, 8], [6, 14]]) + base_knots = SplineTransformer._get_base_knot_positions( + X=X, knots=knots, n_knots=n_knots, sample_weight=sample_weight + ) + assert_allclose(base_knots, expected_knots) + + +@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) +def test_spline_transformer_periodic_linear_regression(bias, intercept): + """Test that B-splines fit a periodic curve pretty well.""" + + # "+ 3" to avoid the value 0 in assert_allclose + def f(x): + return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3 + + X = np.linspace(0, 1, 101)[:, None] + pipe = Pipeline( + steps=[ + ( + "spline", + SplineTransformer( + n_knots=20, + degree=3, + include_bias=bias, + extrapolation="periodic", + ), + ), + ("ols", LinearRegression(fit_intercept=intercept)), + ] + ) + pipe.fit(X, f(X[:, 0])) + + # Generate larger array to check periodic extrapolation + X_ = np.linspace(-1, 2, 301)[:, None] + predictions = pipe.predict(X_) + assert_allclose(predictions, f(X_[:, 0]), atol=0.01, rtol=0.01) + assert_allclose(predictions[0:100], predictions[100:200], rtol=1e-3) + + +def test_spline_transformer_periodic_spline_backport(): + """Test that the backport of extrapolate="periodic" works correctly""" + X = np.linspace(-2, 3.5, 10)[:, None] + degree = 2 + + # Use periodic extrapolation backport in SplineTransformer + transformer = SplineTransformer( + degree=degree, extrapolation="periodic", knots=[[-1.0], [0.0], [1.0]] + ) + Xt = transformer.fit_transform(X) + + # Use periodic extrapolation in BSpline + coef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]]) + spl = BSpline(np.arange(-3, 4), coef, degree, "periodic") + Xspl = spl(X[:, 0]) + assert_allclose(Xt, Xspl) + + +def test_spline_transformer_periodic_splines_periodicity(): + """Test if shifted 
knots result in the same transformation up to permutation.""" + X = np.linspace(0, 10, 101)[:, None] + + transformer_1 = SplineTransformer( + degree=3, + extrapolation="periodic", + knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]], + ) + + transformer_2 = SplineTransformer( + degree=3, + extrapolation="periodic", + knots=[[1.0], [3.0], [4.0], [5.0], [8.0], [9.0]], + ) + + Xt_1 = transformer_1.fit_transform(X) + Xt_2 = transformer_2.fit_transform(X) + + assert_allclose(Xt_1, Xt_2[:, [4, 0, 1, 2, 3]]) + + +@pytest.mark.parametrize("degree", [3, 5]) +def test_spline_transformer_periodic_splines_smoothness(degree): + """Test that spline transformation is smooth at first / last knot.""" + X = np.linspace(-2, 10, 10_000)[:, None] + + transformer = SplineTransformer( + degree=degree, + extrapolation="periodic", + knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]], + ) + Xt = transformer.fit_transform(X) + + delta = (X.max() - X.min()) / len(X) + tol = 10 * delta + + dXt = Xt + # We expect splines of degree `degree` to be (`degree`-1) times + # continuously differentiable. I.e. for d = 0, ..., `degree` - 1 the d-th + # derivative should be continuous. This is the case if the (d+1)-th + # numerical derivative is reasonably small (smaller than `tol` in absolute + # value). We thus compute d-th numeric derivatives for d = 1, ..., `degree` + # and compare them to `tol`. + # + # Note that the 0-th derivative is the function itself, such that we are + # also checking its continuity. + for d in range(1, degree + 1): + # Check continuity of the (d-1)-th derivative + diff = np.diff(dXt, axis=0) + assert np.abs(diff).max() < tol + # Compute d-th numeric derivative + dXt = diff / delta + + # As degree `degree` splines are not `degree` times continuously + # differentiable at the knots, the `degree + 1`-th numeric derivative + # should have spikes at the knots. 
+ diff = np.diff(dXt, axis=0) + assert np.abs(diff).max() > 1 + + +@pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) +@pytest.mark.parametrize("degree", [1, 2, 3, 4, 5]) +def test_spline_transformer_extrapolation(bias, intercept, degree): + """Test that B-spline extrapolation works correctly.""" + # we use a straight line for that + X = np.linspace(-1, 1, 100)[:, None] + y = X.squeeze() + + # 'constant' + pipe = Pipeline( + [ + [ + "spline", + SplineTransformer( + n_knots=4, + degree=degree, + include_bias=bias, + extrapolation="constant", + ), + ], + ["ols", LinearRegression(fit_intercept=intercept)], + ] + ) + pipe.fit(X, y) + assert_allclose(pipe.predict([[-10], [5]]), [-1, 1]) + + # 'linear' + pipe = Pipeline( + [ + [ + "spline", + SplineTransformer( + n_knots=4, + degree=degree, + include_bias=bias, + extrapolation="linear", + ), + ], + ["ols", LinearRegression(fit_intercept=intercept)], + ] + ) + pipe.fit(X, y) + assert_allclose(pipe.predict([[-10], [5]]), [-10, 5]) + + # 'error' + splt = SplineTransformer( + n_knots=4, degree=degree, include_bias=bias, extrapolation="error" + ) + splt.fit(X) + msg = "X contains values beyond the limits of the knots" + with pytest.raises(ValueError, match=msg): + splt.transform([[-10]]) + with pytest.raises(ValueError, match=msg): + splt.transform([[5]]) + + +def test_spline_transformer_kbindiscretizer(): + """Test that a B-spline of degree=0 is equivalent to KBinsDiscretizer.""" + rng = np.random.RandomState(97531) + X = rng.randn(200).reshape(200, 1) + n_bins = 5 + n_knots = n_bins + 1 + + splt = SplineTransformer( + n_knots=n_knots, degree=0, knots="quantile", include_bias=True + ) + splines = splt.fit_transform(X) + + kbd = KBinsDiscretizer(n_bins=n_bins, encode="onehot-dense", strategy="quantile") + kbins = kbd.fit_transform(X) + + # Though they should be exactly equal, we test approximately with high + # accuracy. 
+ assert_allclose(splines, kbins, rtol=1e-13) + + +@pytest.mark.skipif( + sp_version < parse_version("1.8.0"), + reason="The option `sparse_output` is available as of scipy 1.8.0", +) +@pytest.mark.parametrize("degree", range(1, 3)) +@pytest.mark.parametrize("knots", ["uniform", "quantile"]) +@pytest.mark.parametrize( + "extrapolation", ["error", "constant", "linear", "continue", "periodic"] +) +@pytest.mark.parametrize("include_bias", [False, True]) +def test_spline_transformer_sparse_output( + degree, knots, extrapolation, include_bias, global_random_seed +): + rng = np.random.RandomState(global_random_seed) + X = rng.randn(200).reshape(40, 5) + + splt_dense = SplineTransformer( + degree=degree, + knots=knots, + extrapolation=extrapolation, + include_bias=include_bias, + sparse_output=False, + ) + splt_sparse = SplineTransformer( + degree=degree, + knots=knots, + extrapolation=extrapolation, + include_bias=include_bias, + sparse_output=True, + ) + + splt_dense.fit(X) + splt_sparse.fit(X) + + X_trans_sparse = splt_sparse.transform(X) + X_trans_dense = splt_dense.transform(X) + assert sparse.issparse(X_trans_sparse) and X_trans_sparse.format == "csr" + assert_allclose(X_trans_dense, X_trans_sparse.toarray()) + + # extrapolation regime + X_min = np.amin(X, axis=0) + X_max = np.amax(X, axis=0) + X_extra = np.r_[ + np.linspace(X_min - 5, X_min, 10), np.linspace(X_max, X_max + 5, 10) + ] + if extrapolation == "error": + msg = "X contains values beyond the limits of the knots" + with pytest.raises(ValueError, match=msg): + splt_dense.transform(X_extra) + msg = "Out of bounds" + with pytest.raises(ValueError, match=msg): + splt_sparse.transform(X_extra) + else: + assert_allclose( + splt_dense.transform(X_extra), splt_sparse.transform(X_extra).toarray() + ) + + +@pytest.mark.skipif( + sp_version >= parse_version("1.8.0"), + reason="The option `sparse_output` is available as of scipy 1.8.0", +) +def test_spline_transformer_sparse_output_raise_error_for_old_scipy(): + """Test that SplineTransformer with sparse=True raises for scipy<1.8.0.""" + X = [[1], [2]] + with pytest.raises(ValueError, match="scipy>=1.8.0"): + SplineTransformer(sparse_output=True).fit(X) + + +@pytest.mark.parametrize("n_knots", [5, 10]) +@pytest.mark.parametrize("include_bias", [True, False]) +@pytest.mark.parametrize("degree", [3, 4]) +@pytest.mark.parametrize( + "extrapolation", ["error", "constant", "linear", "continue", "periodic"] +) +@pytest.mark.parametrize("sparse_output", [False, True]) +def test_spline_transformer_n_features_out( + n_knots, include_bias, degree, extrapolation, sparse_output +): + """Test that transform results in n_features_out_ features.""" + if sparse_output and sp_version < parse_version("1.8.0"): + pytest.skip("The option `sparse_output` is available as of scipy 1.8.0") + + splt = SplineTransformer( + n_knots=n_knots, + degree=degree, + include_bias=include_bias, + extrapolation=extrapolation, + sparse_output=sparse_output, + ) + X = np.linspace(0, 1, 10)[:, None] + splt.fit(X) + + assert splt.transform(X).shape[1] == splt.n_features_out_ + + +@pytest.mark.parametrize( + "params, err_msg", + [ + ({"degree": (-1, 2)}, r"degree=\(min_degree, max_degree\) must"), + ({"degree": (0, 1.5)}, r"degree=\(min_degree, max_degree\) must"), + ({"degree": (3, 2)}, r"degree=\(min_degree, max_degree\) must"), + ({"degree": (1, 2, 3)}, r"int or tuple \(min_degree, max_degree\)"), + ], +) +def test_polynomial_features_input_validation(params, err_msg): + """Test that we raise errors for invalid input in 
PolynomialFeatures.""" + X = [[1], [2]] + + with pytest.raises(ValueError, match=err_msg): + PolynomialFeatures(**params).fit(X) + + +@pytest.fixture() +def single_feature_degree3(): + X = np.arange(6)[:, np.newaxis] + P = np.hstack([np.ones_like(X), X, X**2, X**3]) + return X, P + + +@pytest.mark.parametrize( + "degree, include_bias, interaction_only, indices", + [ + (3, True, False, slice(None, None)), + (3, False, False, slice(1, None)), + (3, True, True, [0, 1]), + (3, False, True, [1]), + ((2, 3), True, False, [0, 2, 3]), + ((2, 3), False, False, [2, 3]), + ((2, 3), True, True, [0]), + ((2, 3), False, True, []), + ], +) +@pytest.mark.parametrize("X_container", [None] + CSR_CONTAINERS + CSC_CONTAINERS) +def test_polynomial_features_one_feature( + single_feature_degree3, + degree, + include_bias, + interaction_only, + indices, + X_container, +): + """Test PolynomialFeatures on single feature up to degree 3.""" + X, P = single_feature_degree3 + if X_container is not None: + X = X_container(X) + tf = PolynomialFeatures( + degree=degree, include_bias=include_bias, interaction_only=interaction_only + ).fit(X) + out = tf.transform(X) + if X_container is not None: + out = out.toarray() + assert_allclose(out, P[:, indices]) + if tf.n_output_features_ > 0: + assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_) + + +@pytest.fixture() +def two_features_degree3(): + X = np.arange(6).reshape((3, 2)) + x1 = X[:, :1] + x2 = X[:, 1:] + P = np.hstack( + [ + x1**0 * x2**0, # 0 + x1**1 * x2**0, # 1 + x1**0 * x2**1, # 2 + x1**2 * x2**0, # 3 + x1**1 * x2**1, # 4 + x1**0 * x2**2, # 5 + x1**3 * x2**0, # 6 + x1**2 * x2**1, # 7 + x1**1 * x2**2, # 8 + x1**0 * x2**3, # 9 + ] + ) + return X, P + + +@pytest.mark.parametrize( + "degree, include_bias, interaction_only, indices", + [ + (2, True, False, slice(0, 6)), + (2, False, False, slice(1, 6)), + (2, True, True, [0, 1, 2, 4]), + (2, False, True, [1, 2, 4]), + ((2, 2), True, False, [0, 3, 4, 5]), + ((2, 2), False, False, [3, 4, 5]), + ((2, 2), True, True, [0, 4]), + ((2, 2), False, True, [4]), + (3, True, False, slice(None, None)), + (3, False, False, slice(1, None)), + (3, True, True, [0, 1, 2, 4]), + (3, False, True, [1, 2, 4]), + ((2, 3), True, False, [0, 3, 4, 5, 6, 7, 8, 9]), + ((2, 3), False, False, slice(3, None)), + ((2, 3), True, True, [0, 4]), + ((2, 3), False, True, [4]), + ((3, 3), True, False, [0, 6, 7, 8, 9]), + ((3, 3), False, False, [6, 7, 8, 9]), + ((3, 3), True, True, [0]), + ((3, 3), False, True, []), # would need 3 input features + ], +) +@pytest.mark.parametrize("X_container", [None] + CSR_CONTAINERS + CSC_CONTAINERS) +def test_polynomial_features_two_features( + two_features_degree3, + degree, + include_bias, + interaction_only, + indices, + X_container, +): + """Test PolynomialFeatures on 2 features up to degree 3.""" + X, P = two_features_degree3 + if X_container is not None: + X = X_container(X) + tf = PolynomialFeatures( + degree=degree, include_bias=include_bias, interaction_only=interaction_only + ).fit(X) + out = tf.transform(X) + if X_container is not None: + out = out.toarray() + assert_allclose(out, P[:, indices]) + if tf.n_output_features_ > 0: + assert tf.powers_.shape == (tf.n_output_features_, tf.n_features_in_) + + +def test_polynomial_feature_names(): + X = np.arange(30).reshape(10, 3) + poly = PolynomialFeatures(degree=2, include_bias=True).fit(X) + feature_names = poly.get_feature_names_out() + assert_array_equal( + ["1", "x0", "x1", "x2", "x0^2", "x0 x1", "x0 x2", "x1^2", "x1 x2", "x2^2"], + feature_names, 
+ ) + assert len(feature_names) == poly.transform(X).shape[1] + + poly = PolynomialFeatures(degree=3, include_bias=False).fit(X) + feature_names = poly.get_feature_names_out(["a", "b", "c"]) + assert_array_equal( + [ + "a", + "b", + "c", + "a^2", + "a b", + "a c", + "b^2", + "b c", + "c^2", + "a^3", + "a^2 b", + "a^2 c", + "a b^2", + "a b c", + "a c^2", + "b^3", + "b^2 c", + "b c^2", + "c^3", + ], + feature_names, + ) + assert len(feature_names) == poly.transform(X).shape[1] + + poly = PolynomialFeatures(degree=(2, 3), include_bias=False).fit(X) + feature_names = poly.get_feature_names_out(["a", "b", "c"]) + assert_array_equal( + [ + "a^2", + "a b", + "a c", + "b^2", + "b c", + "c^2", + "a^3", + "a^2 b", + "a^2 c", + "a b^2", + "a b c", + "a c^2", + "b^3", + "b^2 c", + "b c^2", + "c^3", + ], + feature_names, + ) + assert len(feature_names) == poly.transform(X).shape[1] + + poly = PolynomialFeatures( + degree=(3, 3), include_bias=True, interaction_only=True + ).fit(X) + feature_names = poly.get_feature_names_out(["a", "b", "c"]) + assert_array_equal(["1", "a b c"], feature_names) + assert len(feature_names) == poly.transform(X).shape[1] + + # test some unicode + poly = PolynomialFeatures(degree=1, include_bias=True).fit(X) + feature_names = poly.get_feature_names_out(["\u0001F40D", "\u262e", "\u05d0"]) + assert_array_equal(["1", "\u0001F40D", "\u262e", "\u05d0"], feature_names) + + +@pytest.mark.parametrize( + ["deg", "include_bias", "interaction_only", "dtype"], + [ + (1, True, False, int), + (2, True, False, int), + (2, True, False, np.float32), + (2, True, False, np.float64), + (3, False, False, np.float64), + (3, False, True, np.float64), + (4, False, False, np.float64), + (4, False, True, np.float64), + ], +) +@pytest.mark.parametrize("csc_container", CSC_CONTAINERS) +def test_polynomial_features_csc_X( + deg, include_bias, interaction_only, dtype, csc_container +): + rng = np.random.RandomState(0) + X = rng.randint(0, 2, (100, 2)) + X_csc = csc_container(X) + + est = PolynomialFeatures( + deg, include_bias=include_bias, interaction_only=interaction_only + ) + Xt_csc = est.fit_transform(X_csc.astype(dtype)) + Xt_dense = est.fit_transform(X.astype(dtype)) + + assert sparse.issparse(Xt_csc) and Xt_csc.format == "csc" + assert Xt_csc.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csc.toarray(), Xt_dense) + + +@pytest.mark.parametrize( + ["deg", "include_bias", "interaction_only", "dtype"], + [ + (1, True, False, int), + (2, True, False, int), + (2, True, False, np.float32), + (2, True, False, np.float64), + (3, False, False, np.float64), + (3, False, True, np.float64), + ], +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_polynomial_features_csr_X( + deg, include_bias, interaction_only, dtype, csr_container +): + rng = np.random.RandomState(0) + X = rng.randint(0, 2, (100, 2)) + X_csr = csr_container(X) + + est = PolynomialFeatures( + deg, include_bias=include_bias, interaction_only=interaction_only + ) + Xt_csr = est.fit_transform(X_csr.astype(dtype)) + Xt_dense = est.fit_transform(X.astype(dtype, copy=False)) + + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" + assert Xt_csr.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) + + +@pytest.mark.parametrize("n_features", [1, 4, 5]) +@pytest.mark.parametrize( + "min_degree, max_degree", [(0, 1), (0, 2), (1, 3), (0, 4), (3, 4)] +) +@pytest.mark.parametrize("interaction_only", [True, False]) +@pytest.mark.parametrize("include_bias", [True, False]) 
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_num_combinations( + n_features, min_degree, max_degree, interaction_only, include_bias, csr_container +): + """ + Test that n_output_features_ is calculated correctly. + """ + x = csr_container(([1], ([0], [n_features - 1]))) + est = PolynomialFeatures( + degree=max_degree, + interaction_only=interaction_only, + include_bias=include_bias, + ) + est.fit(x) + num_combos = est.n_output_features_ + + combos = PolynomialFeatures._combinations( + n_features=n_features, + min_degree=0, + max_degree=max_degree, + interaction_only=interaction_only, + include_bias=include_bias, + ) + assert num_combos == sum([1 for _ in combos]) + + +@pytest.mark.parametrize( + ["deg", "include_bias", "interaction_only", "dtype"], + [ + (2, True, False, np.float32), + (2, True, False, np.float64), + (3, False, False, np.float64), + (3, False, True, np.float64), + ], +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_polynomial_features_csr_X_floats( + deg, include_bias, interaction_only, dtype, csr_container +): + X_csr = csr_container(sparse_random(1000, 10, 0.5, random_state=0)) + X = X_csr.toarray() + + est = PolynomialFeatures( + deg, include_bias=include_bias, interaction_only=interaction_only + ) + Xt_csr = est.fit_transform(X_csr.astype(dtype)) + Xt_dense = est.fit_transform(X.astype(dtype)) + + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" + assert Xt_csr.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) + + +@pytest.mark.parametrize( + ["zero_row_index", "deg", "interaction_only"], + [ + (0, 2, True), + (1, 2, True), + (2, 2, True), + (0, 3, True), + (1, 3, True), + (2, 3, True), + (0, 2, False), + (1, 2, False), + (2, 2, False), + (0, 3, False), + (1, 3, False), + (2, 3, False), + ], +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_polynomial_features_csr_X_zero_row( + zero_row_index, deg, interaction_only, csr_container +): + X_csr = csr_container(sparse_random(3, 10, 1.0, random_state=0)) + X_csr[zero_row_index, :] = 0.0 + X = X_csr.toarray() + + est = PolynomialFeatures(deg, include_bias=False, interaction_only=interaction_only) + Xt_csr = est.fit_transform(X_csr) + Xt_dense = est.fit_transform(X) + + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" + assert Xt_csr.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) + + +# This degree should always be one more than the highest degree supported by +# _csr_expansion. 
+@pytest.mark.parametrize( + ["include_bias", "interaction_only"], + [(True, True), (True, False), (False, True), (False, False)], +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_polynomial_features_csr_X_degree_4( + include_bias, interaction_only, csr_container +): + X_csr = csr_container(sparse_random(1000, 10, 0.5, random_state=0)) + X = X_csr.toarray() + + est = PolynomialFeatures( + 4, include_bias=include_bias, interaction_only=interaction_only + ) + Xt_csr = est.fit_transform(X_csr) + Xt_dense = est.fit_transform(X) + + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" + assert Xt_csr.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) + + +@pytest.mark.parametrize( + ["deg", "dim", "interaction_only"], + [ + (2, 1, True), + (2, 2, True), + (3, 1, True), + (3, 2, True), + (3, 3, True), + (2, 1, False), + (2, 2, False), + (3, 1, False), + (3, 2, False), + (3, 3, False), + ], +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only, csr_container): + X_csr = csr_container(sparse_random(1000, dim, 0.5, random_state=0)) + X = X_csr.toarray() + + est = PolynomialFeatures(deg, interaction_only=interaction_only) + Xt_csr = est.fit_transform(X_csr) + Xt_dense = est.fit_transform(X) + + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" + assert Xt_csr.dtype == Xt_dense.dtype + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) + + +@pytest.mark.parametrize("interaction_only", [True, False]) +@pytest.mark.parametrize("include_bias", [True, False]) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_csr_polynomial_expansion_index_overflow_non_regression( + interaction_only, include_bias, csr_container +): + """Check the automatic index dtype promotion to `np.int64` when needed. + + This ensures that sufficiently large input configurations get + properly promoted to use `np.int64` for index and indptr representation + while preserving data integrity. Non-regression test for gh-16803. + + Note that this is only possible for Python runtimes with a 64 bit address + space. On 32 bit platforms, a `ValueError` is raised instead. + """ + + def degree_2_calc(d, i, j): + if interaction_only: + return d * i - (i**2 + 3 * i) // 2 - 1 + j + else: + return d * i - (i**2 + i) // 2 + j + + n_samples = 13 + n_features = 120001 + data_dtype = np.float32 + data = np.arange(1, 5, dtype=np.int64) + row = np.array([n_samples - 2, n_samples - 2, n_samples - 1, n_samples - 1]) + # An int64 dtype is required to avoid overflow error on Windows within the + # `degree_2_calc` function. + col = np.array( + [n_features - 2, n_features - 1, n_features - 2, n_features - 1], dtype=np.int64 + ) + X = csr_container( + (data, (row, col)), + shape=(n_samples, n_features), + dtype=data_dtype, + ) + pf = PolynomialFeatures( + interaction_only=interaction_only, include_bias=include_bias, degree=2 + ) + + # Calculate the number of combinations a-priori, and if needed check for + # the correct ValueError and terminate the test early. 
+ num_combinations = pf._num_combinations( + n_features=n_features, + min_degree=0, + max_degree=2, + interaction_only=pf.interaction_only, + include_bias=pf.include_bias, + ) + if num_combinations > np.iinfo(np.intp).max: + msg = ( + r"The output that would result from the current configuration would have" + r" \d* features which is too large to be indexed" + ) + with pytest.raises(ValueError, match=msg): + pf.fit(X) + return + X_trans = pf.fit_transform(X) + row_nonzero, col_nonzero = X_trans.nonzero() + n_degree_1_features_out = n_features + include_bias + max_degree_2_idx = ( + degree_2_calc(n_features, col[int(not interaction_only)], col[1]) + + n_degree_1_features_out + ) + + # Account for bias of all samples except last one which will be handled + # separately since there are distinct data values before it + data_target = [1] * (n_samples - 2) if include_bias else [] + col_nonzero_target = [0] * (n_samples - 2) if include_bias else [] + + for i in range(2): + x = data[2 * i] + y = data[2 * i + 1] + x_idx = col[2 * i] + y_idx = col[2 * i + 1] + if include_bias: + data_target.append(1) + col_nonzero_target.append(0) + data_target.extend([x, y]) + col_nonzero_target.extend( + [x_idx + int(include_bias), y_idx + int(include_bias)] + ) + if not interaction_only: + data_target.extend([x * x, x * y, y * y]) + col_nonzero_target.extend( + [ + degree_2_calc(n_features, x_idx, x_idx) + n_degree_1_features_out, + degree_2_calc(n_features, x_idx, y_idx) + n_degree_1_features_out, + degree_2_calc(n_features, y_idx, y_idx) + n_degree_1_features_out, + ] + ) + else: + data_target.extend([x * y]) + col_nonzero_target.append( + degree_2_calc(n_features, x_idx, y_idx) + n_degree_1_features_out + ) + + nnz_per_row = int(include_bias) + 3 + 2 * int(not interaction_only) + + assert pf.n_output_features_ == max_degree_2_idx + 1 + assert X_trans.dtype == data_dtype + assert X_trans.shape == (n_samples, max_degree_2_idx + 1) + assert X_trans.indptr.dtype == X_trans.indices.dtype == np.int64 + # Ensure that dtype promotion was actually required: + assert X_trans.indices.max() > np.iinfo(np.int32).max + + row_nonzero_target = list(range(n_samples - 2)) if include_bias else [] + row_nonzero_target.extend( + [n_samples - 2] * nnz_per_row + [n_samples - 1] * nnz_per_row + ) + + assert_allclose(X_trans.data, data_target) + assert_array_equal(row_nonzero, row_nonzero_target) + assert_array_equal(col_nonzero, col_nonzero_target) + + +@pytest.mark.parametrize( + "degree, n_features", + [ + # Needs promotion to int64 when interaction_only=False + (2, 65535), + (3, 2344), + # This guarantees that the intermediate operation when calculating + # output columns would overflow a C-long, hence checks that python- + # longs are being used. + (2, int(np.sqrt(np.iinfo(np.int64).max) + 1)), + (3, 65535), + # This case tests the second clause of the overflow check which + # takes into account the value of `n_features` itself. + (2, int(np.sqrt(np.iinfo(np.int64).max))), + ], +) +@pytest.mark.parametrize("interaction_only", [True, False]) +@pytest.mark.parametrize("include_bias", [True, False]) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_csr_polynomial_expansion_index_overflow( + degree, n_features, interaction_only, include_bias, csr_container +): + """Tests known edge-cases to the dtype promotion strategy and custom + Cython code, including a current bug in the upstream + `scipy.sparse.hstack`. 
+ """ + data = [1.0] + # Use int32 indices as much as we can + indices_dtype = np.int32 if n_features - 1 <= np.iinfo(np.int32).max else np.int64 + row = np.array([0], dtype=indices_dtype) + col = np.array([n_features - 1], dtype=indices_dtype) + + # First degree index + expected_indices = [ + n_features - 1 + int(include_bias), + ] + # Second degree index + expected_indices.append(n_features * (n_features + 1) // 2 + expected_indices[0]) + # Third degree index + expected_indices.append( + n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1] + ) + + X = csr_container((data, (row, col))) + pf = PolynomialFeatures( + interaction_only=interaction_only, include_bias=include_bias, degree=degree + ) + + # Calculate the number of combinations a-priori, and if needed check for + # the correct ValueError and terminate the test early. + num_combinations = pf._num_combinations( + n_features=n_features, + min_degree=0, + max_degree=degree, + interaction_only=pf.interaction_only, + include_bias=pf.include_bias, + ) + if num_combinations > np.iinfo(np.intp).max: + msg = ( + r"The output that would result from the current configuration would have" + r" \d* features which is too large to be indexed" + ) + with pytest.raises(ValueError, match=msg): + pf.fit(X) + return + + # In SciPy < 1.8, a bug occurs when an intermediate matrix in + # `to_stack` in `hstack` fits within int32 however would require int64 when + # combined with all previous matrices in `to_stack`. + if sp_version < parse_version("1.8.0"): + has_bug = False + max_int32 = np.iinfo(np.int32).max + cumulative_size = n_features + include_bias + for deg in range(2, degree + 1): + max_indptr = _calc_total_nnz(X.indptr, interaction_only, deg) + max_indices = _calc_expanded_nnz(n_features, interaction_only, deg) - 1 + cumulative_size += max_indices + 1 + needs_int64 = max(max_indices, max_indptr) > max_int32 + has_bug |= not needs_int64 and cumulative_size > max_int32 + if has_bug: + msg = r"In scipy versions `<1.8.0`, the function `scipy.sparse.hstack`" + with pytest.raises(ValueError, match=msg): + X_trans = pf.fit_transform(X) + return + + # When `n_features>=65535`, `scipy.sparse.hstack` may not use the right + # dtype for representing indices and indptr if `n_features` is still + # small enough so that each block matrix's indices and indptr arrays + # can be represented with `np.int32`. We test `n_features==65535` + # since it is guaranteed to run into this bug. 
+ if ( + sp_version < parse_version("1.9.2") + and n_features == 65535 + and degree == 2 + and not interaction_only + ): # pragma: no cover + msg = r"In scipy versions `<1.9.2`, the function `scipy.sparse.hstack`" + with pytest.raises(ValueError, match=msg): + X_trans = pf.fit_transform(X) + return + X_trans = pf.fit_transform(X) + + expected_dtype = np.int64 if num_combinations > np.iinfo(np.int32).max else np.int32 + # Terms higher than first degree + non_bias_terms = 1 + (degree - 1) * int(not interaction_only) + expected_nnz = int(include_bias) + non_bias_terms + assert X_trans.dtype == X.dtype + assert X_trans.shape == (1, pf.n_output_features_) + assert X_trans.indptr.dtype == X_trans.indices.dtype == expected_dtype + assert X_trans.nnz == expected_nnz + + if include_bias: + assert X_trans[0, 0] == pytest.approx(1.0) + for idx in range(non_bias_terms): + assert X_trans[0, expected_indices[idx]] == pytest.approx(1.0) + + offset = interaction_only * n_features + if degree == 3: + offset *= 1 + n_features + assert pf.n_output_features_ == expected_indices[degree - 1] + 1 - offset + + +@pytest.mark.parametrize("interaction_only", [True, False]) +@pytest.mark.parametrize("include_bias", [True, False]) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_csr_polynomial_expansion_too_large_to_index( + interaction_only, include_bias, csr_container +): + n_features = np.iinfo(np.int64).max // 2 + data = [1.0] + row = [0] + col = [n_features - 1] + X = csr_container((data, (row, col))) + pf = PolynomialFeatures( + interaction_only=interaction_only, include_bias=include_bias, degree=(2, 2) + ) + msg = ( + r"The output that would result from the current configuration would have \d*" + r" features which is too large to be indexed" + ) + with pytest.raises(ValueError, match=msg): + pf.fit(X) + with pytest.raises(ValueError, match=msg): + pf.fit_transform(X) + + +@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS) +def test_polynomial_features_behaviour_on_zero_degree(sparse_container): + """Check that PolynomialFeatures raises error when degree=0 and include_bias=False, + and output a single constant column when include_bias=True + """ + X = np.ones((10, 2)) + poly = PolynomialFeatures(degree=0, include_bias=False) + err_msg = ( + "Setting degree to zero and include_bias to False would result in" + " an empty output array." + ) + with pytest.raises(ValueError, match=err_msg): + poly.fit_transform(X) + + poly = PolynomialFeatures(degree=(0, 0), include_bias=False) + err_msg = ( + "Setting both min_degree and max_degree to zero and include_bias to" + " False would result in an empty output array." 
+ ) + with pytest.raises(ValueError, match=err_msg): + poly.fit_transform(X) + + for _X in [X, sparse_container(X)]: + poly = PolynomialFeatures(degree=0, include_bias=True) + output = poly.fit_transform(_X) + # convert to dense array if needed + if sparse.issparse(output): + output = output.toarray() + assert_array_equal(output, np.ones((X.shape[0], 1))) + + +def test_sizeof_LARGEST_INT_t(): + # On Windows, scikit-learn is typically compiled with MSVC that + # does not support int128 arithmetic (at the time of writing): + # https://stackoverflow.com/a/6761962/163740 + if sys.platform == "win32" or ( + sys.maxsize <= 2**32 and sys.platform != "emscripten" + ): + expected_size = 8 + else: + expected_size = 16 + + assert _get_sizeof_LARGEST_INT_t() == expected_size + + +@pytest.mark.xfail( + sys.platform == "win32", + reason=( + "On Windows, scikit-learn is typically compiled with MSVC that does not support" + " int128 arithmetic (at the time of writing)" + ), + run=True, +) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_csr_polynomial_expansion_windows_fail(csr_container): + # Minimum needed to ensure integer overflow occurs while guaranteeing an + # int64-indexable output. + n_features = int(np.iinfo(np.int64).max ** (1 / 3) + 3) + data = [1.0] + row = [0] + col = [n_features - 1] + + # First degree index + expected_indices = [ + n_features - 1, + ] + # Second degree index + expected_indices.append( + int(n_features * (n_features + 1) // 2 + expected_indices[0]) + ) + # Third degree index + expected_indices.append( + int(n_features * (n_features + 1) * (n_features + 2) // 6 + expected_indices[1]) + ) + + X = csr_container((data, (row, col))) + pf = PolynomialFeatures(interaction_only=False, include_bias=False, degree=3) + if sys.maxsize <= 2**32: + msg = ( + r"The output that would result from the current configuration would" + r" have \d*" + r" features which is too large to be indexed" + ) + with pytest.raises(ValueError, match=msg): + pf.fit_transform(X) + else: + X_trans = pf.fit_transform(X) + for idx in range(3): + assert X_trans[0, expected_indices[idx]] == pytest.approx(1.0) diff --git a/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_target_encoder.py b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_target_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..0f6fff581f39f6bc9430f5e631d6d361fb6bc4ae --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/preprocessing/tests/test_target_encoder.py @@ -0,0 +1,714 @@ +import re + +import numpy as np +import pytest +from numpy.testing import assert_allclose, assert_array_equal + +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import Ridge +from sklearn.model_selection import ( + KFold, + ShuffleSplit, + StratifiedKFold, + cross_val_score, + train_test_split, +) +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + KBinsDiscretizer, + LabelBinarizer, + LabelEncoder, + TargetEncoder, +) + + +def _encode_target(X_ordinal, y_numeric, n_categories, smooth): + """Simple Python implementation of target encoding.""" + cur_encodings = np.zeros(n_categories, dtype=np.float64) + y_mean = np.mean(y_numeric) + + if smooth == "auto": + y_variance = np.var(y_numeric) + for c in range(n_categories): + y_subset = y_numeric[X_ordinal == c] + n_i = y_subset.shape[0] + + if n_i == 0: + cur_encodings[c] = y_mean + continue + + y_subset_variance = np.var(y_subset) + m = y_subset_variance / y_variance + lambda_ = n_i / (n_i + m) + + 
cur_encodings[c] = lambda_ * np.mean(y_subset) + (1 - lambda_) * y_mean + return cur_encodings + else: # float + for c in range(n_categories): + y_subset = y_numeric[X_ordinal == c] + current_sum = np.sum(y_subset) + y_mean * smooth + current_cnt = y_subset.shape[0] + smooth + cur_encodings[c] = current_sum / current_cnt + return cur_encodings + + +@pytest.mark.parametrize( + "categories, unknown_value", + [ + ([np.array([0, 1, 2], dtype=np.int64)], 4), + ([np.array([1.0, 3.0, np.nan], dtype=np.float64)], 6.0), + ([np.array(["cat", "dog", "snake"], dtype=object)], "bear"), + ("auto", 3), + ], +) +@pytest.mark.parametrize("smooth", [5.0, "auto"]) +@pytest.mark.parametrize("target_type", ["binary", "continuous"]) +def test_encoding(categories, unknown_value, global_random_seed, smooth, target_type): + """Check encoding for binary and continuous targets. + + Compare the values returned by `TargetEncoder.fit_transform` against the + expected encodings for cv splits from a naive reference Python + implementation in _encode_target. + """ + + n_categories = 3 + X_train_int_array = np.array([[0] * 20 + [1] * 30 + [2] * 40], dtype=np.int64).T + X_test_int_array = np.array([[0, 1, 2]], dtype=np.int64).T + n_samples = X_train_int_array.shape[0] + + if categories == "auto": + X_train = X_train_int_array + X_test = X_test_int_array + else: + X_train = categories[0][X_train_int_array] + X_test = categories[0][X_test_int_array] + + X_test = np.concatenate((X_test, [[unknown_value]])) + + data_rng = np.random.RandomState(global_random_seed) + n_splits = 3 + if target_type == "binary": + y_numeric = data_rng.randint(low=0, high=2, size=n_samples) + target_names = np.array(["cat", "dog"], dtype=object) + y_train = target_names[y_numeric] + + else: + assert target_type == "continuous" + y_numeric = data_rng.uniform(low=-10, high=20, size=n_samples) + y_train = y_numeric + + shuffled_idx = data_rng.permutation(n_samples) + X_train_int_array = X_train_int_array[shuffled_idx] + X_train = X_train[shuffled_idx] + y_train = y_train[shuffled_idx] + y_numeric = y_numeric[shuffled_idx] + + # Define our CV splitting strategy + if target_type == "binary": + cv = StratifiedKFold( + n_splits=n_splits, random_state=global_random_seed, shuffle=True + ) + else: + cv = KFold(n_splits=n_splits, random_state=global_random_seed, shuffle=True) + + # Compute the expected values using our reference Python implementation of + # target encoding: + expected_X_fit_transform = np.empty_like(X_train_int_array, dtype=np.float64) + + for train_idx, test_idx in cv.split(X_train_int_array, y_train): + X_, y_ = X_train_int_array[train_idx, 0], y_numeric[train_idx] + cur_encodings = _encode_target(X_, y_, n_categories, smooth) + expected_X_fit_transform[test_idx, 0] = cur_encodings[ + X_train_int_array[test_idx, 0] + ] + + # Check that we can obtain the same encodings by calling `fit_transform` on + # the estimator with the same CV parameters: + target_encoder = TargetEncoder( + smooth=smooth, + categories=categories, + cv=n_splits, + random_state=global_random_seed, + ) + + X_fit_transform = target_encoder.fit_transform(X_train, y_train) + + assert target_encoder.target_type_ == target_type + assert_allclose(X_fit_transform, expected_X_fit_transform) + assert len(target_encoder.encodings_) == 1 + if target_type == "binary": + assert_array_equal(target_encoder.classes_, target_names) + else: + assert target_encoder.classes_ is None + + # compute encodings for all data to validate `transform` + y_mean = np.mean(y_numeric) + expected_encodings 
= _encode_target( + X_train_int_array[:, 0], y_numeric, n_categories, smooth + ) + assert_allclose(target_encoder.encodings_[0], expected_encodings) + assert target_encoder.target_mean_ == pytest.approx(y_mean) + + # Transform on test data, the last value is unknown so it is encoded as the target + # mean + expected_X_test_transform = np.concatenate( + (expected_encodings, np.array([y_mean])) + ).reshape(-1, 1) + + X_test_transform = target_encoder.transform(X_test) + assert_allclose(X_test_transform, expected_X_test_transform) + + +@pytest.mark.parametrize( + "categories, unknown_values", + [ + ([np.array([0, 1, 2], dtype=np.int64)], "auto"), + ([np.array(["cat", "dog", "snake"], dtype=object)], ["bear", "rabbit"]), + ], +) +@pytest.mark.parametrize( + "target_labels", [np.array([1, 2, 3]), np.array(["a", "b", "c"])] +) +@pytest.mark.parametrize("smooth", [5.0, "auto"]) +def test_encoding_multiclass( + global_random_seed, categories, unknown_values, target_labels, smooth +): + """Check encoding for multiclass targets.""" + rng = np.random.RandomState(global_random_seed) + + n_samples = 80 + n_features = 2 + feat_1_int = np.array(rng.randint(low=0, high=2, size=n_samples)) + feat_2_int = np.array(rng.randint(low=0, high=3, size=n_samples)) + feat_1 = categories[0][feat_1_int] + feat_2 = categories[0][feat_2_int] + X_train = np.column_stack((feat_1, feat_2)) + X_train_int = np.column_stack((feat_1_int, feat_2_int)) + categories_ = [[0, 1], [0, 1, 2]] + + n_classes = 3 + y_train_int = np.array(rng.randint(low=0, high=n_classes, size=n_samples)) + y_train = target_labels[y_train_int] + y_train_enc = LabelBinarizer().fit_transform(y_train) + + n_splits = 3 + cv = StratifiedKFold( + n_splits=n_splits, random_state=global_random_seed, shuffle=True + ) + + # Manually compute encodings for cv splits to validate `fit_transform` + expected_X_fit_transform = np.empty( + (X_train_int.shape[0], X_train_int.shape[1] * n_classes), + dtype=np.float64, + ) + for f_idx, cats in enumerate(categories_): + for c_idx in range(n_classes): + for train_idx, test_idx in cv.split(X_train, y_train): + y_class = y_train_enc[:, c_idx] + X_, y_ = X_train_int[train_idx, f_idx], y_class[train_idx] + current_encoding = _encode_target(X_, y_, len(cats), smooth) + # f_idx: 0, 0, 0, 1, 1, 1 + # c_idx: 0, 1, 2, 0, 1, 2 + # exp_idx: 0, 1, 2, 3, 4, 5 + exp_idx = c_idx + (f_idx * n_classes) + expected_X_fit_transform[test_idx, exp_idx] = current_encoding[ + X_train_int[test_idx, f_idx] + ] + + target_encoder = TargetEncoder( + smooth=smooth, + cv=n_splits, + random_state=global_random_seed, + ) + X_fit_transform = target_encoder.fit_transform(X_train, y_train) + + assert target_encoder.target_type_ == "multiclass" + assert_allclose(X_fit_transform, expected_X_fit_transform) + + # Manually compute encoding to validate `transform` + expected_encodings = [] + for f_idx, cats in enumerate(categories_): + for c_idx in range(n_classes): + y_class = y_train_enc[:, c_idx] + current_encoding = _encode_target( + X_train_int[:, f_idx], y_class, len(cats), smooth + ) + expected_encodings.append(current_encoding) + + assert len(target_encoder.encodings_) == n_features * n_classes + for i in range(n_features * n_classes): + assert_allclose(target_encoder.encodings_[i], expected_encodings[i]) + assert_array_equal(target_encoder.classes_, target_labels) + + # Include unknown values at the end + X_test_int = np.array([[0, 1], [1, 2], [4, 5]]) + if unknown_values == "auto": + X_test = X_test_int + else: + X_test = np.empty_like(X_test_int[:-1, :], 
dtype=object) + for column_idx in range(X_test_int.shape[1]): + X_test[:, column_idx] = categories[0][X_test_int[:-1, column_idx]] + # Add unknown values at end + X_test = np.vstack((X_test, unknown_values)) + + y_mean = np.mean(y_train_enc, axis=0) + expected_X_test_transform = np.empty( + (X_test_int.shape[0], X_test_int.shape[1] * n_classes), + dtype=np.float64, + ) + n_rows = X_test_int.shape[0] + f_idx = [0, 0, 0, 1, 1, 1] + # Last row are unknowns, dealt with later + for row_idx in range(n_rows - 1): + for i, enc in enumerate(expected_encodings): + expected_X_test_transform[row_idx, i] = enc[X_test_int[row_idx, f_idx[i]]] + + # Unknowns encoded as target mean for each class + # `y_mean` contains target mean for each class, thus cycle through mean of + # each class, `n_features` times + mean_idx = [0, 1, 2, 0, 1, 2] + for i in range(n_classes * n_features): + expected_X_test_transform[n_rows - 1, i] = y_mean[mean_idx[i]] + + X_test_transform = target_encoder.transform(X_test) + assert_allclose(X_test_transform, expected_X_test_transform) + + +@pytest.mark.parametrize( + "X, categories", + [ + ( + np.array([[0] * 10 + [1] * 10 + [3]], dtype=np.int64).T, # 3 is unknown + [[0, 1, 2]], + ), + ( + np.array( + [["cat"] * 10 + ["dog"] * 10 + ["snake"]], dtype=object + ).T, # snake is unknown + [["dog", "cat", "cow"]], + ), + ], +) +@pytest.mark.parametrize("smooth", [4.0, "auto"]) +def test_custom_categories(X, categories, smooth): + """Custom categories with unknown categories that are not in training data.""" + rng = np.random.RandomState(0) + y = rng.uniform(low=-10, high=20, size=X.shape[0]) + enc = TargetEncoder(categories=categories, smooth=smooth, random_state=0).fit(X, y) + + # The last element is unknown and encoded as the mean + y_mean = y.mean() + X_trans = enc.transform(X[-1:]) + assert X_trans[0, 0] == pytest.approx(y_mean) + + assert len(enc.encodings_) == 1 + # custom category that is not in training data + assert enc.encodings_[0][-1] == pytest.approx(y_mean) + + +@pytest.mark.parametrize( + "y, msg", + [ + ([1, 2, 0, 1], "Found input variables with inconsistent"), + ( + np.array([[1, 2, 0], [1, 2, 3]]).T, + "Target type was inferred to be 'multiclass-multioutput'", + ), + ], +) +def test_errors(y, msg): + """Check invalidate input.""" + X = np.array([[1, 0, 1]]).T + + enc = TargetEncoder() + with pytest.raises(ValueError, match=msg): + enc.fit_transform(X, y) + + +def test_use_regression_target(): + """Check inferred and specified `target_type` on regression target.""" + X = np.array([[0, 1, 0, 1, 0, 1]]).T + y = np.array([1.0, 2.0, 3.0, 2.0, 3.0, 4.0]) + + enc = TargetEncoder(cv=2) + with pytest.warns( + UserWarning, + match=re.escape( + "The least populated class in y has only 1 members, which is less than" + " n_splits=2." 
+ ), + ): + enc.fit_transform(X, y) + assert enc.target_type_ == "multiclass" + + enc = TargetEncoder(cv=2, target_type="continuous") + enc.fit_transform(X, y) + assert enc.target_type_ == "continuous" + + +@pytest.mark.parametrize( + "y, feature_names", + [ + ([1, 2] * 10, ["A", "B"]), + ([1, 2, 3] * 6 + [1, 2], ["A_1", "A_2", "A_3", "B_1", "B_2", "B_3"]), + ( + ["y1", "y2", "y3"] * 6 + ["y1", "y2"], + ["A_y1", "A_y2", "A_y3", "B_y1", "B_y2", "B_y3"], + ), + ], +) +def test_feature_names_out_set_output(y, feature_names): + """Check TargetEncoder works with set_output.""" + pd = pytest.importorskip("pandas") + + X_df = pd.DataFrame({"A": ["a", "b"] * 10, "B": [1, 2] * 10}) + + enc_default = TargetEncoder(cv=2, smooth=3.0, random_state=0) + enc_default.set_output(transform="default") + enc_pandas = TargetEncoder(cv=2, smooth=3.0, random_state=0) + enc_pandas.set_output(transform="pandas") + + X_default = enc_default.fit_transform(X_df, y) + X_pandas = enc_pandas.fit_transform(X_df, y) + + assert_allclose(X_pandas.to_numpy(), X_default) + assert_array_equal(enc_pandas.get_feature_names_out(), feature_names) + assert_array_equal(enc_pandas.get_feature_names_out(), X_pandas.columns) + + +@pytest.mark.parametrize("to_pandas", [True, False]) +@pytest.mark.parametrize("smooth", [1.0, "auto"]) +@pytest.mark.parametrize("target_type", ["binary-ints", "binary-str", "continuous"]) +def test_multiple_features_quick(to_pandas, smooth, target_type): + """Check target encoder with multiple features.""" + X_ordinal = np.array( + [[1, 1], [0, 1], [1, 1], [2, 1], [1, 0], [0, 1], [1, 0], [0, 0]], dtype=np.int64 + ) + if target_type == "binary-str": + y_train = np.array(["a", "b", "a", "a", "b", "b", "a", "b"]) + y_integer = LabelEncoder().fit_transform(y_train) + cv = StratifiedKFold(2, random_state=0, shuffle=True) + elif target_type == "binary-ints": + y_train = np.array([3, 4, 3, 3, 3, 4, 4, 4]) + y_integer = LabelEncoder().fit_transform(y_train) + cv = StratifiedKFold(2, random_state=0, shuffle=True) + else: + y_train = np.array([3.0, 5.1, 2.4, 3.5, 4.1, 5.5, 10.3, 7.3], dtype=np.float32) + y_integer = y_train + cv = KFold(2, random_state=0, shuffle=True) + y_mean = np.mean(y_integer) + categories = [[0, 1, 2], [0, 1]] + + X_test = np.array( + [ + [0, 1], + [3, 0], # 3 is unknown + [1, 10], # 10 is unknown + ], + dtype=np.int64, + ) + + if to_pandas: + pd = pytest.importorskip("pandas") + # convert second feature to an object + X_train = pd.DataFrame( + { + "feat0": X_ordinal[:, 0], + "feat1": np.array(["cat", "dog"], dtype=object)[X_ordinal[:, 1]], + } + ) + # "snake" is unknown + X_test = pd.DataFrame({"feat0": X_test[:, 0], "feat1": ["dog", "cat", "snake"]}) + else: + X_train = X_ordinal + + # manually compute encoding for fit_transform + expected_X_fit_transform = np.empty_like(X_ordinal, dtype=np.float64) + for f_idx, cats in enumerate(categories): + for train_idx, test_idx in cv.split(X_ordinal, y_integer): + X_, y_ = X_ordinal[train_idx, f_idx], y_integer[train_idx] + current_encoding = _encode_target(X_, y_, len(cats), smooth) + expected_X_fit_transform[test_idx, f_idx] = current_encoding[ + X_ordinal[test_idx, f_idx] + ] + + # manually compute encoding for transform + expected_encodings = [] + for f_idx, cats in enumerate(categories): + current_encoding = _encode_target( + X_ordinal[:, f_idx], y_integer, len(cats), smooth + ) + expected_encodings.append(current_encoding) + + expected_X_test_transform = np.array( + [ + [expected_encodings[0][0], expected_encodings[1][1]], + [y_mean, 
expected_encodings[1][0]], + [expected_encodings[0][1], y_mean], + ], + dtype=np.float64, + ) + + enc = TargetEncoder(smooth=smooth, cv=2, random_state=0) + X_fit_transform = enc.fit_transform(X_train, y_train) + assert_allclose(X_fit_transform, expected_X_fit_transform) + + assert len(enc.encodings_) == 2 + for i in range(2): + assert_allclose(enc.encodings_[i], expected_encodings[i]) + + X_test_transform = enc.transform(X_test) + assert_allclose(X_test_transform, expected_X_test_transform) + + +@pytest.mark.parametrize( + "y, y_mean", + [ + (np.array([3.4] * 20), 3.4), + (np.array([0] * 20), 0), + (np.array(["a"] * 20, dtype=object), 0), + ], + ids=["continuous", "binary", "binary-string"], +) +@pytest.mark.parametrize("smooth", ["auto", 4.0, 0.0]) +def test_constant_target_and_feature(y, y_mean, smooth): + """Check edge case where feature and target is constant.""" + X = np.array([[1] * 20]).T + n_samples = X.shape[0] + + enc = TargetEncoder(cv=2, smooth=smooth, random_state=0) + X_trans = enc.fit_transform(X, y) + assert_allclose(X_trans, np.repeat([[y_mean]], n_samples, axis=0)) + assert enc.encodings_[0][0] == pytest.approx(y_mean) + assert enc.target_mean_ == pytest.approx(y_mean) + + X_test = np.array([[1], [0]]) + X_test_trans = enc.transform(X_test) + assert_allclose(X_test_trans, np.repeat([[y_mean]], 2, axis=0)) + + +def test_fit_transform_not_associated_with_y_if_ordinal_categorical_is_not( + global_random_seed, +): + cardinality = 30 # not too large, otherwise we need a very large n_samples + n_samples = 3000 + rng = np.random.RandomState(global_random_seed) + y_train = rng.normal(size=n_samples) + X_train = rng.randint(0, cardinality, size=n_samples).reshape(-1, 1) + + # Sort by y_train to attempt to cause a leak + y_sorted_indices = y_train.argsort() + y_train = y_train[y_sorted_indices] + X_train = X_train[y_sorted_indices] + + target_encoder = TargetEncoder(shuffle=True, random_state=global_random_seed) + X_encoded_train_shuffled = target_encoder.fit_transform(X_train, y_train) + + target_encoder = TargetEncoder(shuffle=False) + X_encoded_train_no_shuffled = target_encoder.fit_transform(X_train, y_train) + + # Check that no information about y_train has leaked into X_train: + regressor = RandomForestRegressor( + n_estimators=10, min_samples_leaf=20, random_state=global_random_seed + ) + + # It's impossible to learn a good predictive model on the training set when + # using the original representation X_train or the target encoded + # representation with shuffled inner CV. For the latter, no information + # about y_train has inadvertently leaked into the prior used to generate + # `X_encoded_train_shuffled`: + cv = ShuffleSplit(n_splits=50, random_state=global_random_seed) + assert cross_val_score(regressor, X_train, y_train, cv=cv).mean() < 0.1 + assert ( + cross_val_score(regressor, X_encoded_train_shuffled, y_train, cv=cv).mean() + < 0.1 + ) + + # Without the inner CV shuffling, a lot of information about y_train goes into the + # the per-fold y_train.mean() priors: shrinkage is no longer effective in this + # case and would no longer be able to prevent downstream over-fitting. 
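+ # (Illustrative note: the rows were sorted by y_train above, so with
+ # shuffle=False each contiguous KFold split has its own target mean; the
+ # out-of-fold encoding of a row then depends on which fold it fell in,
+ # i.e. on its rank in y_train, which is what the regressor picks up on.)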
+ assert ( + cross_val_score(regressor, X_encoded_train_no_shuffled, y_train, cv=cv).mean() + > 0.5 + ) + + +def test_smooth_zero(): + """Check edge case with zero smoothing and cv does not contain category.""" + X = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]).T + y = np.array([2.1, 4.3, 1.2, 3.1, 1.0, 9.0, 10.3, 14.2, 13.3, 15.0]) + + enc = TargetEncoder(smooth=0.0, shuffle=False, cv=2) + X_trans = enc.fit_transform(X, y) + + # With cv = 2, category 0 does not exist in the second half, thus + # it will be encoded as the mean of the second half + assert_allclose(X_trans[0], np.mean(y[5:])) + + # category 1 does not exist in the first half, thus it will be encoded as + # the mean of the first half + assert_allclose(X_trans[-1], np.mean(y[:5])) + + +@pytest.mark.parametrize("smooth", [0.0, 1e3, "auto"]) +def test_invariance_of_encoding_under_label_permutation(smooth, global_random_seed): + # Check that the encoding does not depend on the integer of the value of + # the integer labels. This is quite a trivial property but it is helpful + # to understand the following test. + rng = np.random.RandomState(global_random_seed) + + # Random y and informative categorical X to make the test non-trivial when + # using smoothing. + y = rng.normal(size=1000) + n_categories = 30 + X = KBinsDiscretizer(n_bins=n_categories, encode="ordinal").fit_transform( + y.reshape(-1, 1) + ) + + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=global_random_seed + ) + + # Shuffle the labels to make sure that the encoding is invariant to the + # permutation of the labels + permutated_labels = rng.permutation(n_categories) + X_train_permuted = permutated_labels[X_train.astype(np.int32)] + X_test_permuted = permutated_labels[X_test.astype(np.int32)] + + target_encoder = TargetEncoder(smooth=smooth, random_state=global_random_seed) + X_train_encoded = target_encoder.fit_transform(X_train, y_train) + X_test_encoded = target_encoder.transform(X_test) + + X_train_permuted_encoded = target_encoder.fit_transform(X_train_permuted, y_train) + X_test_permuted_encoded = target_encoder.transform(X_test_permuted) + + assert_allclose(X_train_encoded, X_train_permuted_encoded) + assert_allclose(X_test_encoded, X_test_permuted_encoded) + + +@pytest.mark.parametrize("smooth", [0.0, "auto"]) +def test_target_encoding_for_linear_regression(smooth, global_random_seed): + # Check some expected statistical properties when fitting a linear + # regression model on target encoded features depending on their relation + # with that target. + + # In this test, we use the Ridge class with the "lsqr" solver and a little + # bit of regularization to implement a linear regression model that + # converges quickly for large `n_samples` and robustly in case of + # correlated features. Since we will fit this model on a mean centered + # target, we do not need to fit an intercept and this will help simplify + # the analysis with respect to the expected coefficients. + linear_regression = Ridge(alpha=1e-6, solver="lsqr", fit_intercept=False) + + # Construct a random target variable. We need a large number of samples for + # this test to be stable across all values of the random seed. + n_samples = 50_000 + rng = np.random.RandomState(global_random_seed) + y = rng.randn(n_samples) + + # Generate a single informative ordinal feature with medium cardinality. + # Inject some irreducible noise to make it harder for a multivariate model + # to identify the informative feature from other pure noise features. 
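+ # (Illustrative note: KBinsDiscretizer with strategy="uniform" maps y + noise
+ # into 100 ordinal bins, so the bin index is monotonically related to y up to
+ # the injected noise.)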
+ noise = 0.8 * rng.randn(n_samples) + n_categories = 100 + X_informative = KBinsDiscretizer( + n_bins=n_categories, + encode="ordinal", + strategy="uniform", + random_state=rng, + ).fit_transform((y + noise).reshape(-1, 1)) + + # Let's permute the labels to hide the fact that this feature is + # informative to naive linear regression model trained on the raw ordinal + # values. As highlighted in the previous test, the target encoding should be + # invariant to such a permutation. + permutated_labels = rng.permutation(n_categories) + X_informative = permutated_labels[X_informative.astype(np.int32)] + + # Generate a shuffled copy of the informative feature to destroy the + # relationship with the target. + X_shuffled = rng.permutation(X_informative) + + # Also include a very high cardinality categorical feature that is by + # itself independent of the target variable: target encoding such a feature + # without internal cross-validation should cause catastrophic overfitting + # for the downstream regressor, even with shrinkage. This kind of features + # typically represents near unique identifiers of samples. In general they + # should be removed from a machine learning datasets but here we want to + # study the ability of the default behavior of TargetEncoder to mitigate + # them automatically. + X_near_unique_categories = rng.choice( + int(0.9 * n_samples), size=n_samples, replace=True + ).reshape(-1, 1) + + # Assemble the dataset and do a train-test split: + X = np.concatenate( + [X_informative, X_shuffled, X_near_unique_categories], + axis=1, + ) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + # Let's first check that a linear regression model trained on the raw + # features underfits because of the meaning-less ordinal encoding of the + # labels. + raw_model = linear_regression.fit(X_train, y_train) + assert raw_model.score(X_train, y_train) < 0.1 + assert raw_model.score(X_test, y_test) < 0.1 + + # Now do the same with target encoding using the internal CV mechanism + # implemented when using fit_transform. + model_with_cv = make_pipeline( + TargetEncoder(smooth=smooth, random_state=rng), linear_regression + ).fit(X_train, y_train) + + # This model should be able to fit the data well and also generalise to the + # test data (assuming that the binning is fine-grained enough). The R2 + # scores are not perfect because of the noise injected during the + # generation of the unique informative feature. + coef = model_with_cv[-1].coef_ + assert model_with_cv.score(X_train, y_train) > 0.5, coef + assert model_with_cv.score(X_test, y_test) > 0.5, coef + + # The target encoder recovers the linear relationship with slope 1 between + # the target encoded unique informative predictor and the target. Since the + # target encoding of the 2 other features is not informative thanks to the + # use of internal cross-validation, the multivariate linear regressor + # assigns a coef of 1 to the first feature and 0 to the other 2. 
+ assert coef[0] == pytest.approx(1, abs=1e-2) + assert (np.abs(coef[1:]) < 0.2).all() + + # Let's now disable the internal cross-validation by calling fit and then + # transform separately on the training set: + target_encoder = TargetEncoder(smooth=smooth, random_state=rng).fit( + X_train, y_train + ) + X_enc_no_cv_train = target_encoder.transform(X_train) + X_enc_no_cv_test = target_encoder.transform(X_test) + model_no_cv = linear_regression.fit(X_enc_no_cv_train, y_train) + + # The linear regression model should always overfit because it assigns + # too much weight to the extremely high cardinality feature relatively to + # the informative feature. Note that this is the case even when using + # the empirical Bayes smoothing which is not enough to prevent such + # overfitting alone. + coef = model_no_cv.coef_ + assert model_no_cv.score(X_enc_no_cv_train, y_train) > 0.7, coef + assert model_no_cv.score(X_enc_no_cv_test, y_test) < 0.5, coef + + # The model overfits because it assigns too much weight to the high + # cardinality yet non-informative feature instead of the lower + # cardinality yet informative feature: + assert abs(coef[0]) < abs(coef[2]) + + +def test_pandas_copy_on_write(): + """ + Test target-encoder cython code when y is read-only. + + The numpy array underlying df["y"] is read-only when copy-on-write is enabled. + Non-regression test for gh-27879. + """ + pd = pytest.importorskip("pandas", minversion="2.0") + with pd.option_context("mode.copy_on_write", True): + df = pd.DataFrame({"x": ["a", "b", "b"], "y": [4.0, 5.0, 6.0]}) + TargetEncoder(target_type="continuous").fit(df[["x"]], df["y"]) diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/__init__.py b/.venv/Lib/site-packages/sklearn/semi_supervised/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..790c51e98185099ae22267463f6492d840087fd0 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/semi_supervised/__init__.py @@ -0,0 +1,13 @@ +"""Semi-supervised learning algorithms. + +These algorithms utilize small amounts of labeled data and large amounts of unlabeled +data for classification tasks. +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +from ._label_propagation import LabelPropagation, LabelSpreading +from ._self_training import SelfTrainingClassifier + +__all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"] diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/_label_propagation.py b/.venv/Lib/site-packages/sklearn/semi_supervised/_label_propagation.py new file mode 100644 index 0000000000000000000000000000000000000000..40e1be15ee5897312cc2e14febdce220d542df6f --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/semi_supervised/_label_propagation.py @@ -0,0 +1,625 @@ +# coding=utf8 +""" +Label propagation in the context of this module refers to a set of +semi-supervised classification algorithms. At a high level, these algorithms +work by forming a fully-connected graph between all points given and solving +for the steady-state distribution of labels at each point. + +These algorithms perform very well in practice. The cost of running can be very +expensive, at approximately O(N^3) where N is the number of (labeled and +unlabeled) points. The theory (why they perform so well) is motivated by +intuitions from random walk algorithms and geometric relationships in the data. +For more information see the references below. 
+ +Model Features +-------------- +Label clamping: + The algorithm tries to learn distributions of labels over the dataset given + label assignments over an initial subset. In one variant, the algorithm does + not allow for any errors in the initial assignment (hard-clamping) while + in another variant, the algorithm allows for some wiggle room for the initial + assignments, allowing them to change by a fraction alpha in each iteration + (soft-clamping). + +Kernel: + A function which projects a vector into some higher dimensional space. This + implementation supports RBF and KNN kernels. Using the RBF kernel generates + a dense matrix of size O(N^2). KNN kernel will generate a sparse matrix of + size O(k*N) which will run much faster. See the documentation for SVMs for + more info on kernels. + +Examples +-------- +>>> import numpy as np +>>> from sklearn import datasets +>>> from sklearn.semi_supervised import LabelPropagation +>>> label_prop_model = LabelPropagation() +>>> iris = datasets.load_iris() +>>> rng = np.random.RandomState(42) +>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 +>>> labels = np.copy(iris.target) +>>> labels[random_unlabeled_points] = -1 +>>> label_prop_model.fit(iris.data, labels) +LabelPropagation(...) + +Notes +----- +References: +[1] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised +Learning (2006), pp. 193-216 + +[2] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient +Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +import warnings +from abc import ABCMeta, abstractmethod +from numbers import Integral, Real + +import numpy as np +from scipy import sparse + +from ..base import BaseEstimator, ClassifierMixin, _fit_context +from ..exceptions import ConvergenceWarning +from ..metrics.pairwise import rbf_kernel +from ..neighbors import NearestNeighbors +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import safe_sparse_dot +from ..utils.fixes import laplacian as csgraph_laplacian +from ..utils.multiclass import check_classification_targets +from ..utils.validation import check_is_fitted, validate_data + + +class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta): + """Base class for label propagation module. + + Parameters + ---------- + kernel : {'knn', 'rbf'} or callable, default='rbf' + String identifier for kernel function to use or the kernel function + itself. Only 'rbf' and 'knn' strings are valid inputs. The function + passed should take two inputs, each of shape (n_samples, n_features), + and return a (n_samples, n_samples) shaped weight matrix. + + gamma : float, default=20 + Parameter for rbf kernel. + + n_neighbors : int, default=7 + Parameter for knn kernel. Need to be strictly positive. + + alpha : float, default=1.0 + Clamping factor. + + max_iter : int, default=30 + Change maximum number of iterations allowed. + + tol : float, default=1e-3 + Convergence tolerance: threshold to consider the system at steady + state. + + n_jobs : int, default=None + The number of parallel jobs to run. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. 
+ """ + + _parameter_constraints: dict = { + "kernel": [StrOptions({"knn", "rbf"}), callable], + "gamma": [Interval(Real, 0, None, closed="left")], + "n_neighbors": [Interval(Integral, 0, None, closed="neither")], + "alpha": [None, Interval(Real, 0, 1, closed="neither")], + "max_iter": [Interval(Integral, 0, None, closed="neither")], + "tol": [Interval(Real, 0, None, closed="left")], + "n_jobs": [None, Integral], + } + + def __init__( + self, + kernel="rbf", + *, + gamma=20, + n_neighbors=7, + alpha=1, + max_iter=30, + tol=1e-3, + n_jobs=None, + ): + self.max_iter = max_iter + self.tol = tol + + # kernel parameters + self.kernel = kernel + self.gamma = gamma + self.n_neighbors = n_neighbors + + # clamping factor + self.alpha = alpha + + self.n_jobs = n_jobs + + def _get_kernel(self, X, y=None): + if self.kernel == "rbf": + if y is None: + return rbf_kernel(X, X, gamma=self.gamma) + else: + return rbf_kernel(X, y, gamma=self.gamma) + elif self.kernel == "knn": + if self.nn_fit is None: + self.nn_fit = NearestNeighbors( + n_neighbors=self.n_neighbors, n_jobs=self.n_jobs + ).fit(X) + if y is None: + return self.nn_fit.kneighbors_graph( + self.nn_fit._fit_X, self.n_neighbors, mode="connectivity" + ) + else: + return self.nn_fit.kneighbors(y, return_distance=False) + elif callable(self.kernel): + if y is None: + return self.kernel(X, X) + else: + return self.kernel(X, y) + + @abstractmethod + def _build_graph(self): + raise NotImplementedError( + "Graph construction must be implemented to fit a label propagation model." + ) + + def predict(self, X): + """Perform inductive inference across the model. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + + Returns + ------- + y : ndarray of shape (n_samples,) + Predictions for input data. + """ + # Note: since `predict` does not accept semi-supervised labels as input, + # `fit(X, y).predict(X) != fit(X, y).transduction_`. + # Hence, `fit_predict` is not implemented. + # See https://github.com/scikit-learn/scikit-learn/pull/24898 + probas = self.predict_proba(X) + return self.classes_[np.argmax(probas, axis=1)].ravel() + + def predict_proba(self, X): + """Predict probability for each possible outcome. + + Compute the probability estimates for each single sample in X + and each possible outcome seen during training (categorical + distribution). + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + + Returns + ------- + probabilities : ndarray of shape (n_samples, n_classes) + Normalized probability distributions across + class labels. + """ + check_is_fitted(self) + + X_2d = validate_data( + self, + X, + accept_sparse=["csc", "csr", "coo", "dok", "bsr", "lil", "dia"], + reset=False, + ) + weight_matrices = self._get_kernel(self.X_, X_2d) + if self.kernel == "knn": + probabilities = np.array( + [ + np.sum(self.label_distributions_[weight_matrix], axis=0) + for weight_matrix in weight_matrices + ] + ) + else: + weight_matrices = weight_matrices.T + probabilities = safe_sparse_dot(weight_matrices, self.label_distributions_) + normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T + probabilities /= normalizer + return probabilities + + @_fit_context(prefer_skip_nested_validation=True) + def fit(self, X, y): + """Fit a semi-supervised label propagation model to X. + + The input samples (labeled and unlabeled) are provided by matrix X, + and target labels are provided by matrix y. 
We conventionally apply the + label -1 to unlabeled samples in matrix y in a semi-supervised + classification. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training data, where `n_samples` is the number of samples + and `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target class values with unlabeled points marked as -1. + All unlabeled samples will be transductively assigned labels + internally, which are stored in `transduction_`. + + Returns + ------- + self : object + Returns the instance itself. + """ + X, y = validate_data( + self, + X, + y, + accept_sparse=["csr", "csc"], + reset=True, + ) + self.X_ = X + check_classification_targets(y) + + # actual graph construction (implementations should override this) + graph_matrix = self._build_graph() + + # label construction + # construct a categorical distribution for classification only + classes = np.unique(y) + classes = classes[classes != -1] + self.classes_ = classes + + n_samples, n_classes = len(y), len(classes) + + y = np.asarray(y) + unlabeled = y == -1 + + # initialize distributions + self.label_distributions_ = np.zeros((n_samples, n_classes)) + for label in classes: + self.label_distributions_[y == label, classes == label] = 1 + + y_static = np.copy(self.label_distributions_) + if self._variant == "propagation": + # LabelPropagation + y_static[unlabeled] = 0 + else: + # LabelSpreading + y_static *= 1 - self.alpha + + l_previous = np.zeros((self.X_.shape[0], n_classes)) + + unlabeled = unlabeled[:, np.newaxis] + if sparse.issparse(graph_matrix): + graph_matrix = graph_matrix.tocsr() + + for self.n_iter_ in range(self.max_iter): + if np.abs(self.label_distributions_ - l_previous).sum() < self.tol: + break + + l_previous = self.label_distributions_ + self.label_distributions_ = safe_sparse_dot( + graph_matrix, self.label_distributions_ + ) + + if self._variant == "propagation": + normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] + normalizer[normalizer == 0] = 1 + self.label_distributions_ /= normalizer + self.label_distributions_ = np.where( + unlabeled, self.label_distributions_, y_static + ) + else: + # clamp + self.label_distributions_ = ( + np.multiply(self.alpha, self.label_distributions_) + y_static + ) + else: + warnings.warn( + "max_iter=%d was reached without convergence." % self.max_iter, + category=ConvergenceWarning, + ) + self.n_iter_ += 1 + + normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] + normalizer[normalizer == 0] = 1 + self.label_distributions_ /= normalizer + + # set the transduction item + transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)] + self.transduction_ = transduction.ravel() + return self + + +class LabelPropagation(BaseLabelPropagation): + """Label Propagation classifier. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + kernel : {'knn', 'rbf'} or callable, default='rbf' + String identifier for kernel function to use or the kernel function + itself. Only 'rbf' and 'knn' strings are valid inputs. The function + passed should take two inputs, each of shape (n_samples, n_features), + and return a (n_samples, n_samples) shaped weight matrix. + + gamma : float, default=20 + Parameter for rbf kernel. + + n_neighbors : int, default=7 + Parameter for knn kernel which need to be strictly positive. + + max_iter : int, default=1000 + Change maximum number of iterations allowed. 
+ + tol : float, 1e-3 + Convergence tolerance: threshold to consider the system at steady + state. + + n_jobs : int, default=None + The number of parallel jobs to run. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + Attributes + ---------- + X_ : {array-like, sparse matrix} of shape (n_samples, n_features) + Input array. + + classes_ : ndarray of shape (n_classes,) + The distinct labels used in classifying instances. + + label_distributions_ : ndarray of shape (n_samples, n_classes) + Categorical distribution for each item. + + transduction_ : ndarray of shape (n_samples) + Label assigned to each item during :term:`fit`. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run. + + See Also + -------- + LabelSpreading : Alternate label propagation strategy more robust to noise. + + References + ---------- + Xiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data + with label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon + University, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf + + Examples + -------- + >>> import numpy as np + >>> from sklearn import datasets + >>> from sklearn.semi_supervised import LabelPropagation + >>> label_prop_model = LabelPropagation() + >>> iris = datasets.load_iris() + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 + >>> labels = np.copy(iris.target) + >>> labels[random_unlabeled_points] = -1 + >>> label_prop_model.fit(iris.data, labels) + LabelPropagation(...) + """ + + _variant = "propagation" + + _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints} + _parameter_constraints.pop("alpha") + + def __init__( + self, + kernel="rbf", + *, + gamma=20, + n_neighbors=7, + max_iter=1000, + tol=1e-3, + n_jobs=None, + ): + super().__init__( + kernel=kernel, + gamma=gamma, + n_neighbors=n_neighbors, + max_iter=max_iter, + tol=tol, + n_jobs=n_jobs, + alpha=None, + ) + + def _build_graph(self): + """Matrix representing a fully connected graph between each sample + + This basic implementation creates a non-stochastic affinity matrix, so + class distributions will exceed 1 (normalization may be desired). + """ + if self.kernel == "knn": + self.nn_fit = None + affinity_matrix = self._get_kernel(self.X_) + normalizer = affinity_matrix.sum(axis=0) + if sparse.issparse(affinity_matrix): + affinity_matrix.data /= np.diag(np.array(normalizer)) + else: + affinity_matrix /= normalizer[:, np.newaxis] + return affinity_matrix + + def fit(self, X, y): + """Fit a semi-supervised label propagation model to X. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training data, where `n_samples` is the number of samples + and `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target class values with unlabeled points marked as -1. + All unlabeled samples will be transductively assigned labels + internally, which are stored in `transduction_`. + + Returns + ------- + self : object + Returns the instance itself. 
+ """ + return super().fit(X, y) + + +class LabelSpreading(BaseLabelPropagation): + """LabelSpreading model for semi-supervised learning. + + This model is similar to the basic Label Propagation algorithm, + but uses affinity matrix based on the normalized graph Laplacian + and soft clamping across the labels. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + kernel : {'knn', 'rbf'} or callable, default='rbf' + String identifier for kernel function to use or the kernel function + itself. Only 'rbf' and 'knn' strings are valid inputs. The function + passed should take two inputs, each of shape (n_samples, n_features), + and return a (n_samples, n_samples) shaped weight matrix. + + gamma : float, default=20 + Parameter for rbf kernel. + + n_neighbors : int, default=7 + Parameter for knn kernel which is a strictly positive integer. + + alpha : float, default=0.2 + Clamping factor. A value in (0, 1) that specifies the relative amount + that an instance should adopt the information from its neighbors as + opposed to its initial label. + alpha=0 means keeping the initial label information; alpha=1 means + replacing all initial information. + + max_iter : int, default=30 + Maximum number of iterations allowed. + + tol : float, default=1e-3 + Convergence tolerance: threshold to consider the system at steady + state. + + n_jobs : int, default=None + The number of parallel jobs to run. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + Attributes + ---------- + X_ : ndarray of shape (n_samples, n_features) + Input array. + + classes_ : ndarray of shape (n_classes,) + The distinct labels used in classifying instances. + + label_distributions_ : ndarray of shape (n_samples, n_classes) + Categorical distribution for each item. + + transduction_ : ndarray of shape (n_samples,) + Label assigned to each item during :term:`fit`. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run. + + See Also + -------- + LabelPropagation : Unregularized graph based semi-supervised learning. + + References + ---------- + `Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston, + Bernhard Schoelkopf. Learning with local and global consistency (2004) + `_ + + Examples + -------- + >>> import numpy as np + >>> from sklearn import datasets + >>> from sklearn.semi_supervised import LabelSpreading + >>> label_prop_model = LabelSpreading() + >>> iris = datasets.load_iris() + >>> rng = np.random.RandomState(42) + >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3 + >>> labels = np.copy(iris.target) + >>> labels[random_unlabeled_points] = -1 + >>> label_prop_model.fit(iris.data, labels) + LabelSpreading(...) 
+ """ + + _variant = "spreading" + + _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints} + _parameter_constraints["alpha"] = [Interval(Real, 0, 1, closed="neither")] + + def __init__( + self, + kernel="rbf", + *, + gamma=20, + n_neighbors=7, + alpha=0.2, + max_iter=30, + tol=1e-3, + n_jobs=None, + ): + # this one has different base parameters + super().__init__( + kernel=kernel, + gamma=gamma, + n_neighbors=n_neighbors, + alpha=alpha, + max_iter=max_iter, + tol=tol, + n_jobs=n_jobs, + ) + + def _build_graph(self): + """Graph matrix for Label Spreading computes the graph laplacian""" + # compute affinity matrix (or gram matrix) + if self.kernel == "knn": + self.nn_fit = None + n_samples = self.X_.shape[0] + affinity_matrix = self._get_kernel(self.X_) + laplacian = csgraph_laplacian(affinity_matrix, normed=True) + laplacian = -laplacian + if sparse.issparse(laplacian): + diag_mask = laplacian.row == laplacian.col + laplacian.data[diag_mask] = 0.0 + else: + laplacian.flat[:: n_samples + 1] = 0.0 # set diag to 0.0 + return laplacian diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/_self_training.py b/.venv/Lib/site-packages/sklearn/semi_supervised/_self_training.py new file mode 100644 index 0000000000000000000000000000000000000000..3d0627d36ac853f0ead43c237658c7cd800c01f4 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/semi_supervised/_self_training.py @@ -0,0 +1,615 @@ +import warnings +from numbers import Integral, Real +from warnings import warn + +import numpy as np + +from sklearn.base import ClassifierMixin + +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone +from ..utils import Bunch, safe_mask +from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions +from ..utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + _raise_for_params, + _routing_enabled, + process_routing, +) +from ..utils.metaestimators import available_if +from ..utils.validation import _estimator_has, check_is_fitted, validate_data + +__all__ = ["SelfTrainingClassifier"] + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + + +class SelfTrainingClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): + """Self-training classifier. + + This :term:`metaestimator` allows a given supervised classifier to function as a + semi-supervised classifier, allowing it to learn from unlabeled data. It + does this by iteratively predicting pseudo-labels for the unlabeled data + and adding them to the training set. + + The classifier will continue iterating until either max_iter is reached, or + no pseudo-labels were added to the training set in the previous iteration. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator object + An estimator object implementing `fit` and `predict_proba`. + Invoking the `fit` method will fit a clone of the passed estimator, + which will be stored in the `estimator_` attribute. + + .. versionadded:: 1.6 + `estimator` was added to replace `base_estimator`. + + base_estimator : estimator object + An estimator object implementing `fit` and `predict_proba`. + Invoking the `fit` method will fit a clone of the passed estimator, + which will be stored in the `estimator_` attribute. + + .. deprecated:: 1.6 + `base_estimator` was deprecated in 1.6 and will be removed in 1.8. + Use `estimator` instead. + + threshold : float, default=0.75 + The decision threshold for use with `criterion='threshold'`. + Should be in [0, 1). 
When using the `'threshold'` criterion, a + :ref:`well calibrated classifier ` should be used. + + criterion : {'threshold', 'k_best'}, default='threshold' + The selection criterion used to select which labels to add to the + training set. If `'threshold'`, pseudo-labels with prediction + probabilities above `threshold` are added to the dataset. If `'k_best'`, + the `k_best` pseudo-labels with highest prediction probabilities are + added to the dataset. When using the 'threshold' criterion, a + :ref:`well calibrated classifier ` should be used. + + k_best : int, default=10 + The amount of samples to add in each iteration. Only used when + `criterion='k_best'`. + + max_iter : int or None, default=10 + Maximum number of iterations allowed. Should be greater than or equal + to 0. If it is `None`, the classifier will continue to predict labels + until no new pseudo-labels are added, or all unlabeled samples have + been labeled. + + verbose : bool, default=False + Enable verbose output. + + Attributes + ---------- + estimator_ : estimator object + The fitted estimator. + + classes_ : ndarray or list of ndarray of shape (n_classes,) + Class labels for each output. (Taken from the trained + `estimator_`). + + transduction_ : ndarray of shape (n_samples,) + The labels used for the final fit of the classifier, including + pseudo-labels added during fit. + + labeled_iter_ : ndarray of shape (n_samples,) + The iteration in which each sample was labeled. When a sample has + iteration 0, the sample was already labeled in the original dataset. + When a sample has iteration -1, the sample was not labeled in any + iteration. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + The number of rounds of self-training, that is the number of times the + base estimator is fitted on relabeled variants of the training set. + + termination_condition_ : {'max_iter', 'no_change', 'all_labeled'} + The reason that fitting was stopped. + + - `'max_iter'`: `n_iter_` reached `max_iter`. + - `'no_change'`: no new labels were predicted. + - `'all_labeled'`: all unlabeled samples were labeled before `max_iter` + was reached. + + See Also + -------- + LabelPropagation : Label propagation classifier. + LabelSpreading : Label spreading model for semi-supervised learning. + + References + ---------- + :doi:`David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling + supervised methods. In Proceedings of the 33rd annual meeting on + Association for Computational Linguistics (ACL '95). Association for + Computational Linguistics, Stroudsburg, PA, USA, 189-196. + <10.3115/981658.981684>` + + Examples + -------- + >>> import numpy as np + >>> from sklearn import datasets + >>> from sklearn.semi_supervised import SelfTrainingClassifier + >>> from sklearn.svm import SVC + >>> rng = np.random.RandomState(42) + >>> iris = datasets.load_iris() + >>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3 + >>> iris.target[random_unlabeled_points] = -1 + >>> svc = SVC(probability=True, gamma="auto") + >>> self_training_model = SelfTrainingClassifier(svc) + >>> self_training_model.fit(iris.data, iris.target) + SelfTrainingClassifier(...) 
+ """ + + _parameter_constraints: dict = { + # We don't require `predic_proba` here to allow passing a meta-estimator + # that only exposes `predict_proba` after fitting. + # TODO(1.8) remove None option + "estimator": [None, HasMethods(["fit"])], + # TODO(1.8) remove + "base_estimator": [ + HasMethods(["fit"]), + Hidden(StrOptions({"deprecated"})), + ], + "threshold": [Interval(Real, 0.0, 1.0, closed="left")], + "criterion": [StrOptions({"threshold", "k_best"})], + "k_best": [Interval(Integral, 1, None, closed="left")], + "max_iter": [Interval(Integral, 0, None, closed="left"), None], + "verbose": ["verbose"], + } + + def __init__( + self, + estimator=None, + base_estimator="deprecated", + threshold=0.75, + criterion="threshold", + k_best=10, + max_iter=10, + verbose=False, + ): + self.estimator = estimator + self.threshold = threshold + self.criterion = criterion + self.k_best = k_best + self.max_iter = max_iter + self.verbose = verbose + + # TODO(1.8) remove + self.base_estimator = base_estimator + + def _get_estimator(self): + """Get the estimator. + + Returns + ------- + estimator_ : estimator object + The cloned estimator object. + """ + # TODO(1.8): remove and only keep clone(self.estimator) + if self.estimator is None and self.base_estimator != "deprecated": + estimator_ = clone(self.base_estimator) + + warn( + ( + "`base_estimator` has been deprecated in 1.6 and will be removed" + " in 1.8. Please use `estimator` instead." + ), + FutureWarning, + ) + # TODO(1.8) remove + elif self.estimator is None and self.base_estimator == "deprecated": + raise ValueError( + "You must pass an estimator to SelfTrainingClassifier." + " Use `estimator`." + ) + elif self.estimator is not None and self.base_estimator != "deprecated": + raise ValueError( + "You must pass only one estimator to SelfTrainingClassifier." + " Use `estimator`." + ) + else: + estimator_ = clone(self.estimator) + return estimator_ + + @_fit_context( + # SelfTrainingClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) + def fit(self, X, y, **params): + """ + Fit self-training classifier using `X`, `y` as training data. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + y : {array-like, sparse matrix} of shape (n_samples,) + Array representing the labels. Unlabeled samples should have the + label -1. + + **params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + self : object + Fitted estimator. + """ + _raise_for_params(params, self, "fit") + + self.estimator_ = self._get_estimator() + + # we need row slicing support for sparse matrices, but costly finiteness check + # can be delegated to the base estimator. + X, y = validate_data( + self, + X, + y, + accept_sparse=["csr", "csc", "lil", "dok"], + ensure_all_finite=False, + ) + + if y.dtype.kind in ["U", "S"]: + raise ValueError( + "y has dtype string. If you wish to predict on " + "string targets, use dtype object, and use -1" + " as the label for unlabeled samples." 
+ ) + + has_label = y != -1 + + if np.all(has_label): + warnings.warn("y contains no unlabeled samples", UserWarning) + + if self.criterion == "k_best" and ( + self.k_best > X.shape[0] - np.sum(has_label) + ): + warnings.warn( + ( + "k_best is larger than the amount of unlabeled " + "samples. All unlabeled samples will be labeled in " + "the first iteration" + ), + UserWarning, + ) + + if _routing_enabled(): + routed_params = process_routing(self, "fit", **params) + else: + routed_params = Bunch(estimator=Bunch(fit={})) + + self.transduction_ = np.copy(y) + self.labeled_iter_ = np.full_like(y, -1) + self.labeled_iter_[has_label] = 0 + + self.n_iter_ = 0 + + while not np.all(has_label) and ( + self.max_iter is None or self.n_iter_ < self.max_iter + ): + self.n_iter_ += 1 + self.estimator_.fit( + X[safe_mask(X, has_label)], + self.transduction_[has_label], + **routed_params.estimator.fit, + ) + + # Predict on the unlabeled samples + prob = self.estimator_.predict_proba(X[safe_mask(X, ~has_label)]) + pred = self.estimator_.classes_[np.argmax(prob, axis=1)] + max_proba = np.max(prob, axis=1) + + # Select new labeled samples + if self.criterion == "threshold": + selected = max_proba > self.threshold + else: + n_to_select = min(self.k_best, max_proba.shape[0]) + if n_to_select == max_proba.shape[0]: + selected = np.ones_like(max_proba, dtype=bool) + else: + # NB these are indices, not a mask + selected = np.argpartition(-max_proba, n_to_select)[:n_to_select] + + # Map selected indices into original array + selected_full = np.nonzero(~has_label)[0][selected] + + # Add newly labeled confident predictions to the dataset + self.transduction_[selected_full] = pred[selected] + has_label[selected_full] = True + self.labeled_iter_[selected_full] = self.n_iter_ + + if selected_full.shape[0] == 0: + # no changed labels + self.termination_condition_ = "no_change" + break + + if self.verbose: + print( + f"End of iteration {self.n_iter_}," + f" added {selected_full.shape[0]} new labels." + ) + + if self.n_iter_ == self.max_iter: + self.termination_condition_ = "max_iter" + if np.all(has_label): + self.termination_condition_ = "all_labeled" + + self.estimator_.fit( + X[safe_mask(X, has_label)], + self.transduction_[has_label], + **routed_params.estimator.fit, + ) + self.classes_ = self.estimator_.classes_ + return self + + @available_if(_estimator_has("predict")) + def predict(self, X, **params): + """Predict the classes of `X`. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + **params : dict of str -> object + Parameters to pass to the underlying estimator's ``predict`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + y : ndarray of shape (n_samples,) + Array with predicted labels. + """ + check_is_fitted(self) + _raise_for_params(params, self, "predict") + + if _routing_enabled(): + # metadata routing is enabled. 
+ routed_params = process_routing(self, "predict", **params) + else: + routed_params = Bunch(estimator=Bunch(predict={})) + + X = validate_data( + self, + X, + accept_sparse=True, + ensure_all_finite=False, + reset=False, + ) + return self.estimator_.predict(X, **routed_params.estimator.predict) + + @available_if(_estimator_has("predict_proba")) + def predict_proba(self, X, **params): + """Predict probability for each possible outcome. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict_proba`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + y : ndarray of shape (n_samples, n_features) + Array with prediction probabilities. + """ + check_is_fitted(self) + _raise_for_params(params, self, "predict_proba") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_proba", **params) + else: + routed_params = Bunch(estimator=Bunch(predict_proba={})) + + X = validate_data( + self, + X, + accept_sparse=True, + ensure_all_finite=False, + reset=False, + ) + return self.estimator_.predict_proba(X, **routed_params.estimator.predict_proba) + + @available_if(_estimator_has("decision_function")) + def decision_function(self, X, **params): + """Call decision function of the `estimator`. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``decision_function`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + y : ndarray of shape (n_samples, n_features) + Result of the decision function of the `estimator`. + """ + check_is_fitted(self) + _raise_for_params(params, self, "decision_function") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "decision_function", **params) + else: + routed_params = Bunch(estimator=Bunch(decision_function={})) + + X = validate_data( + self, + X, + accept_sparse=True, + ensure_all_finite=False, + reset=False, + ) + return self.estimator_.decision_function( + X, **routed_params.estimator.decision_function + ) + + @available_if(_estimator_has("predict_log_proba")) + def predict_log_proba(self, X, **params): + """Predict log probability for each possible outcome. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict_log_proba`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + y : ndarray of shape (n_samples, n_features) + Array with log prediction probabilities. 
+ """ + check_is_fitted(self) + _raise_for_params(params, self, "predict_log_proba") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_log_proba", **params) + else: + routed_params = Bunch(estimator=Bunch(predict_log_proba={})) + + X = validate_data( + self, + X, + accept_sparse=True, + ensure_all_finite=False, + reset=False, + ) + return self.estimator_.predict_log_proba( + X, **routed_params.estimator.predict_log_proba + ) + + @available_if(_estimator_has("score")) + def score(self, X, y, **params): + """Call score on the `estimator`. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Array representing the data. + + y : array-like of shape (n_samples,) + Array representing the labels. + + **params : dict of str -> object + Parameters to pass to the underlying estimator's ``score`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + + Returns + ------- + score : float + Result of calling score on the `estimator`. + """ + check_is_fitted(self) + _raise_for_params(params, self, "score") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "score", **params) + else: + routed_params = Bunch(estimator=Bunch(score={})) + + X = validate_data( + self, + X, + accept_sparse=True, + ensure_all_finite=False, + reset=False, + ) + return self.estimator_.score(X, y, **routed_params.estimator.score) + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. 
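A minimal sketch of how the routing described here is used from the caller's
side, assuming a scikit-learn build in which `SelfTrainingClassifier` supports
metadata routing (1.6+ per the `.. versionadded::` notes above):

from sklearn import set_config
from sklearn.svm import SVC
from sklearn.semi_supervised import SelfTrainingClassifier

set_config(enable_metadata_routing=True)
# Request that `sample_weight` passed to SelfTrainingClassifier.fit be routed
# to the wrapped estimator's fit (SVC.fit accepts sample_weight).
inner = SVC(probability=True, gamma="auto").set_fit_request(sample_weight=True)
model = SelfTrainingClassifier(inner)
# model.fit(X, y, sample_weight=w) would now forward sample_weight to SVC.fit.
set_config(enable_metadata_routing=False)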
+ """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=( + MethodMapping() + .add(callee="fit", caller="fit") + .add(callee="score", caller="fit") + .add(callee="predict", caller="predict") + .add(callee="predict_proba", caller="predict_proba") + .add(callee="decision_function", caller="decision_function") + .add(callee="predict_log_proba", caller="predict_log_proba") + .add(callee="score", caller="score") + ), + ) + return router diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/tests/__init__.py b/.venv/Lib/site-packages/sklearn/semi_supervised/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_label_propagation.py b/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_label_propagation.py new file mode 100644 index 0000000000000000000000000000000000000000..b012c5bf7755a0b13ddc095ada57e036cb15b705 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_label_propagation.py @@ -0,0 +1,238 @@ +"""test the label propagation module""" + +import warnings + +import numpy as np +import pytest +from scipy.sparse import issparse + +from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.model_selection import train_test_split +from sklearn.neighbors import NearestNeighbors +from sklearn.semi_supervised import _label_propagation as label_propagation +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_array_equal, +) + +CONSTRUCTOR_TYPES = ("array", "sparse_csr", "sparse_csc") + +ESTIMATORS = [ + (label_propagation.LabelPropagation, {"kernel": "rbf"}), + (label_propagation.LabelPropagation, {"kernel": "knn", "n_neighbors": 2}), + ( + label_propagation.LabelPropagation, + {"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)}, + ), + (label_propagation.LabelSpreading, {"kernel": "rbf"}), + (label_propagation.LabelSpreading, {"kernel": "knn", "n_neighbors": 2}), + ( + label_propagation.LabelSpreading, + {"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)}, + ), +] + + +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_fit_transduction(global_dtype, Estimator, parameters): + samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype) + labels = [0, 1, -1] + clf = Estimator(**parameters).fit(samples, labels) + assert clf.transduction_[2] == 1 + + +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_distribution(global_dtype, Estimator, parameters): + if parameters["kernel"] == "knn": + pytest.skip( + "Unstable test for this configuration: changes in k-NN ordering break it." 
+ ) + samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=global_dtype) + labels = [0, 1, -1] + clf = Estimator(**parameters).fit(samples, labels) + assert_allclose(clf.label_distributions_[2], [0.5, 0.5], atol=1e-2) + + +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_predict(global_dtype, Estimator, parameters): + samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype) + labels = [0, 1, -1] + clf = Estimator(**parameters).fit(samples, labels) + assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1])) + + +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_predict_proba(global_dtype, Estimator, parameters): + samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], dtype=global_dtype) + labels = [0, 1, -1] + clf = Estimator(**parameters).fit(samples, labels) + assert_allclose(clf.predict_proba([[1.0, 1.0]]), np.array([[0.5, 0.5]])) + + +@pytest.mark.parametrize("alpha", [0.1, 0.3, 0.5, 0.7, 0.9]) +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_label_spreading_closed_form(global_dtype, Estimator, parameters, alpha): + n_classes = 2 + X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0) + X = X.astype(global_dtype, copy=False) + y[::3] = -1 + + gamma = 0.1 + clf = label_propagation.LabelSpreading(gamma=gamma).fit(X, y) + # adopting notation from Zhou et al (2004): + S = clf._build_graph() + Y = np.zeros((len(y), n_classes + 1), dtype=X.dtype) + Y[np.arange(len(y)), y] = 1 + Y = Y[:, :-1] + + expected = np.dot(np.linalg.inv(np.eye(len(S), dtype=S.dtype) - alpha * S), Y) + expected /= expected.sum(axis=1)[:, np.newaxis] + + clf = label_propagation.LabelSpreading( + max_iter=100, alpha=alpha, tol=1e-10, gamma=gamma + ) + clf.fit(X, y) + + assert_allclose(expected, clf.label_distributions_) + + +def test_label_propagation_closed_form(global_dtype): + n_classes = 2 + X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0) + X = X.astype(global_dtype, copy=False) + y[::3] = -1 + Y = np.zeros((len(y), n_classes + 1)) + Y[np.arange(len(y)), y] = 1 + unlabelled_idx = Y[:, (-1,)].nonzero()[0] + labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0] + + clf = label_propagation.LabelPropagation(max_iter=100, tol=1e-10, gamma=0.1) + clf.fit(X, y) + # adopting notation from Zhu et al 2002 + T_bar = clf._build_graph() + Tuu = T_bar[tuple(np.meshgrid(unlabelled_idx, unlabelled_idx, indexing="ij"))] + Tul = T_bar[tuple(np.meshgrid(unlabelled_idx, labelled_idx, indexing="ij"))] + Y = Y[:, :-1] + Y_l = Y[labelled_idx, :] + Y_u = np.dot(np.dot(np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu), Tul), Y_l) + + expected = Y.copy() + expected[unlabelled_idx, :] = Y_u + expected /= expected.sum(axis=1)[:, np.newaxis] + + assert_allclose(expected, clf.label_distributions_, atol=1e-4) + + +@pytest.mark.parametrize("accepted_sparse_type", ["sparse_csr", "sparse_csc"]) +@pytest.mark.parametrize("index_dtype", [np.int32, np.int64]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS) +def test_sparse_input_types( + accepted_sparse_type, index_dtype, dtype, Estimator, parameters +): + # This is non-regression test for #17085 + X = _convert_container([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], accepted_sparse_type) + X.data = X.data.astype(dtype, copy=False) + X.indices = X.indices.astype(index_dtype, copy=False) + X.indptr = X.indptr.astype(index_dtype, copy=False) + labels = [0, 1, -1] + clf = 
Estimator(**parameters).fit(X, labels) + assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1])) + + +@pytest.mark.parametrize("constructor_type", CONSTRUCTOR_TYPES) +def test_convergence_speed(constructor_type): + # This is a non-regression test for #5774 + X = _convert_container([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], constructor_type) + y = np.array([0, 1, -1]) + mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=5000) + mdl.fit(X, y) + + # this should converge quickly: + assert mdl.n_iter_ < 10 + assert_array_equal(mdl.predict(X), [0, 1, 1]) + + +def test_convergence_warning(): + # This is a non-regression test for #5774 + X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]]) + y = np.array([0, 1, -1]) + mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1) + warn_msg = "max_iter=1 was reached without convergence." + with pytest.warns(ConvergenceWarning, match=warn_msg): + mdl.fit(X, y) + assert mdl.n_iter_ == mdl.max_iter + + mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1) + with pytest.warns(ConvergenceWarning, match=warn_msg): + mdl.fit(X, y) + assert mdl.n_iter_ == mdl.max_iter + + mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500) + with warnings.catch_warnings(): + warnings.simplefilter("error", ConvergenceWarning) + mdl.fit(X, y) + + mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500) + with warnings.catch_warnings(): + warnings.simplefilter("error", ConvergenceWarning) + mdl.fit(X, y) + + +@pytest.mark.parametrize( + "LabelPropagationCls", + [label_propagation.LabelSpreading, label_propagation.LabelPropagation], +) +def test_label_propagation_non_zero_normalizer(LabelPropagationCls): + # check that we don't divide by zero in case of null normalizer + # non-regression test for + # https://github.com/scikit-learn/scikit-learn/pull/15946 + # https://github.com/scikit-learn/scikit-learn/issues/9292 + X = np.array([[100.0, 100.0], [100.0, 100.0], [0.0, 0.0], [0.0, 0.0]]) + y = np.array([0, 1, -1, -1]) + mdl = LabelPropagationCls(kernel="knn", max_iter=100, n_neighbors=1) + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + mdl.fit(X, y) + + +def test_predict_sparse_callable_kernel(global_dtype): + # This is a non-regression test for #15866 + + # Custom sparse kernel (top-K RBF) + def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5): + nn = NearestNeighbors(n_neighbors=10, metric="euclidean", n_jobs=2) + nn.fit(X) + W = -1 * nn.kneighbors_graph(Y, mode="distance").power(2) * gamma + np.exp(W.data, out=W.data) + assert issparse(W) + return W.T + + n_classes = 4 + n_samples = 500 + n_test = 10 + X, y = make_classification( + n_classes=n_classes, + n_samples=n_samples, + n_features=20, + n_informative=20, + n_redundant=0, + n_repeated=0, + random_state=0, + ) + X = X.astype(global_dtype) + + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=n_test, random_state=0 + ) + + model = label_propagation.LabelSpreading(kernel=topk_rbf) + model.fit(X_train, y_train) + assert model.score(X_test, y_test) >= 0.9 + + model = label_propagation.LabelPropagation(kernel=topk_rbf) + model.fit(X_train, y_train) + assert model.score(X_test, y_test) >= 0.9 diff --git a/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_self_training.py b/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_self_training.py new file mode 100644 index 0000000000000000000000000000000000000000..983355b32ab4572d266519b98350107665d032c3 --- /dev/null +++ 
b/.venv/Lib/site-packages/sklearn/semi_supervised/tests/test_self_training.py @@ -0,0 +1,395 @@ +from math import ceil + +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +from sklearn.datasets import load_iris, make_blobs +from sklearn.ensemble import StackingClassifier +from sklearn.exceptions import NotFittedError +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn.semi_supervised import SelfTrainingClassifier +from sklearn.svm import SVC +from sklearn.tests.test_pipeline import SimpleEstimator +from sklearn.tree import DecisionTreeClassifier + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +# load the iris dataset and randomly permute it +iris = load_iris() +X_train, X_test, y_train, y_test = train_test_split( + iris.data, iris.target, random_state=0 +) + +n_labeled_samples = 50 + +y_train_missing_labels = y_train.copy() +y_train_missing_labels[n_labeled_samples:] = -1 +mapping = {0: "A", 1: "B", 2: "C", -1: "-1"} +y_train_missing_strings = np.vectorize(mapping.get)(y_train_missing_labels).astype( + object +) +y_train_missing_strings[y_train_missing_labels == -1] = -1 + + +def test_warns_k_best(): + st = SelfTrainingClassifier(KNeighborsClassifier(), criterion="k_best", k_best=1000) + with pytest.warns(UserWarning, match="k_best is larger than"): + st.fit(X_train, y_train_missing_labels) + + assert st.termination_condition_ == "all_labeled" + + +@pytest.mark.parametrize( + "estimator", + [KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)], +) +@pytest.mark.parametrize("selection_crit", ["threshold", "k_best"]) +def test_classification(estimator, selection_crit): + # Check classification for various parameter settings. + # Also assert that predictions for strings and numerical labels are equal. 
+ # Also test for multioutput classification + threshold = 0.75 + max_iter = 10 + st = SelfTrainingClassifier( + estimator, max_iter=max_iter, threshold=threshold, criterion=selection_crit + ) + st.fit(X_train, y_train_missing_labels) + pred = st.predict(X_test) + proba = st.predict_proba(X_test) + + st_string = SelfTrainingClassifier( + estimator, max_iter=max_iter, criterion=selection_crit, threshold=threshold + ) + st_string.fit(X_train, y_train_missing_strings) + pred_string = st_string.predict(X_test) + proba_string = st_string.predict_proba(X_test) + + assert_array_equal(np.vectorize(mapping.get)(pred), pred_string) + assert_array_equal(proba, proba_string) + + assert st.termination_condition_ == st_string.termination_condition_ + # Check consistency between labeled_iter, n_iter and max_iter + labeled = y_train_missing_labels != -1 + # assert that labeled samples have labeled_iter = 0 + assert_array_equal(st.labeled_iter_ == 0, labeled) + # assert that labeled samples do not change label during training + assert_array_equal(y_train_missing_labels[labeled], st.transduction_[labeled]) + + # assert that the max of the iterations is less than the total amount of + # iterations + assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter + assert np.max(st_string.labeled_iter_) <= st_string.n_iter_ <= max_iter + + # check shapes + assert st.labeled_iter_.shape == st.transduction_.shape + assert st_string.labeled_iter_.shape == st_string.transduction_.shape + + +def test_k_best(): + st = SelfTrainingClassifier( + KNeighborsClassifier(n_neighbors=1), + criterion="k_best", + k_best=10, + max_iter=None, + ) + y_train_only_one_label = np.copy(y_train) + y_train_only_one_label[1:] = -1 + n_samples = y_train.shape[0] + + n_expected_iter = ceil((n_samples - 1) / 10) + st.fit(X_train, y_train_only_one_label) + assert st.n_iter_ == n_expected_iter + + # Check labeled_iter_ + assert np.sum(st.labeled_iter_ == 0) == 1 + for i in range(1, n_expected_iter): + assert np.sum(st.labeled_iter_ == i) == 10 + assert np.sum(st.labeled_iter_ == n_expected_iter) == (n_samples - 1) % 10 + assert st.termination_condition_ == "all_labeled" + + +def test_sanity_classification(): + estimator = SVC(gamma="scale", probability=True) + estimator.fit(X_train[n_labeled_samples:], y_train[n_labeled_samples:]) + + st = SelfTrainingClassifier(estimator) + st.fit(X_train, y_train_missing_labels) + + pred1, pred2 = estimator.predict(X_test), st.predict(X_test) + assert not np.array_equal(pred1, pred2) + score_supervised = accuracy_score(estimator.predict(X_test), y_test) + score_self_training = accuracy_score(st.predict(X_test), y_test) + + assert score_self_training > score_supervised + + +def test_none_iter(): + # Check that the all samples were labeled after a 'reasonable' number of + # iterations. + st = SelfTrainingClassifier(KNeighborsClassifier(), threshold=0.55, max_iter=None) + st.fit(X_train, y_train_missing_labels) + + assert st.n_iter_ < 10 + assert st.termination_condition_ == "all_labeled" + + +@pytest.mark.parametrize( + "estimator", + [KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)], +) +@pytest.mark.parametrize("y", [y_train_missing_labels, y_train_missing_strings]) +def test_zero_iterations(estimator, y): + # Check classification for zero iterations. + # Fitting a SelfTrainingClassifier with zero iterations should give the + # same results as fitting a supervised classifier. + # This also asserts that string arrays work as expected. 
+ + clf1 = SelfTrainingClassifier(estimator, max_iter=0) + + clf1.fit(X_train, y) + + clf2 = estimator.fit(X_train[:n_labeled_samples], y[:n_labeled_samples]) + + assert_array_equal(clf1.predict(X_test), clf2.predict(X_test)) + assert clf1.termination_condition_ == "max_iter" + + +def test_prefitted_throws_error(): + # Test that passing a pre-fitted classifier and calling predict throws an + # error + knn = KNeighborsClassifier() + knn.fit(X_train, y_train) + st = SelfTrainingClassifier(knn) + with pytest.raises( + NotFittedError, + match="This SelfTrainingClassifier instance is not fitted yet", + ): + st.predict(X_train) + + +@pytest.mark.parametrize("max_iter", range(1, 5)) +def test_labeled_iter(max_iter): + # Check that the amount of datapoints labeled in iteration 0 is equal to + # the amount of labeled datapoints we passed. + st = SelfTrainingClassifier(KNeighborsClassifier(), max_iter=max_iter) + + st.fit(X_train, y_train_missing_labels) + amount_iter_0 = len(st.labeled_iter_[st.labeled_iter_ == 0]) + assert amount_iter_0 == n_labeled_samples + # Check that the max of the iterations is less than the total amount of + # iterations + assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter + + +def test_no_unlabeled(): + # Test that training on a fully labeled dataset produces the same results + # as training the classifier by itself. + knn = KNeighborsClassifier() + knn.fit(X_train, y_train) + st = SelfTrainingClassifier(knn) + with pytest.warns(UserWarning, match="y contains no unlabeled samples"): + st.fit(X_train, y_train) + assert_array_equal(knn.predict(X_test), st.predict(X_test)) + # Assert that all samples were labeled in iteration 0 (since there were no + # unlabeled samples). + assert np.all(st.labeled_iter_ == 0) + assert st.termination_condition_ == "all_labeled" + + +def test_early_stopping(): + svc = SVC(gamma="scale", probability=True) + st = SelfTrainingClassifier(svc) + X_train_easy = [[1], [0], [1], [0.5]] + y_train_easy = [1, 0, -1, -1] + # X = [[0.5]] cannot be predicted on with a high confidence, so training + # stops early + st.fit(X_train_easy, y_train_easy) + assert st.n_iter_ == 1 + assert st.termination_condition_ == "no_change" + + +def test_strings_dtype(): + clf = SelfTrainingClassifier(KNeighborsClassifier()) + X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1) + labels_multiclass = ["one", "two", "three"] + + y_strings = np.take(labels_multiclass, y) + + with pytest.raises(ValueError, match="dtype"): + clf.fit(X, y_strings) + + +@pytest.mark.parametrize("verbose", [True, False]) +def test_verbose(capsys, verbose): + clf = SelfTrainingClassifier(KNeighborsClassifier(), verbose=verbose) + clf.fit(X_train, y_train_missing_labels) + + captured = capsys.readouterr() + + if verbose: + assert "iteration" in captured.out + else: + assert "iteration" not in captured.out + + +def test_verbose_k_best(capsys): + st = SelfTrainingClassifier( + KNeighborsClassifier(n_neighbors=1), + criterion="k_best", + k_best=10, + verbose=True, + max_iter=None, + ) + + y_train_only_one_label = np.copy(y_train) + y_train_only_one_label[1:] = -1 + n_samples = y_train.shape[0] + + n_expected_iter = ceil((n_samples - 1) / 10) + st.fit(X_train, y_train_only_one_label) + + captured = capsys.readouterr() + + msg = "End of iteration {}, added {} new labels." 
+ for i in range(1, n_expected_iter): + assert msg.format(i, 10) in captured.out + + assert msg.format(n_expected_iter, (n_samples - 1) % 10) in captured.out + + +def test_k_best_selects_best(): + # Tests that the labels added by st really are the 10 best labels. + svc = SVC(gamma="scale", probability=True, random_state=0) + st = SelfTrainingClassifier(svc, criterion="k_best", max_iter=1, k_best=10) + has_label = y_train_missing_labels != -1 + st.fit(X_train, y_train_missing_labels) + + got_label = ~has_label & (st.transduction_ != -1) + + svc.fit(X_train[has_label], y_train_missing_labels[has_label]) + pred = svc.predict_proba(X_train[~has_label]) + max_proba = np.max(pred, axis=1) + + most_confident_svc = X_train[~has_label][np.argsort(max_proba)[-10:]] + added_by_st = X_train[np.where(got_label)].tolist() + + for row in most_confident_svc.tolist(): + assert row in added_by_st + + +def test_estimator_meta_estimator(): + # Check that a meta-estimator relying on an estimator implementing + # `predict_proba` will work even if it does not expose this method before being + # fitted. + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/19119 + + estimator = StackingClassifier( + estimators=[ + ("svc_1", SVC(probability=True)), + ("svc_2", SVC(probability=True)), + ], + final_estimator=SVC(probability=True), + cv=2, + ) + + assert hasattr(estimator, "predict_proba") + clf = SelfTrainingClassifier(estimator=estimator) + clf.fit(X_train, y_train_missing_labels) + clf.predict_proba(X_test) + + estimator = StackingClassifier( + estimators=[ + ("svc_1", SVC(probability=False)), + ("svc_2", SVC(probability=False)), + ], + final_estimator=SVC(probability=False), + cv=2, + ) + + assert not hasattr(estimator, "predict_proba") + clf = SelfTrainingClassifier(estimator=estimator) + with pytest.raises(AttributeError): + clf.fit(X_train, y_train_missing_labels) + + +def test_self_training_estimator_attribute_error(): + """Check that we raise the proper AttributeErrors when the `estimator` + does not implement the `predict_proba` method, which is called from within + `fit`, or `decision_function`, which is decorated with `available_if`. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/28108 + """ + # `SVC` with `probability=False` does not implement 'predict_proba' that + # is required internally in `fit` of `SelfTrainingClassifier`. We expect + # an AttributeError to be raised. 
+ estimator = SVC(probability=False, gamma="scale") + self_training = SelfTrainingClassifier(estimator) + + with pytest.raises(AttributeError, match="has no attribute 'predict_proba'"): + self_training.fit(X_train, y_train_missing_labels) + + # `DecisionTreeClassifier` does not implement 'decision_function' and + # should raise an AttributeError + self_training = SelfTrainingClassifier(estimator=DecisionTreeClassifier()) + + outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'" + inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'" + with pytest.raises(AttributeError, match=outer_msg) as exec_info: + self_training.fit(X_train, y_train_missing_labels).decision_function(X_train) + assert isinstance(exec_info.value.__cause__, AttributeError) + assert inner_msg in str(exec_info.value.__cause__) + + +# TODO(1.8): remove in 1.8 +def test_deprecation_warning_base_estimator(): + warn_msg = "`base_estimator` has been deprecated in 1.6 and will be removed" + with pytest.warns(FutureWarning, match=warn_msg): + SelfTrainingClassifier(base_estimator=DecisionTreeClassifier()).fit( + X_train, y_train_missing_labels + ) + + error_msg = "You must pass an estimator to SelfTrainingClassifier" + with pytest.raises(ValueError, match=error_msg): + SelfTrainingClassifier().fit(X_train, y_train_missing_labels) + + error_msg = "You must pass only one estimator to SelfTrainingClassifier." + with pytest.raises(ValueError, match=error_msg): + SelfTrainingClassifier( + base_estimator=DecisionTreeClassifier(), estimator=DecisionTreeClassifier() + ).fit(X_train, y_train_missing_labels) + + +# Metadata routing tests +# ================================================================= + + +@pytest.mark.filterwarnings("ignore:y contains no unlabeled samples:UserWarning") +@pytest.mark.parametrize( + "method", ["decision_function", "predict_log_proba", "predict_proba", "predict"] +) +def test_routing_passed_metadata_not_supported(method): + """Test that the right error message is raised when metadata is passed while + not supported when `enable_metadata_routing=False`.""" + est = SelfTrainingClassifier(estimator=SimpleEstimator()) + with pytest.raises( + ValueError, match="is only supported if enable_metadata_routing=True" + ): + est.fit([[1], [1]], [1, 1], sample_weight=[1], prop="a") + + est = SelfTrainingClassifier(estimator=SimpleEstimator()) + with pytest.raises( + ValueError, match="is only supported if enable_metadata_routing=True" + ): + # make sure that the estimator thinks it is already fitted + est.fitted_params_ = True + getattr(est, method)([[1]], sample_weight=[1], prop="a") + + +# End of routing tests +# ==================== diff --git a/.venv/Lib/site-packages/sklearn/svm/__init__.py b/.venv/Lib/site-packages/sklearn/svm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cffb8cf8ecc0914e08172716536fff5afc35383c --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/__init__.py @@ -0,0 +1,21 @@ +"""Support vector machine algorithms.""" + +# See http://scikit-learn.sourceforge.net/modules/svm.html for complete +# documentation. 
+ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +from ._bounds import l1_min_c +from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM + +__all__ = [ + "LinearSVC", + "LinearSVR", + "NuSVC", + "NuSVR", + "OneClassSVM", + "SVC", + "SVR", + "l1_min_c", +] diff --git a/.venv/Lib/site-packages/sklearn/svm/_base.py b/.venv/Lib/site-packages/sklearn/svm/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..3a65beb0e48047f0daba1e3309c72f29f6951932 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_base.py @@ -0,0 +1,1255 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +import warnings +from abc import ABCMeta, abstractmethod +from numbers import Integral, Real + +import numpy as np +import scipy.sparse as sp + +from ..base import BaseEstimator, ClassifierMixin, _fit_context +from ..exceptions import ConvergenceWarning, NotFittedError +from ..preprocessing import LabelEncoder +from ..utils import check_array, check_random_state, column_or_1d, compute_class_weight +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import safe_sparse_dot +from ..utils.metaestimators import available_if +from ..utils.multiclass import _ovr_decision_function, check_classification_targets +from ..utils.validation import ( + _check_large_sparse, + _check_sample_weight, + _num_samples, + check_consistent_length, + check_is_fitted, + validate_data, +) +from . import _liblinear as liblinear # type: ignore + +# mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm' +# (and same for other imports) +from . import _libsvm as libsvm # type: ignore +from . import _libsvm_sparse as libsvm_sparse # type: ignore + +LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"] + + +def _one_vs_one_coef(dual_coef, n_support, support_vectors): + """Generate primal coefficients from dual coefficients + for the one-vs-one multi class LibSVM in the case + of a linear kernel.""" + + # get 1vs1 weights for all n*(n-1) classifiers. + # this is somewhat messy. + # shape of dual_coef_ is nSV * (n_classes -1) + # see docs for details + n_class = dual_coef.shape[0] + 1 + + # XXX we could do preallocation of coef but + # would have to take care in the sparse case + coef = [] + sv_locs = np.cumsum(np.hstack([[0], n_support])) + for class1 in range(n_class): + # SVs for class1: + sv1 = support_vectors[sv_locs[class1] : sv_locs[class1 + 1], :] + for class2 in range(class1 + 1, n_class): + # SVs for class1: + sv2 = support_vectors[sv_locs[class2] : sv_locs[class2 + 1], :] + + # dual coef for class1 SVs: + alpha1 = dual_coef[class2 - 1, sv_locs[class1] : sv_locs[class1 + 1]] + # dual coef for class2 SVs: + alpha2 = dual_coef[class1, sv_locs[class2] : sv_locs[class2 + 1]] + # build weight for class1 vs class2 + + coef.append(safe_sparse_dot(alpha1, sv1) + safe_sparse_dot(alpha2, sv2)) + return coef + + +class BaseLibSVM(BaseEstimator, metaclass=ABCMeta): + """Base class for estimators that use libsvm as backing library. + + This implements support vector machine classification and regression. + + Parameter documentation is in the derived `SVC` class. 
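As a brief illustration of the two derived uses mentioned above
(classification and regression), a minimal sketch with the public estimators
on toy data (illustrative only):

import numpy as np
from sklearn.svm import SVC, SVR

X = np.array([[0.0], [1.0], [2.0], [3.0]])
SVC(kernel="rbf", C=1.0, gamma="scale").fit(X, [0, 0, 1, 1]).predict([[1.5]])
SVR(kernel="rbf", C=1.0, gamma="scale").fit(X, [0.0, 1.0, 2.0, 3.0]).predict([[1.5]])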
+ """ + + _parameter_constraints: dict = { + "kernel": [ + StrOptions({"linear", "poly", "rbf", "sigmoid", "precomputed"}), + callable, + ], + "degree": [Interval(Integral, 0, None, closed="left")], + "gamma": [ + StrOptions({"scale", "auto"}), + Interval(Real, 0.0, None, closed="left"), + ], + "coef0": [Interval(Real, None, None, closed="neither")], + "tol": [Interval(Real, 0.0, None, closed="neither")], + "C": [Interval(Real, 0.0, None, closed="right")], + "nu": [Interval(Real, 0.0, 1.0, closed="right")], + "epsilon": [Interval(Real, 0.0, None, closed="left")], + "shrinking": ["boolean"], + "probability": ["boolean"], + "cache_size": [Interval(Real, 0, None, closed="neither")], + "class_weight": [StrOptions({"balanced"}), dict, None], + "verbose": ["verbose"], + "max_iter": [Interval(Integral, -1, None, closed="left")], + "random_state": ["random_state"], + } + + # The order of these must match the integer values in LibSVM. + # XXX These are actually the same in the dense case. Need to factor + # this out. + _sparse_kernels = ["linear", "poly", "rbf", "sigmoid", "precomputed"] + + @abstractmethod + def __init__( + self, + kernel, + degree, + gamma, + coef0, + tol, + C, + nu, + epsilon, + shrinking, + probability, + cache_size, + class_weight, + verbose, + max_iter, + random_state, + ): + if self._impl not in LIBSVM_IMPL: + raise ValueError( + "impl should be one of %s, %s was given" % (LIBSVM_IMPL, self._impl) + ) + + self.kernel = kernel + self.degree = degree + self.gamma = gamma + self.coef0 = coef0 + self.tol = tol + self.C = C + self.nu = nu + self.epsilon = epsilon + self.shrinking = shrinking + self.probability = probability + self.cache_size = cache_size + self.class_weight = class_weight + self.verbose = verbose + self.max_iter = max_iter + self.random_state = random_state + + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + # Used by cross_val_score. + tags.input_tags.pairwise = self.kernel == "precomputed" + return tags + + @_fit_context(prefer_skip_nested_validation=True) + def fit(self, X, y, sample_weight=None): + """Fit the SVM model according to the given training data. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) \ + or (n_samples, n_samples) + Training vectors, where `n_samples` is the number of samples + and `n_features` is the number of features. + For kernel="precomputed", the expected shape of X is + (n_samples, n_samples). + + y : array-like of shape (n_samples,) + Target values (class labels in classification, real numbers in + regression). + + sample_weight : array-like of shape (n_samples,), default=None + Per-sample weights. Rescale C per sample. Higher weights + force the classifier to put more emphasis on these points. + + Returns + ------- + self : object + Fitted estimator. + + Notes + ----- + If X and y are not C-ordered and contiguous arrays of np.float64 and + X is not a scipy.sparse.csr_matrix, X and/or y may be copied. + + If X is a dense array, then the other methods will not support sparse + matrices as input. 
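A small sketch of the sparse path mentioned in the Notes above (illustrative;
any CSR input follows the same route):

import numpy as np
import scipy.sparse as sp
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = sp.csr_matrix(rng.rand(20, 5))   # CSR input avoids the extra copy noted above
y = np.array([0, 1] * 10)
clf = SVC(kernel="rbf", gamma="scale").fit(X, y)
clf.predict(X[:3])                   # the fitted model keeps using the sparse code path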
+ """ + rnd = check_random_state(self.random_state) + + sparse = sp.issparse(X) + if sparse and self.kernel == "precomputed": + raise TypeError("Sparse precomputed kernels are not supported.") + self._sparse = sparse and not callable(self.kernel) + + if callable(self.kernel): + check_consistent_length(X, y) + else: + X, y = validate_data( + self, + X, + y, + dtype=np.float64, + order="C", + accept_sparse="csr", + accept_large_sparse=False, + ) + + y = self._validate_targets(y) + + sample_weight = np.asarray( + [] if sample_weight is None else sample_weight, dtype=np.float64 + ) + solver_type = LIBSVM_IMPL.index(self._impl) + + # input validation + n_samples = _num_samples(X) + if solver_type != 2 and n_samples != y.shape[0]: + raise ValueError( + "X and y have incompatible shapes.\n" + + "X has %s samples, but y has %s." % (n_samples, y.shape[0]) + ) + + if self.kernel == "precomputed" and n_samples != X.shape[1]: + raise ValueError( + "Precomputed matrix must be a square matrix." + " Input is a {}x{} matrix.".format(X.shape[0], X.shape[1]) + ) + + if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples: + raise ValueError( + "sample_weight and X have incompatible shapes: " + "%r vs %r\n" + "Note: Sparse matrices cannot be indexed w/" + "boolean masks (use `indices=True` in CV)." + % (sample_weight.shape, X.shape) + ) + + kernel = "precomputed" if callable(self.kernel) else self.kernel + + if kernel == "precomputed": + # unused but needs to be a float for cython code that ignores + # it anyway + self._gamma = 0.0 + elif isinstance(self.gamma, str): + if self.gamma == "scale": + # var = E[X^2] - E[X]^2 if sparse + X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var() + self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0 + elif self.gamma == "auto": + self._gamma = 1.0 / X.shape[1] + elif isinstance(self.gamma, Real): + self._gamma = self.gamma + + fit = self._sparse_fit if self._sparse else self._dense_fit + if self.verbose: + print("[LibSVM]", end="") + + seed = rnd.randint(np.iinfo("i").max) + fit(X, y, sample_weight, solver_type, kernel, random_seed=seed) + # see comment on the other call to np.iinfo in this file + + self.shape_fit_ = X.shape if hasattr(X, "shape") else (n_samples,) + + # In binary case, we need to flip the sign of coef, intercept and + # decision function. Use self._intercept_ and self._dual_coef_ + # internally. + self._intercept_ = self.intercept_.copy() + self._dual_coef_ = self.dual_coef_ + if self._impl in ["c_svc", "nu_svc"] and len(self.classes_) == 2: + self.intercept_ *= -1 + self.dual_coef_ = -self.dual_coef_ + + dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_ + intercept_finiteness = np.isfinite(self._intercept_).all() + dual_coef_finiteness = np.isfinite(dual_coef).all() + if not (intercept_finiteness and dual_coef_finiteness): + raise ValueError( + "The dual coefficients or intercepts are not finite." + " The input data may contain large values and need to be" + " preprocessed." + ) + + # Since, in the case of SVC and NuSVC, the number of models optimized by + # libSVM could be greater than one (depending on the input), `n_iter_` + # stores an ndarray. + # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of + # models optimized by libSVM is always one, so `n_iter_` stores an + # integer. 
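# For instance (illustrative): an SVC fitted on a 3-class problem optimizes the
# 3 one-vs-one sub-problems, so its `n_iter_` is an ndarray of shape (3,),
# whereas SVR or OneClassSVM expose `n_iter_` as a plain Python int.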
+ if self._impl in ["c_svc", "nu_svc"]: + self.n_iter_ = self._num_iter + else: + self.n_iter_ = self._num_iter.item() + + return self + + def _validate_targets(self, y): + """Validation of y and class_weight. + + Default implementation for SVR and one-class; overridden in BaseSVC. + """ + return column_or_1d(y, warn=True).astype(np.float64, copy=False) + + def _warn_from_fit_status(self): + assert self.fit_status_ in (0, 1) + if self.fit_status_ == 1: + warnings.warn( + "Solver terminated early (max_iter=%i)." + " Consider pre-processing your data with" + " StandardScaler or MinMaxScaler." % self.max_iter, + ConvergenceWarning, + ) + + def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed): + if callable(self.kernel): + # you must store a reference to X to compute the kernel in predict + # TODO: add keyword copy to copy on demand + self.__Xfit = X + X = self._compute_kernel(X) + + if X.shape[0] != X.shape[1]: + raise ValueError("X.shape[0] should be equal to X.shape[1]") + + libsvm.set_verbosity_wrap(self.verbose) + + # we don't pass **self.get_params() to allow subclasses to + # add other parameters to __init__ + ( + self.support_, + self.support_vectors_, + self._n_support, + self.dual_coef_, + self.intercept_, + self._probA, + self._probB, + self.fit_status_, + self._num_iter, + ) = libsvm.fit( + X, + y, + svm_type=solver_type, + sample_weight=sample_weight, + class_weight=getattr(self, "class_weight_", np.empty(0)), + kernel=kernel, + C=self.C, + nu=self.nu, + probability=self.probability, + degree=self.degree, + shrinking=self.shrinking, + tol=self.tol, + cache_size=self.cache_size, + coef0=self.coef0, + gamma=self._gamma, + epsilon=self.epsilon, + max_iter=self.max_iter, + random_seed=random_seed, + ) + + self._warn_from_fit_status() + + def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed): + X.data = np.asarray(X.data, dtype=np.float64, order="C") + X.sort_indices() + + kernel_type = self._sparse_kernels.index(kernel) + + libsvm_sparse.set_verbosity_wrap(self.verbose) + + ( + self.support_, + self.support_vectors_, + dual_coef_data, + self.intercept_, + self._n_support, + self._probA, + self._probB, + self.fit_status_, + self._num_iter, + ) = libsvm_sparse.libsvm_sparse_train( + X.shape[1], + X.data, + X.indices, + X.indptr, + y, + solver_type, + kernel_type, + self.degree, + self._gamma, + self.coef0, + self.tol, + self.C, + getattr(self, "class_weight_", np.empty(0)), + sample_weight, + self.nu, + self.cache_size, + self.epsilon, + int(self.shrinking), + int(self.probability), + self.max_iter, + random_seed, + ) + + self._warn_from_fit_status() + + if hasattr(self, "classes_"): + n_class = len(self.classes_) - 1 + else: # regression + n_class = 1 + n_SV = self.support_vectors_.shape[0] + + dual_coef_indices = np.tile(np.arange(n_SV), n_class) + if not n_SV: + self.dual_coef_ = sp.csr_matrix([]) + else: + dual_coef_indptr = np.arange( + 0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class + ) + self.dual_coef_ = sp.csr_matrix( + (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV) + ) + + def predict(self, X): + """Perform regression on samples in X. + + For an one-class model, +1 (inlier) or -1 (outlier) is returned. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + The predicted values. 
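The one-class behaviour mentioned above, as a minimal sketch (illustrative;
the exact labels depend on the data and on `nu`):

import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.RandomState(0)
X_train = rng.normal(size=(50, 2))
oc = OneClassSVM(kernel="rbf", gamma="scale", nu=0.1).fit(X_train)
oc.predict(np.array([[0.0, 0.0], [8.0, 8.0]]))   # typically array([ 1, -1])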
+ """ + X = self._validate_for_predict(X) + predict = self._sparse_predict if self._sparse else self._dense_predict + return predict(X) + + def _dense_predict(self, X): + X = self._compute_kernel(X) + if X.ndim == 1: + X = check_array(X, order="C", accept_large_sparse=False) + + kernel = self.kernel + if callable(self.kernel): + kernel = "precomputed" + if X.shape[1] != self.shape_fit_[0]: + raise ValueError( + "X.shape[1] = %d should be equal to %d, " + "the number of samples at training time" + % (X.shape[1], self.shape_fit_[0]) + ) + + svm_type = LIBSVM_IMPL.index(self._impl) + + return libsvm.predict( + X, + self.support_, + self.support_vectors_, + self._n_support, + self._dual_coef_, + self._intercept_, + self._probA, + self._probB, + svm_type=svm_type, + kernel=kernel, + degree=self.degree, + coef0=self.coef0, + gamma=self._gamma, + cache_size=self.cache_size, + ) + + def _sparse_predict(self, X): + # Precondition: X is a csr_matrix of dtype np.float64. + kernel = self.kernel + if callable(kernel): + kernel = "precomputed" + + kernel_type = self._sparse_kernels.index(kernel) + + C = 0.0 # C is not useful here + + return libsvm_sparse.libsvm_sparse_predict( + X.data, + X.indices, + X.indptr, + self.support_vectors_.data, + self.support_vectors_.indices, + self.support_vectors_.indptr, + self._dual_coef_.data, + self._intercept_, + LIBSVM_IMPL.index(self._impl), + kernel_type, + self.degree, + self._gamma, + self.coef0, + self.tol, + C, + getattr(self, "class_weight_", np.empty(0)), + self.nu, + self.epsilon, + self.shrinking, + self.probability, + self._n_support, + self._probA, + self._probB, + ) + + def _compute_kernel(self, X): + """Return the data transformed by a callable kernel""" + if callable(self.kernel): + # in the case of precomputed kernel given as a function, we + # have to compute explicitly the kernel matrix + kernel = self.kernel(X, self.__Xfit) + if sp.issparse(kernel): + kernel = kernel.toarray() + X = np.asarray(kernel, dtype=np.float64, order="C") + return X + + def _decision_function(self, X): + """Evaluates the decision function for the samples in X. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + + Returns + ------- + X : array-like of shape (n_samples, n_class * (n_class-1) / 2) + Returns the decision function of the sample for each class + in the model. + """ + # NOTE: _validate_for_predict contains check for is_fitted + # hence must be placed before any other attributes are used. + X = self._validate_for_predict(X) + X = self._compute_kernel(X) + + if self._sparse: + dec_func = self._sparse_decision_function(X) + else: + dec_func = self._dense_decision_function(X) + + # In binary case, we need to flip the sign of coef, intercept and + # decision function. 
+ if self._impl in ["c_svc", "nu_svc"] and len(self.classes_) == 2: + return -dec_func.ravel() + + return dec_func + + def _dense_decision_function(self, X): + X = check_array(X, dtype=np.float64, order="C", accept_large_sparse=False) + + kernel = self.kernel + if callable(kernel): + kernel = "precomputed" + + return libsvm.decision_function( + X, + self.support_, + self.support_vectors_, + self._n_support, + self._dual_coef_, + self._intercept_, + self._probA, + self._probB, + svm_type=LIBSVM_IMPL.index(self._impl), + kernel=kernel, + degree=self.degree, + cache_size=self.cache_size, + coef0=self.coef0, + gamma=self._gamma, + ) + + def _sparse_decision_function(self, X): + X.data = np.asarray(X.data, dtype=np.float64, order="C") + + kernel = self.kernel + if hasattr(kernel, "__call__"): + kernel = "precomputed" + + kernel_type = self._sparse_kernels.index(kernel) + + return libsvm_sparse.libsvm_sparse_decision_function( + X.data, + X.indices, + X.indptr, + self.support_vectors_.data, + self.support_vectors_.indices, + self.support_vectors_.indptr, + self._dual_coef_.data, + self._intercept_, + LIBSVM_IMPL.index(self._impl), + kernel_type, + self.degree, + self._gamma, + self.coef0, + self.tol, + self.C, + getattr(self, "class_weight_", np.empty(0)), + self.nu, + self.epsilon, + self.shrinking, + self.probability, + self._n_support, + self._probA, + self._probB, + ) + + def _validate_for_predict(self, X): + check_is_fitted(self) + + if not callable(self.kernel): + X = validate_data( + self, + X, + accept_sparse="csr", + dtype=np.float64, + order="C", + accept_large_sparse=False, + reset=False, + ) + + if self._sparse and not sp.issparse(X): + X = sp.csr_matrix(X) + if self._sparse: + X.sort_indices() + + if sp.issparse(X) and not self._sparse and not callable(self.kernel): + raise ValueError( + "cannot use sparse input in %r trained on dense data" + % type(self).__name__ + ) + + if self.kernel == "precomputed": + if X.shape[1] != self.shape_fit_[0]: + raise ValueError( + "X.shape[1] = %d should be equal to %d, " + "the number of samples at training time" + % (X.shape[1], self.shape_fit_[0]) + ) + # Fixes https://nvd.nist.gov/vuln/detail/CVE-2020-28975 + # Check that _n_support is consistent with support_vectors + sv = self.support_vectors_ + if not self._sparse and sv.size > 0 and self.n_support_.sum() != sv.shape[0]: + raise ValueError( + f"The internal representation of {self.__class__.__name__} was altered" + ) + return X + + @property + def coef_(self): + """Weights assigned to the features when `kernel="linear"`. + + Returns + ------- + ndarray of shape (n_features, n_classes) + """ + if self.kernel != "linear": + raise AttributeError("coef_ is only available when using a linear kernel") + + coef = self._get_coef() + + # coef_ being a read-only property, it's better to mark the value as + # immutable to avoid hiding potential bugs for the unsuspecting user. 
+ if sp.issparse(coef): + # sparse matrix do not have global flags + coef.data.flags.writeable = False + else: + # regular dense array + coef.flags.writeable = False + return coef + + def _get_coef(self): + return safe_sparse_dot(self._dual_coef_, self.support_vectors_) + + @property + def n_support_(self): + """Number of support vectors for each class.""" + try: + check_is_fitted(self) + except NotFittedError: + raise AttributeError + + svm_type = LIBSVM_IMPL.index(self._impl) + if svm_type in (0, 1): + return self._n_support + else: + # SVR and OneClass + # _n_support has size 2, we make it size 1 + return np.array([self._n_support[0]]) + + +class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta): + """ABC for LibSVM-based classifiers.""" + + _parameter_constraints: dict = { + **BaseLibSVM._parameter_constraints, + "decision_function_shape": [StrOptions({"ovr", "ovo"})], + "break_ties": ["boolean"], + } + for unused_param in ["epsilon", "nu"]: + _parameter_constraints.pop(unused_param) + + @abstractmethod + def __init__( + self, + kernel, + degree, + gamma, + coef0, + tol, + C, + nu, + shrinking, + probability, + cache_size, + class_weight, + verbose, + max_iter, + decision_function_shape, + random_state, + break_ties, + ): + self.decision_function_shape = decision_function_shape + self.break_ties = break_ties + super().__init__( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=C, + nu=nu, + epsilon=0.0, + shrinking=shrinking, + probability=probability, + cache_size=cache_size, + class_weight=class_weight, + verbose=verbose, + max_iter=max_iter, + random_state=random_state, + ) + + def _validate_targets(self, y): + y_ = column_or_1d(y, warn=True) + check_classification_targets(y) + cls, y = np.unique(y_, return_inverse=True) + self.class_weight_ = compute_class_weight(self.class_weight, classes=cls, y=y_) + if len(cls) < 2: + raise ValueError( + "The number of classes has to be greater than one; got %d class" + % len(cls) + ) + + self.classes_ = cls + + return np.asarray(y, dtype=np.float64, order="C") + + def decision_function(self, X): + """Evaluate the decision function for the samples in X. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The input samples. + + Returns + ------- + X : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2) + Returns the decision function of the sample for each class + in the model. + If decision_function_shape='ovr', the shape is (n_samples, + n_classes). + + Notes + ----- + If decision_function_shape='ovo', the function values are proportional + to the distance of the samples X to the separating hyperplane. If the + exact distances are required, divide the function values by the norm of + the weight vector (``coef_``). See also `this question + `_ for further details. + If decision_function_shape='ovr', the decision function is a monotonic + transformation of ovo decision function. + """ + dec = self._decision_function(X) + if self.decision_function_shape == "ovr" and len(self.classes_) > 2: + return _ovr_decision_function(dec < 0, -dec, len(self.classes_)) + return dec + + def predict(self, X): + """Perform classification on samples in X. + + For an one-class model, +1 or -1 is returned. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) or \ + (n_samples_test, n_samples_train) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). 
+ + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Class labels for samples in X. + """ + check_is_fitted(self) + if self.break_ties and self.decision_function_shape == "ovo": + raise ValueError( + "break_ties must be False when decision_function_shape is 'ovo'" + ) + + if ( + self.break_ties + and self.decision_function_shape == "ovr" + and len(self.classes_) > 2 + ): + y = np.argmax(self.decision_function(X), axis=1) + else: + y = super().predict(X) + return self.classes_.take(np.asarray(y, dtype=np.intp)) + + # Hacky way of getting predict_proba to raise an AttributeError when + # probability=False using properties. Do not use this in new code; when + # probabilities are not available depending on a setting, introduce two + # estimators. + def _check_proba(self): + if not self.probability: + raise AttributeError( + "predict_proba is not available when probability=False" + ) + if self._impl not in ("c_svc", "nu_svc"): + raise AttributeError("predict_proba only implemented for SVC and NuSVC") + return True + + @available_if(_check_proba) + def predict_proba(self, X): + """Compute probabilities of possible outcomes for samples in X. + + The model needs to have probability information computed at training + time: fit with attribute `probability` set to True. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + T : ndarray of shape (n_samples, n_classes) + Returns the probability of the sample for each class in + the model. The columns correspond to the classes in sorted + order, as they appear in the attribute :term:`classes_`. + + Notes + ----- + The probability model is created using cross validation, so + the results can be slightly different than those obtained by + predict. Also, it will produce meaningless results on very small + datasets. + """ + X = self._validate_for_predict(X) + if self.probA_.size == 0 or self.probB_.size == 0: + raise NotFittedError( + "predict_proba is not available when fitted with probability=False" + ) + pred_proba = ( + self._sparse_predict_proba if self._sparse else self._dense_predict_proba + ) + return pred_proba(X) + + @available_if(_check_proba) + def predict_log_proba(self, X): + """Compute log probabilities of possible outcomes for samples in X. + + The model need to have probability information computed at training + time: fit with attribute `probability` set to True. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) or \ + (n_samples_test, n_samples_train) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + T : ndarray of shape (n_samples, n_classes) + Returns the log-probabilities of the sample for each class in + the model. The columns correspond to the classes in sorted + order, as they appear in the attribute :term:`classes_`. + + Notes + ----- + The probability model is created using cross validation, so + the results can be slightly different than those obtained by + predict. Also, it will produce meaningless results on very small + datasets. 
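A short sketch of the probability workflow described in the Notes
(illustrative only):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = SVC(probability=True, gamma="scale", random_state=0).fit(X, y)
proba = clf.predict_proba(X[:5])
np.allclose(proba, np.exp(clf.predict_log_proba(X[:5])))   # True
# As the Notes warn, np.argmax(proba, axis=1) can occasionally disagree with
# clf.predict(X[:5]), because the probabilities come from internal
# cross-validated Platt scaling.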
+ """ + return np.log(self.predict_proba(X)) + + def _dense_predict_proba(self, X): + X = self._compute_kernel(X) + + kernel = self.kernel + if callable(kernel): + kernel = "precomputed" + + svm_type = LIBSVM_IMPL.index(self._impl) + pprob = libsvm.predict_proba( + X, + self.support_, + self.support_vectors_, + self._n_support, + self._dual_coef_, + self._intercept_, + self._probA, + self._probB, + svm_type=svm_type, + kernel=kernel, + degree=self.degree, + cache_size=self.cache_size, + coef0=self.coef0, + gamma=self._gamma, + ) + + return pprob + + def _sparse_predict_proba(self, X): + X.data = np.asarray(X.data, dtype=np.float64, order="C") + + kernel = self.kernel + if callable(kernel): + kernel = "precomputed" + + kernel_type = self._sparse_kernels.index(kernel) + + return libsvm_sparse.libsvm_sparse_predict_proba( + X.data, + X.indices, + X.indptr, + self.support_vectors_.data, + self.support_vectors_.indices, + self.support_vectors_.indptr, + self._dual_coef_.data, + self._intercept_, + LIBSVM_IMPL.index(self._impl), + kernel_type, + self.degree, + self._gamma, + self.coef0, + self.tol, + self.C, + getattr(self, "class_weight_", np.empty(0)), + self.nu, + self.epsilon, + self.shrinking, + self.probability, + self._n_support, + self._probA, + self._probB, + ) + + def _get_coef(self): + if self.dual_coef_.shape[0] == 1: + # binary classifier + coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_) + else: + # 1vs1 classifier + coef = _one_vs_one_coef( + self.dual_coef_, self._n_support, self.support_vectors_ + ) + if sp.issparse(coef[0]): + coef = sp.vstack(coef).tocsr() + else: + coef = np.vstack(coef) + + return coef + + @property + def probA_(self): + """Parameter learned in Platt scaling when `probability=True`. + + Returns + ------- + ndarray of shape (n_classes * (n_classes - 1) / 2) + """ + return self._probA + + @property + def probB_(self): + """Parameter learned in Platt scaling when `probability=True`. + + Returns + ------- + ndarray of shape (n_classes * (n_classes - 1) / 2) + """ + return self._probB + + +def _get_liblinear_solver_type(multi_class, penalty, loss, dual): + """Find the liblinear magic number for the solver. + + This number depends on the values of the following attributes: + - multi_class + - penalty + - loss + - dual + + The same number is also internally used by LibLinear to determine + which solver to use. 
+ """ + # nested dicts containing level 1: available loss functions, + # level2: available penalties for the given loss function, + # level3: whether the dual solver is available for the specified + # combination of loss function and penalty + _solver_type_dict = { + "logistic_regression": {"l1": {False: 6}, "l2": {False: 0, True: 7}}, + "hinge": {"l2": {True: 3}}, + "squared_hinge": {"l1": {False: 5}, "l2": {False: 2, True: 1}}, + "epsilon_insensitive": {"l2": {True: 13}}, + "squared_epsilon_insensitive": {"l2": {False: 11, True: 12}}, + "crammer_singer": 4, + } + + if multi_class == "crammer_singer": + return _solver_type_dict[multi_class] + elif multi_class != "ovr": + raise ValueError( + "`multi_class` must be one of `ovr`, `crammer_singer`, got %r" % multi_class + ) + + _solver_pen = _solver_type_dict.get(loss, None) + if _solver_pen is None: + error_string = "loss='%s' is not supported" % loss + else: + _solver_dual = _solver_pen.get(penalty, None) + if _solver_dual is None: + error_string = ( + "The combination of penalty='%s' and loss='%s' is not supported" + % (penalty, loss) + ) + else: + solver_num = _solver_dual.get(dual, None) + if solver_num is None: + error_string = ( + "The combination of penalty='%s' and " + "loss='%s' are not supported when dual=%s" % (penalty, loss, dual) + ) + else: + return solver_num + raise ValueError( + "Unsupported set of arguments: %s, Parameters: penalty=%r, loss=%r, dual=%r" + % (error_string, penalty, loss, dual) + ) + + +def _fit_liblinear( + X, + y, + C, + fit_intercept, + intercept_scaling, + class_weight, + penalty, + dual, + verbose, + max_iter, + tol, + random_state=None, + multi_class="ovr", + loss="logistic_regression", + epsilon=0.1, + sample_weight=None, +): + """Used by Logistic Regression (and CV) and LinearSVC/LinearSVR. + + Preprocessing is done in this function before supplying it to liblinear. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target vector relative to X + + C : float + Inverse of cross-validation parameter. The lower the C, the higher + the penalization. + + fit_intercept : bool + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: ``[x_1, ..., x_n, 1]``, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). + + intercept_scaling : float + Liblinear internally penalizes the intercept, treating it like any + other term in the feature vector. To reduce the impact of the + regularization on the intercept, the `intercept_scaling` parameter can + be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. + Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. + + class_weight : dict or 'balanced', default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. 
+ + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + penalty : {'l1', 'l2'} + The norm of the penalty used in regularization. + + dual : bool + Dual or primal formulation, + + verbose : int + Set verbose to any positive number for verbosity. + + max_iter : int + Number of iterations. + + tol : float + Stopping condition. + + random_state : int, RandomState instance or None, default=None + Controls the pseudo random number generation for shuffling the data. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + multi_class : {'ovr', 'crammer_singer'}, default='ovr' + `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer` + optimizes a joint objective over all classes. + While `crammer_singer` is interesting from an theoretical perspective + as it is consistent it is seldom used in practice and rarely leads to + better accuracy and is more expensive to compute. + If `crammer_singer` is chosen, the options loss, penalty and dual will + be ignored. + + loss : {'logistic_regression', 'hinge', 'squared_hinge', \ + 'epsilon_insensitive', 'squared_epsilon_insensitive}, \ + default='logistic_regression' + The loss function used to fit the model. + + epsilon : float, default=0.1 + Epsilon parameter in the epsilon-insensitive loss function. Note + that the value of this parameter depends on the scale of the target + variable y. If unsure, set epsilon=0. + + sample_weight : array-like of shape (n_samples,), default=None + Weights assigned to each sample. + + Returns + ------- + coef_ : ndarray of shape (n_features, n_features + 1) + The coefficient vector got by minimizing the objective function. + + intercept_ : float + The intercept term added to the vector. + + n_iter_ : array of int + Number of iterations run across for each class. + """ + if loss not in ["epsilon_insensitive", "squared_epsilon_insensitive"]: + enc = LabelEncoder() + y_ind = enc.fit_transform(y) + classes_ = enc.classes_ + if len(classes_) < 2: + raise ValueError( + "This solver needs samples of at least 2 classes" + " in the data, but the data contains only one" + " class: %r" % classes_[0] + ) + + class_weight_ = compute_class_weight(class_weight, classes=classes_, y=y) + else: + class_weight_ = np.empty(0, dtype=np.float64) + y_ind = y + liblinear.set_verbosity_wrap(verbose) + rnd = check_random_state(random_state) + if verbose: + print("[LibLinear]", end="") + + # LinearSVC breaks when intercept_scaling is <= 0 + bias = -1.0 + if fit_intercept: + if intercept_scaling <= 0: + raise ValueError( + "Intercept scaling is %r but needs to be greater " + "than 0. To disable fitting an intercept," + " set fit_intercept=False." 
% intercept_scaling + ) + else: + bias = intercept_scaling + + libsvm.set_verbosity_wrap(verbose) + libsvm_sparse.set_verbosity_wrap(verbose) + liblinear.set_verbosity_wrap(verbose) + + # Liblinear doesn't support 64bit sparse matrix indices yet + if sp.issparse(X): + _check_large_sparse(X) + + # LibLinear wants targets as doubles, even for classification + y_ind = np.asarray(y_ind, dtype=np.float64).ravel() + y_ind = np.require(y_ind, requirements="W") + + sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64) + + solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual) + raw_coef_, n_iter_ = liblinear.train_wrap( + X, + y_ind, + sp.issparse(X), + solver_type, + tol, + bias, + C, + class_weight_, + max_iter, + rnd.randint(np.iinfo("i").max), + epsilon, + sample_weight, + ) + # Regarding rnd.randint(..) in the above signature: + # seed for srand in range [0..INT_MAX); due to limitations in Numpy + # on 32-bit platforms, we can't get to the UINT_MAX limit that + # srand supports + n_iter_max = max(n_iter_) + if n_iter_max >= max_iter: + warnings.warn( + "Liblinear failed to converge, increase the number of iterations.", + ConvergenceWarning, + ) + + if fit_intercept: + coef_ = raw_coef_[:, :-1] + intercept_ = intercept_scaling * raw_coef_[:, -1] + else: + coef_ = raw_coef_ + intercept_ = 0.0 + + return coef_, intercept_, n_iter_ diff --git a/.venv/Lib/site-packages/sklearn/svm/_bounds.py b/.venv/Lib/site-packages/sklearn/svm/_bounds.py new file mode 100644 index 0000000000000000000000000000000000000000..704462ce23689dc76f1590e1c35cd8034f83dfdd --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_bounds.py @@ -0,0 +1,95 @@ +"""Determination of parameter bounds""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +from numbers import Real + +import numpy as np + +from ..preprocessing import LabelBinarizer +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.extmath import safe_sparse_dot +from ..utils.validation import check_array, check_consistent_length + + +@validate_params( + { + "X": ["array-like", "sparse matrix"], + "y": ["array-like"], + "loss": [StrOptions({"squared_hinge", "log"})], + "fit_intercept": ["boolean"], + "intercept_scaling": [Interval(Real, 0, None, closed="neither")], + }, + prefer_skip_nested_validation=True, +) +def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0): + """Return the lowest bound for C. + + The lower bound for C is computed such that for C in (l1_min_C, infinity) + the model is guaranteed not to be empty. This applies to l1 penalized + classifiers, such as LinearSVC with penalty='l1' and + linear_model.LogisticRegression with penalty='l1'. + + This value is valid if class_weight parameter in fit() is not set. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target vector relative to X. + + loss : {'squared_hinge', 'log'}, default='squared_hinge' + Specifies the loss function. + With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss). + With 'log' it is the loss of logistic regression models. + + fit_intercept : bool, default=True + Specifies if the intercept should be fitted by the model. + It must match the fit() method parameter. 
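A small sketch, not part of the library source, checking the `class_weight='balanced'` formula quoted above against `compute_class_weight` on an assumed imbalanced toy target.

    import numpy as np
    from sklearn.utils.class_weight import compute_class_weight

    y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])
    classes = np.unique(y)
    # n_samples / (n_classes * np.bincount(y)), as stated above
    manual = len(y) / (len(classes) * np.bincount(y))
    auto = compute_class_weight("balanced", classes=classes, y=y)
    assert np.allclose(manual, auto)
    print(dict(zip(classes, manual)))  # rarer classes receive proportionally larger weights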
+ + intercept_scaling : float, default=1.0 + When fit_intercept is True, instance vector x becomes + [x, intercept_scaling], + i.e. a "synthetic" feature with constant value equals to + intercept_scaling is appended to the instance vector. + It must match the fit() method parameter. + + Returns + ------- + l1_min_c : float + Minimum value for C. + + Examples + -------- + >>> from sklearn.svm import l1_min_c + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_samples=100, n_features=20, random_state=42) + >>> print(f"{l1_min_c(X, y, loss='squared_hinge', fit_intercept=True):.4f}") + 0.0044 + """ + + X = check_array(X, accept_sparse="csc") + check_consistent_length(X, y) + + Y = LabelBinarizer(neg_label=-1).fit_transform(y).T + # maximum absolute value over classes and features + den = np.max(np.abs(safe_sparse_dot(Y, X))) + if fit_intercept: + bias = np.full( + (np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype + ) + den = max(den, abs(np.dot(Y, bias)).max()) + + if den == 0.0: + raise ValueError( + "Ill-posed l1_min_c calculation: l1 will always " + "select zero coefficients for this data" + ) + if loss == "squared_hinge": + return 0.5 / den + else: # loss == 'log': + return 2.0 / den diff --git a/.venv/Lib/site-packages/sklearn/svm/_classes.py b/.venv/Lib/site-packages/sklearn/svm/_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..8d4b360f1efa59b9dd3bf94fb68f82ffeda6f74d --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_classes.py @@ -0,0 +1,1779 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +from numbers import Integral, Real + +import numpy as np + +from ..base import BaseEstimator, OutlierMixin, RegressorMixin, _fit_context +from ..linear_model._base import LinearClassifierMixin, LinearModel, SparseCoefMixin +from ..utils._param_validation import Interval, StrOptions +from ..utils.multiclass import check_classification_targets +from ..utils.validation import _num_samples, validate_data +from ._base import BaseLibSVM, BaseSVC, _fit_liblinear, _get_liblinear_solver_type + + +def _validate_dual_parameter(dual, loss, penalty, multi_class, X): + """Helper function to assign the value of dual parameter.""" + if dual == "auto": + if X.shape[0] < X.shape[1]: + try: + _get_liblinear_solver_type(multi_class, penalty, loss, True) + return True + except ValueError: # dual not supported for the combination + return False + else: + try: + _get_liblinear_solver_type(multi_class, penalty, loss, False) + return False + except ValueError: # primal not supported by the combination + return True + else: + return dual + + +class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): + """Linear Support Vector Classification. + + Similar to SVC with parameter kernel='linear', but implemented in terms of + liblinear rather than libsvm, so it has more flexibility in the choice of + penalties and loss functions and should scale better to large numbers of + samples. + + The main differences between :class:`~sklearn.svm.LinearSVC` and + :class:`~sklearn.svm.SVC` lie in the loss function used by default, and in + the handling of intercept regularization between those two implementations. + + This class supports both dense and sparse input and the multiclass support + is handled according to a one-vs-the-rest scheme. + + Read more in the :ref:`User Guide `. 
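A sketch, under assumed toy data, of the guarantee stated for `l1_min_c` above: for C below the bound an l1-penalized logistic model is expected to keep all coefficients at zero, while a C comfortably above it can select features. The 0.9 and 10 multipliers are arbitrary illustrative choices.

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import l1_min_c

    X, y = make_classification(n_samples=100, n_features=20, random_state=42)
    c_min = l1_min_c(X, y, loss="log")

    below = LogisticRegression(penalty="l1", solver="liblinear", C=0.9 * c_min).fit(X, y)
    above = LogisticRegression(penalty="l1", solver="liblinear", C=10 * c_min).fit(X, y)
    print(np.count_nonzero(below.coef_))  # expected: 0 (empty model below the bound)
    print(np.count_nonzero(above.coef_))  # expected: > 0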
+ + Parameters + ---------- + penalty : {'l1', 'l2'}, default='l2' + Specifies the norm used in the penalization. The 'l2' + penalty is the standard used in SVC. The 'l1' leads to ``coef_`` + vectors that are sparse. + + loss : {'hinge', 'squared_hinge'}, default='squared_hinge' + Specifies the loss function. 'hinge' is the standard SVM loss + (used e.g. by the SVC class) while 'squared_hinge' is the + square of the hinge loss. The combination of ``penalty='l1'`` + and ``loss='hinge'`` is not supported. + + dual : "auto" or bool, default="auto" + Select the algorithm to either solve the dual or primal + optimization problem. Prefer dual=False when n_samples > n_features. + `dual="auto"` will choose the value of the parameter automatically, + based on the values of `n_samples`, `n_features`, `loss`, `multi_class` + and `penalty`. If `n_samples` < `n_features` and optimizer supports + chosen `loss`, `multi_class` and `penalty`, then dual will be set to True, + otherwise it will be set to False. + + .. versionchanged:: 1.3 + The `"auto"` option is added in version 1.3 and will be the default + in version 1.5. + + tol : float, default=1e-4 + Tolerance for stopping criteria. + + C : float, default=1.0 + Regularization parameter. The strength of the regularization is + inversely proportional to C. Must be strictly positive. + For an intuitive visualization of the effects of scaling + the regularization parameter C, see + :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. + + multi_class : {'ovr', 'crammer_singer'}, default='ovr' + Determines the multi-class strategy if `y` contains more than + two classes. + ``"ovr"`` trains n_classes one-vs-rest classifiers, while + ``"crammer_singer"`` optimizes a joint objective over all classes. + While `crammer_singer` is interesting from a theoretical perspective + as it is consistent, it is seldom used in practice as it rarely leads + to better accuracy and is more expensive to compute. + If ``"crammer_singer"`` is chosen, the options loss, penalty and dual + will be ignored. + + fit_intercept : bool, default=True + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). + + intercept_scaling : float, default=1.0 + When `fit_intercept` is True, the instance vector x becomes ``[x_1, + ..., x_n, intercept_scaling]``, i.e. a "synthetic" feature with a + constant value equal to `intercept_scaling` is appended to the instance + vector. The intercept becomes intercept_scaling * synthetic feature + weight. Note that liblinear internally penalizes the intercept, + treating it like any other term in the feature vector. To reduce the + impact of the regularization on the intercept, the `intercept_scaling` + parameter can be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. + Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. + + class_weight : dict or 'balanced', default=None + Set the parameter C of class i to ``class_weight[i]*C`` for + SVC. 
If not given, all classes are supposed to have + weight one. + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))``. + + verbose : int, default=0 + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in liblinear that, if enabled, may not work + properly in a multithreaded context. + + random_state : int, RandomState instance or None, default=None + Controls the pseudo random number generation for shuffling the data for + the dual coordinate descent (if ``dual=True``). When ``dual=False`` the + underlying implementation of :class:`LinearSVC` is not random and + ``random_state`` has no effect on the results. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + max_iter : int, default=1000 + The maximum number of iterations to be run. + + Attributes + ---------- + coef_ : ndarray of shape (1, n_features) if n_classes == 2 \ + else (n_classes, n_features) + Weights assigned to the features (coefficients in the primal + problem). + + ``coef_`` is a readonly property derived from ``raw_coef_`` that + follows the internal memory layout of liblinear. + + intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,) + Constants in decision function. + + classes_ : ndarray of shape (n_classes,) + The unique classes labels. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Maximum number of iterations run across all classes. + + See Also + -------- + SVC : Implementation of Support Vector Machine classifier using libsvm: + the kernel can be non-linear but its SMO algorithm does not + scale to large number of samples as LinearSVC does. + + Furthermore SVC multi-class mode is implemented using one + vs one scheme while LinearSVC uses one vs the rest. It is + possible to implement one vs the rest with SVC by using the + :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper. + + Finally SVC can fit dense data without memory copy if the input + is C-contiguous. Sparse data will still incur memory copy though. + + sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same + cost function as LinearSVC + by adjusting the penalty and loss parameters. In addition it requires + less memory, allows incremental (online) learning, and implements + various loss functions and regularization regimes. + + Notes + ----- + The underlying C implementation uses a random number generator to + select features when fitting the model. It is thus not uncommon + to have slightly different results for the same input data. If + that happens, try with a smaller ``tol`` parameter. + + The underlying implementation, liblinear, uses a sparse internal + representation for the data that will incur a memory copy. + + Predict output may not match that of standalone liblinear in certain + cases. See :ref:`differences from liblinear ` + in the narrative documentation. 
+ + References + ---------- + `LIBLINEAR: A Library for Large Linear Classification + `__ + + Examples + -------- + >>> from sklearn.svm import LinearSVC + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_features=4, random_state=0) + >>> clf = make_pipeline(StandardScaler(), + ... LinearSVC(random_state=0, tol=1e-5)) + >>> clf.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), + ('linearsvc', LinearSVC(random_state=0, tol=1e-05))]) + + >>> print(clf.named_steps['linearsvc'].coef_) + [[0.141... 0.526... 0.679... 0.493...]] + + >>> print(clf.named_steps['linearsvc'].intercept_) + [0.1693...] + >>> print(clf.predict([[0, 0, 0, 0]])) + [1] + """ + + _parameter_constraints: dict = { + "penalty": [StrOptions({"l1", "l2"})], + "loss": [StrOptions({"hinge", "squared_hinge"})], + "dual": ["boolean", StrOptions({"auto"})], + "tol": [Interval(Real, 0.0, None, closed="neither")], + "C": [Interval(Real, 0.0, None, closed="neither")], + "multi_class": [StrOptions({"ovr", "crammer_singer"})], + "fit_intercept": ["boolean"], + "intercept_scaling": [Interval(Real, 0, None, closed="neither")], + "class_weight": [None, dict, StrOptions({"balanced"})], + "verbose": ["verbose"], + "random_state": ["random_state"], + "max_iter": [Interval(Integral, 0, None, closed="left")], + } + + def __init__( + self, + penalty="l2", + loss="squared_hinge", + *, + dual="auto", + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, + ): + self.dual = dual + self.tol = tol + self.C = C + self.multi_class = multi_class + self.fit_intercept = fit_intercept + self.intercept_scaling = intercept_scaling + self.class_weight = class_weight + self.verbose = verbose + self.random_state = random_state + self.max_iter = max_iter + self.penalty = penalty + self.loss = loss + + @_fit_context(prefer_skip_nested_validation=True) + def fit(self, X, y, sample_weight=None): + """Fit the model according to the given training data. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target vector relative to X. + + sample_weight : array-like of shape (n_samples,), default=None + Array of weights that are assigned to individual + samples. If not provided, + then each sample is given unit weight. + + .. versionadded:: 0.18 + + Returns + ------- + self : object + An instance of the estimator. 
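A standalone sketch of the `dual="auto"` rule described above, for the common case where both primal and dual solvers exist (LinearSVC's default `loss='squared_hinge'`, `penalty='l2'`); `resolve_dual` is an illustrative helper, not the module's private `_validate_dual_parameter`.

    def resolve_dual(n_samples, n_features, dual="auto"):
        # dual='auto' prefers the dual formulation only for wide data
        # (n_samples < n_features); otherwise the primal problem is solved
        if dual == "auto":
            return n_samples < n_features
        return dual

    print(resolve_dual(1000, 20))  # tall data -> False (primal)
    print(resolve_dual(50, 5000))  # wide data -> True (dual)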
+ """ + X, y = validate_data( + self, + X, + y, + accept_sparse="csr", + dtype=np.float64, + order="C", + accept_large_sparse=False, + ) + check_classification_targets(y) + self.classes_ = np.unique(y) + + _dual = _validate_dual_parameter( + self.dual, self.loss, self.penalty, self.multi_class, X + ) + + self.coef_, self.intercept_, n_iter_ = _fit_liblinear( + X, + y, + self.C, + self.fit_intercept, + self.intercept_scaling, + self.class_weight, + self.penalty, + _dual, + self.verbose, + self.max_iter, + self.tol, + self.random_state, + self.multi_class, + self.loss, + sample_weight=sample_weight, + ) + # Backward compatibility: _fit_liblinear is used both by LinearSVC/R + # and LogisticRegression but LogisticRegression sets a structured + # `n_iter_` attribute with information about the underlying OvR fits + # while LinearSVC/R only reports the maximum value. + self.n_iter_ = n_iter_.max().item() + + if self.multi_class == "crammer_singer" and len(self.classes_) == 2: + self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1) + if self.fit_intercept: + intercept = self.intercept_[1] - self.intercept_[0] + self.intercept_ = np.array([intercept]) + + return self + + +class LinearSVR(RegressorMixin, LinearModel): + """Linear Support Vector Regression. + + Similar to SVR with parameter kernel='linear', but implemented in terms of + liblinear rather than libsvm, so it has more flexibility in the choice of + penalties and loss functions and should scale better to large numbers of + samples. + + The main differences between :class:`~sklearn.svm.LinearSVR` and + :class:`~sklearn.svm.SVR` lie in the loss function used by default, and in + the handling of intercept regularization between those two implementations. + + This class supports both dense and sparse input. + + Read more in the :ref:`User Guide `. + + .. versionadded:: 0.16 + + Parameters + ---------- + epsilon : float, default=0.0 + Epsilon parameter in the epsilon-insensitive loss function. Note + that the value of this parameter depends on the scale of the target + variable y. If unsure, set ``epsilon=0``. + + tol : float, default=1e-4 + Tolerance for stopping criteria. + + C : float, default=1.0 + Regularization parameter. The strength of the regularization is + inversely proportional to C. Must be strictly positive. + + loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \ + default='epsilon_insensitive' + Specifies the loss function. The epsilon-insensitive loss + (standard SVR) is the L1 loss, while the squared epsilon-insensitive + loss ('squared_epsilon_insensitive') is the L2 loss. + + fit_intercept : bool, default=True + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). + + intercept_scaling : float, default=1.0 + When `fit_intercept` is True, the instance vector x becomes `[x_1, ..., + x_n, intercept_scaling]`, i.e. a "synthetic" feature with a constant + value equal to `intercept_scaling` is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight. + Note that liblinear internally penalizes the intercept, treating it + like any other term in the feature vector. 
To reduce the impact of the + regularization on the intercept, the `intercept_scaling` parameter can + be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. + Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. + + dual : "auto" or bool, default="auto" + Select the algorithm to either solve the dual or primal + optimization problem. Prefer dual=False when n_samples > n_features. + `dual="auto"` will choose the value of the parameter automatically, + based on the values of `n_samples`, `n_features` and `loss`. If + `n_samples` < `n_features` and optimizer supports chosen `loss`, + then dual will be set to True, otherwise it will be set to False. + + .. versionchanged:: 1.3 + The `"auto"` option is added in version 1.3 and will be the default + in version 1.5. + + verbose : int, default=0 + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in liblinear that, if enabled, may not work + properly in a multithreaded context. + + random_state : int, RandomState instance or None, default=None + Controls the pseudo random number generation for shuffling the data. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + max_iter : int, default=1000 + The maximum number of iterations to be run. + + Attributes + ---------- + coef_ : ndarray of shape (n_features) if n_classes == 2 \ + else (n_classes, n_features) + Weights assigned to the features (coefficients in the primal + problem). + + `coef_` is a readonly property derived from `raw_coef_` that + follows the internal memory layout of liblinear. + + intercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes) + Constants in decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Maximum number of iterations run across all classes. + + See Also + -------- + LinearSVC : Implementation of Support Vector Machine classifier using the + same library as this class (liblinear). + + SVR : Implementation of Support Vector Machine regression using libsvm: + the kernel can be non-linear but its SMO algorithm does not scale to + large number of samples as :class:`~sklearn.svm.LinearSVR` does. + + sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost + function as LinearSVR + by adjusting the penalty and loss parameters. In addition it requires + less memory, allows incremental (online) learning, and implements + various loss functions and regularization regimes. + + Examples + -------- + >>> from sklearn.svm import LinearSVR + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_features=4, random_state=0) + >>> regr = make_pipeline(StandardScaler(), + ... 
LinearSVR(random_state=0, tol=1e-5)) + >>> regr.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), + ('linearsvr', LinearSVR(random_state=0, tol=1e-05))]) + + >>> print(regr.named_steps['linearsvr'].coef_) + [18.582... 27.023... 44.357... 64.522...] + >>> print(regr.named_steps['linearsvr'].intercept_) + [-4...] + >>> print(regr.predict([[0, 0, 0, 0]])) + [-2.384...] + """ + + _parameter_constraints: dict = { + "epsilon": [Real], + "tol": [Interval(Real, 0.0, None, closed="neither")], + "C": [Interval(Real, 0.0, None, closed="neither")], + "loss": [StrOptions({"epsilon_insensitive", "squared_epsilon_insensitive"})], + "fit_intercept": ["boolean"], + "intercept_scaling": [Interval(Real, 0, None, closed="neither")], + "dual": ["boolean", StrOptions({"auto"})], + "verbose": ["verbose"], + "random_state": ["random_state"], + "max_iter": [Interval(Integral, 0, None, closed="left")], + } + + def __init__( + self, + *, + epsilon=0.0, + tol=1e-4, + C=1.0, + loss="epsilon_insensitive", + fit_intercept=True, + intercept_scaling=1.0, + dual="auto", + verbose=0, + random_state=None, + max_iter=1000, + ): + self.tol = tol + self.C = C + self.epsilon = epsilon + self.fit_intercept = fit_intercept + self.intercept_scaling = intercept_scaling + self.verbose = verbose + self.random_state = random_state + self.max_iter = max_iter + self.dual = dual + self.loss = loss + + @_fit_context(prefer_skip_nested_validation=True) + def fit(self, X, y, sample_weight=None): + """Fit the model according to the given training data. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target vector relative to X. + + sample_weight : array-like of shape (n_samples,), default=None + Array of weights that are assigned to individual + samples. If not provided, + then each sample is given unit weight. + + .. versionadded:: 0.18 + + Returns + ------- + self : object + An instance of the estimator. + """ + X, y = validate_data( + self, + X, + y, + accept_sparse="csr", + dtype=np.float64, + order="C", + accept_large_sparse=False, + ) + penalty = "l2" # SVR only accepts l2 penalty + + _dual = _validate_dual_parameter(self.dual, self.loss, penalty, "ovr", X) + + self.coef_, self.intercept_, n_iter_ = _fit_liblinear( + X, + y, + self.C, + self.fit_intercept, + self.intercept_scaling, + None, + penalty, + _dual, + self.verbose, + self.max_iter, + self.tol, + self.random_state, + loss=self.loss, + epsilon=self.epsilon, + sample_weight=sample_weight, + ) + self.coef_ = self.coef_.ravel() + # Backward compatibility: _fit_liblinear is used both by LinearSVC/R + # and LogisticRegression but LogisticRegression sets a structured + # `n_iter_` attribute with information about the underlying OvR fits + # while LinearSVC/R only reports the maximum value. + self.n_iter_ = n_iter_.max().item() + + return self + + +class SVC(BaseSVC): + """C-Support Vector Classification. + + The implementation is based on libsvm. The fit time scales at least + quadratically with the number of samples and may be impractical + beyond tens of thousands of samples. For large datasets + consider using :class:`~sklearn.svm.LinearSVC` or + :class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a + :class:`~sklearn.kernel_approximation.Nystroem` transformer or + other :ref:`kernel_approximation`. 
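A sketch, with assumed toy data and an arbitrary `n_components`, of the alternative suggested above for large datasets: approximate an RBF kernel with Nystroem and train a linear model on the transformed features instead of fitting a kernelized SVC directly.

    from sklearn.datasets import make_classification
    from sklearn.kernel_approximation import Nystroem
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=5000, n_features=20, random_state=0)
    approx_rbf = make_pipeline(
        StandardScaler(),
        Nystroem(kernel="rbf", n_components=100, random_state=0),  # low-rank kernel map
        LinearSVC(dual="auto", random_state=0),
    )
    approx_rbf.fit(X, y)
    print(approx_rbf.score(X, y))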
+ + The multiclass support is handled according to a one-vs-one scheme. + + For details on the precise mathematical formulation of the provided + kernel functions and how `gamma`, `coef0` and `degree` affect each + other, see the corresponding section in the narrative documentation: + :ref:`svm_kernels`. + + To learn how to tune SVC's hyperparameters, see the following example: + :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + C : float, default=1.0 + Regularization parameter. The strength of the regularization is + inversely proportional to C. Must be strictly positive. The penalty + is a squared l2 penalty. For an intuitive visualization of the effects + of scaling the regularization parameter C, see + :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. + + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ + default='rbf' + Specifies the kernel type to be used in the algorithm. If + none is given, 'rbf' will be used. If a callable is given it is used to + pre-compute the kernel matrix from data matrices; that matrix should be + an array of shape ``(n_samples, n_samples)``. For an intuitive + visualization of different kernel types see + :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. + + degree : int, default=3 + Degree of the polynomial kernel function ('poly'). + Must be non-negative. Ignored by all other kernels. + + gamma : {'scale', 'auto'} or float, default='scale' + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features + - if float, must be non-negative. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. + + coef0 : float, default=0.0 + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + shrinking : bool, default=True + Whether to use the shrinking heuristic. + See the :ref:`User Guide `. + + probability : bool, default=False + Whether to enable probability estimates. This must be enabled prior + to calling `fit`, will slow down that method as it internally uses + 5-fold cross-validation, and `predict_proba` may be inconsistent with + `predict`. Read more in the :ref:`User Guide `. + + tol : float, default=1e-3 + Tolerance for stopping criterion. + + cache_size : float, default=200 + Specify the size of the kernel cache (in MB). + + class_weight : dict or 'balanced', default=None + Set the parameter C of class i to class_weight[i]*C for + SVC. If not given, all classes are supposed to have + weight one. + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))``. + + verbose : bool, default=False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, default=-1 + Hard limit on iterations within solver, or -1 for no limit. + + decision_function_shape : {'ovo', 'ovr'}, default='ovr' + Whether to return a one-vs-rest ('ovr') decision function of shape + (n_samples, n_classes) as all other classifiers, or the original + one-vs-one ('ovo') decision function of libsvm which has shape + (n_samples, n_classes * (n_classes - 1) / 2). 
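A sketch of the `gamma='scale'` and `gamma='auto'` formulas quoted above; it peeks at the private `_gamma` attribute set during fit, which is an implementation detail of this module and used here only for illustration.

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    n_features = X.shape[1]

    scale = SVC(gamma="scale").fit(X, y)
    auto = SVC(gamma="auto").fit(X, y)
    assert np.isclose(scale._gamma, 1.0 / (n_features * X.var()))  # 1 / (n_features * X.var())
    assert np.isclose(auto._gamma, 1.0 / n_features)               # 1 / n_features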
However, note that + internally, one-vs-one ('ovo') is always used as a multi-class strategy + to train models; an ovr matrix is only constructed from the ovo matrix. + The parameter is ignored for binary classification. + + .. versionchanged:: 0.19 + decision_function_shape is 'ovr' by default. + + .. versionadded:: 0.17 + *decision_function_shape='ovr'* is recommended. + + .. versionchanged:: 0.17 + Deprecated *decision_function_shape='ovo' and None*. + + break_ties : bool, default=False + If true, ``decision_function_shape='ovr'``, and number of classes > 2, + :term:`predict` will break ties according to the confidence values of + :term:`decision_function`; otherwise the first class among the tied + classes is returned. Please note that breaking ties comes at a + relatively high computational cost compared to a simple predict. See + :ref:`sphx_glr_auto_examples_svm_plot_svm_tie_breaking.py` for an + example of its usage with ``decision_function_shape='ovr'``. + + .. versionadded:: 0.22 + + random_state : int, RandomState instance or None, default=None + Controls the pseudo random number generation for shuffling the data for + probability estimates. Ignored when `probability` is False. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + Attributes + ---------- + class_weight_ : ndarray of shape (n_classes,) + Multipliers of parameter C for each class. + Computed based on the ``class_weight`` parameter. + + classes_ : ndarray of shape (n_classes,) + The classes labels. + + coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features) + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is a readonly property derived from `dual_coef_` and + `support_vectors_`. + + dual_coef_ : ndarray of shape (n_classes -1, n_SV) + Dual coefficients of the support vector in the decision + function (see :ref:`sgd_mathematical_formulation`), multiplied by + their targets. + For multiclass, coefficient for all 1-vs-1 classifiers. + The layout of the coefficients in the multiclass case is somewhat + non-trivial. See the :ref:`multi-class section of the User Guide + ` for details. + + fit_status_ : int + 0 if correctly fitted, 1 otherwise (will raise warning) + + intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) + Constants in decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,) + Number of iterations run by the optimization routine to fit the model. + The shape of this attribute depends on the number of models optimized + which in turn depends on the number of classes. + + .. versionadded:: 1.1 + + support_ : ndarray of shape (n_SV) + Indices of support vectors. + + support_vectors_ : ndarray of shape (n_SV, n_features) + Support vectors. An empty array if kernel is precomputed. + + n_support_ : ndarray of shape (n_classes,), dtype=int32 + Number of support vectors for each class. 
+ + probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2) + probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2) + If `probability=True`, it corresponds to the parameters learned in + Platt scaling to produce probability estimates from decision values. + If `probability=False`, it's an empty array. Platt scaling uses the + logistic function + ``1 / (1 + exp(decision_value * probA_ + probB_))`` + where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For + more information on the multiclass case and training procedure see + section 8 of [1]_. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + + See Also + -------- + SVR : Support Vector Machine for Regression implemented using libsvm. + + LinearSVC : Scalable Linear Support Vector Machine for classification + implemented using liblinear. Check the See Also section of + LinearSVC for more comparison element. + + References + ---------- + .. [1] `LIBSVM: A Library for Support Vector Machines + `_ + + .. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector + Machines and Comparisons to Regularized Likelihood Methods" + `_ + + Examples + -------- + >>> import numpy as np + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) + >>> y = np.array([1, 1, 2, 2]) + >>> from sklearn.svm import SVC + >>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto')) + >>> clf.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), + ('svc', SVC(gamma='auto'))]) + + >>> print(clf.predict([[-0.8, -1]])) + [1] + + For a comaprison of the SVC with other classifiers see: + :ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py`. + """ + + _impl = "c_svc" + + def __init__( + self, + *, + C=1.0, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + shrinking=True, + probability=False, + tol=1e-3, + cache_size=200, + class_weight=None, + verbose=False, + max_iter=-1, + decision_function_shape="ovr", + break_ties=False, + random_state=None, + ): + super().__init__( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=C, + nu=0.0, + shrinking=shrinking, + probability=probability, + cache_size=cache_size, + class_weight=class_weight, + verbose=verbose, + max_iter=max_iter, + decision_function_shape=decision_function_shape, + break_ties=break_ties, + random_state=random_state, + ) + + +class NuSVC(BaseSVC): + """Nu-Support Vector Classification. + + Similar to SVC but uses a parameter to control the number of support + vectors. + + The implementation is based on libsvm. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + nu : float, default=0.5 + An upper bound on the fraction of margin errors (see :ref:`User Guide + `) and a lower bound of the fraction of support vectors. + Should be in the interval (0, 1]. + + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ + default='rbf' + Specifies the kernel type to be used in the algorithm. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. For an intuitive + visualization of different kernel types see + :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. + + degree : int, default=3 + Degree of the polynomial kernel function ('poly'). + Must be non-negative. Ignored by all other kernels. 
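A sketch of the Platt-scaling formula quoted above, evaluated directly from `decision_function` values and the fitted `probA_` / `probB_`; which of the two binary classes the resulting probability refers to is an implementation detail, so the comparison is only qualitative and the toy data is an assumption.

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=200, n_features=5, random_state=0)
    clf = SVC(probability=True, random_state=0).fit(X, y)

    decision = clf.decision_function(X[:5])
    # 1 / (1 + exp(decision_value * probA_ + probB_)), as stated above
    platt = 1.0 / (1.0 + np.exp(decision * clf.probA_ + clf.probB_))
    print(np.round(platt, 3))
    print(np.round(clf.predict_proba(X[:5]), 3))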
+ + gamma : {'scale', 'auto'} or float, default='scale' + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features + - if float, must be non-negative. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. + + coef0 : float, default=0.0 + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + shrinking : bool, default=True + Whether to use the shrinking heuristic. + See the :ref:`User Guide `. + + probability : bool, default=False + Whether to enable probability estimates. This must be enabled prior + to calling `fit`, will slow down that method as it internally uses + 5-fold cross-validation, and `predict_proba` may be inconsistent with + `predict`. Read more in the :ref:`User Guide `. + + tol : float, default=1e-3 + Tolerance for stopping criterion. + + cache_size : float, default=200 + Specify the size of the kernel cache (in MB). + + class_weight : {dict, 'balanced'}, default=None + Set the parameter C of class i to class_weight[i]*C for + SVC. If not given, all classes are supposed to have + weight one. The "balanced" mode uses the values of y to automatically + adjust weights inversely proportional to class frequencies as + ``n_samples / (n_classes * np.bincount(y))``. + + verbose : bool, default=False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, default=-1 + Hard limit on iterations within solver, or -1 for no limit. + + decision_function_shape : {'ovo', 'ovr'}, default='ovr' + Whether to return a one-vs-rest ('ovr') decision function of shape + (n_samples, n_classes) as all other classifiers, or the original + one-vs-one ('ovo') decision function of libsvm which has shape + (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one + ('ovo') is always used as multi-class strategy. The parameter is + ignored for binary classification. + + .. versionchanged:: 0.19 + decision_function_shape is 'ovr' by default. + + .. versionadded:: 0.17 + *decision_function_shape='ovr'* is recommended. + + .. versionchanged:: 0.17 + Deprecated *decision_function_shape='ovo' and None*. + + break_ties : bool, default=False + If true, ``decision_function_shape='ovr'``, and number of classes > 2, + :term:`predict` will break ties according to the confidence values of + :term:`decision_function`; otherwise the first class among the tied + classes is returned. Please note that breaking ties comes at a + relatively high computational cost compared to a simple predict. + See :ref:`sphx_glr_auto_examples_svm_plot_svm_tie_breaking.py` for an + example of its usage with ``decision_function_shape='ovr'``. + + .. versionadded:: 0.22 + + random_state : int, RandomState instance or None, default=None + Controls the pseudo random number generation for shuffling the data for + probability estimates. Ignored when `probability` is False. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + Attributes + ---------- + class_weight_ : ndarray of shape (n_classes,) + Multipliers of parameter C of each class. + Computed based on the ``class_weight`` parameter. + + classes_ : ndarray of shape (n_classes,) + The unique classes labels. 
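A sketch, on assumed toy data, of the `nu` bound described above: after fitting, the fraction of training points kept as support vectors is expected to be at least `nu`, up to small numerical slack.

    from sklearn.datasets import make_classification
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import NuSVC

    X, y = make_classification(n_samples=200, n_features=10, random_state=0)
    nu = 0.5
    clf = make_pipeline(StandardScaler(), NuSVC(nu=nu)).fit(X, y)
    frac_sv = clf[-1].support_vectors_.shape[0] / X.shape[0]
    print(frac_sv, frac_sv >= nu)  # expected: True (nu is a lower bound on this fraction)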
+ + coef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features) + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_`. + + dual_coef_ : ndarray of shape (n_classes - 1, n_SV) + Dual coefficients of the support vector in the decision + function (see :ref:`sgd_mathematical_formulation`), multiplied by + their targets. + For multiclass, coefficient for all 1-vs-1 classifiers. + The layout of the coefficients in the multiclass case is somewhat + non-trivial. See the :ref:`multi-class section of the User Guide + ` for details. + + fit_status_ : int + 0 if correctly fitted, 1 if the algorithm did not converge. + + intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) + Constants in decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,) + Number of iterations run by the optimization routine to fit the model. + The shape of this attribute depends on the number of models optimized + which in turn depends on the number of classes. + + .. versionadded:: 1.1 + + support_ : ndarray of shape (n_SV,) + Indices of support vectors. + + support_vectors_ : ndarray of shape (n_SV, n_features) + Support vectors. + + n_support_ : ndarray of shape (n_classes,), dtype=int32 + Number of support vectors for each class. + + fit_status_ : int + 0 if correctly fitted, 1 if the algorithm did not converge. + + probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) + + probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) + If `probability=True`, it corresponds to the parameters learned in + Platt scaling to produce probability estimates from decision values. + If `probability=False`, it's an empty array. Platt scaling uses the + logistic function + ``1 / (1 + exp(decision_value * probA_ + probB_))`` + where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For + more information on the multiclass case and training procedure see + section 8 of [1]_. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + + See Also + -------- + SVC : Support Vector Machine for classification using libsvm. + + LinearSVC : Scalable linear Support Vector Machine for classification using + liblinear. + + References + ---------- + .. [1] `LIBSVM: A Library for Support Vector Machines + `_ + + .. [2] `Platt, John (1999). 
"Probabilistic Outputs for Support Vector + Machines and Comparisons to Regularized Likelihood Methods" + `_ + + Examples + -------- + >>> import numpy as np + >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) + >>> y = np.array([1, 1, 2, 2]) + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> from sklearn.svm import NuSVC + >>> clf = make_pipeline(StandardScaler(), NuSVC()) + >>> clf.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())]) + >>> print(clf.predict([[-0.8, -1]])) + [1] + """ + + _impl = "nu_svc" + + _parameter_constraints: dict = { + **BaseSVC._parameter_constraints, + "nu": [Interval(Real, 0.0, 1.0, closed="right")], + } + _parameter_constraints.pop("C") + + def __init__( + self, + *, + nu=0.5, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + shrinking=True, + probability=False, + tol=1e-3, + cache_size=200, + class_weight=None, + verbose=False, + max_iter=-1, + decision_function_shape="ovr", + break_ties=False, + random_state=None, + ): + super().__init__( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=0.0, + nu=nu, + shrinking=shrinking, + probability=probability, + cache_size=cache_size, + class_weight=class_weight, + verbose=verbose, + max_iter=max_iter, + decision_function_shape=decision_function_shape, + break_ties=break_ties, + random_state=random_state, + ) + + +class SVR(RegressorMixin, BaseLibSVM): + """Epsilon-Support Vector Regression. + + The free parameters in the model are C and epsilon. + + The implementation is based on libsvm. The fit time complexity + is more than quadratic with the number of samples which makes it hard + to scale to datasets with more than a couple of 10000 samples. For large + datasets consider using :class:`~sklearn.svm.LinearSVR` or + :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a + :class:`~sklearn.kernel_approximation.Nystroem` transformer or + other :ref:`kernel_approximation`. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ + default='rbf' + Specifies the kernel type to be used in the algorithm. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. + For an intuitive visualization of different kernel types + see :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` + + degree : int, default=3 + Degree of the polynomial kernel function ('poly'). + Must be non-negative. Ignored by all other kernels. + + gamma : {'scale', 'auto'} or float, default='scale' + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features + - if float, must be non-negative. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. + + coef0 : float, default=0.0 + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + tol : float, default=1e-3 + Tolerance for stopping criterion. + + C : float, default=1.0 + Regularization parameter. The strength of the regularization is + inversely proportional to C. Must be strictly positive. + The penalty is a squared l2. For an intuitive visualization of the + effects of scaling the regularization parameter C, see + :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. 
+ + epsilon : float, default=0.1 + Epsilon in the epsilon-SVR model. It specifies the epsilon-tube + within which no penalty is associated in the training loss function + with points predicted within a distance epsilon from the actual + value. Must be non-negative. + + shrinking : bool, default=True + Whether to use the shrinking heuristic. + See the :ref:`User Guide `. + + cache_size : float, default=200 + Specify the size of the kernel cache (in MB). + + verbose : bool, default=False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, default=-1 + Hard limit on iterations within solver, or -1 for no limit. + + Attributes + ---------- + coef_ : ndarray of shape (1, n_features) + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_`. + + dual_coef_ : ndarray of shape (1, n_SV) + Coefficients of the support vector in the decision function. + + fit_status_ : int + 0 if correctly fitted, 1 otherwise (will raise warning) + + intercept_ : ndarray of shape (1,) + Constants in decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run by the optimization routine to fit the model. + + .. versionadded:: 1.1 + + n_support_ : ndarray of shape (1,), dtype=int32 + Number of support vectors. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + + support_ : ndarray of shape (n_SV,) + Indices of support vectors. + + support_vectors_ : ndarray of shape (n_SV, n_features) + Support vectors. + + See Also + -------- + NuSVR : Support Vector Machine for regression implemented using libsvm + using a parameter to control the number of support vectors. + + LinearSVR : Scalable Linear Support Vector Machine for regression + implemented using liblinear. + + References + ---------- + .. [1] `LIBSVM: A Library for Support Vector Machines + `_ + + .. [2] `Platt, John (1999). 
"Probabilistic Outputs for Support Vector + Machines and Comparisons to Regularized Likelihood Methods" + `_ + + Examples + -------- + >>> from sklearn.svm import SVR + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> import numpy as np + >>> n_samples, n_features = 10, 5 + >>> rng = np.random.RandomState(0) + >>> y = rng.randn(n_samples) + >>> X = rng.randn(n_samples, n_features) + >>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2)) + >>> regr.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), + ('svr', SVR(epsilon=0.2))]) + """ + + _impl = "epsilon_svr" + + _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} + for unused_param in ["class_weight", "nu", "probability", "random_state"]: + _parameter_constraints.pop(unused_param) + + def __init__( + self, + *, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + tol=1e-3, + C=1.0, + epsilon=0.1, + shrinking=True, + cache_size=200, + verbose=False, + max_iter=-1, + ): + super().__init__( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=C, + nu=0.0, + epsilon=epsilon, + verbose=verbose, + shrinking=shrinking, + probability=False, + cache_size=cache_size, + class_weight=None, + max_iter=max_iter, + random_state=None, + ) + + +class NuSVR(RegressorMixin, BaseLibSVM): + """Nu Support Vector Regression. + + Similar to NuSVC, for regression, uses a parameter nu to control + the number of support vectors. However, unlike NuSVC, where nu + replaces C, here nu replaces the parameter epsilon of epsilon-SVR. + + The implementation is based on libsvm. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + nu : float, default=0.5 + An upper bound on the fraction of training errors and a lower bound of + the fraction of support vectors. Should be in the interval (0, 1]. By + default 0.5 will be taken. + + C : float, default=1.0 + Penalty parameter C of the error term. For an intuitive visualization + of the effects of scaling the regularization parameter C, see + :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. + + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ + default='rbf' + Specifies the kernel type to be used in the algorithm. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. + For an intuitive visualization of different kernel types see + See :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` + + degree : int, default=3 + Degree of the polynomial kernel function ('poly'). + Must be non-negative. Ignored by all other kernels. + + gamma : {'scale', 'auto'} or float, default='scale' + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features + - if float, must be non-negative. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. + + coef0 : float, default=0.0 + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + shrinking : bool, default=True + Whether to use the shrinking heuristic. + See the :ref:`User Guide `. + + tol : float, default=1e-3 + Tolerance for stopping criterion. + + cache_size : float, default=200 + Specify the size of the kernel cache (in MB). + + verbose : bool, default=False + Enable verbose output. 
Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, default=-1 + Hard limit on iterations within solver, or -1 for no limit. + + Attributes + ---------- + coef_ : ndarray of shape (1, n_features) + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_`. + + dual_coef_ : ndarray of shape (1, n_SV) + Coefficients of the support vector in the decision function. + + fit_status_ : int + 0 if correctly fitted, 1 otherwise (will raise warning) + + intercept_ : ndarray of shape (1,) + Constants in decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run by the optimization routine to fit the model. + + .. versionadded:: 1.1 + + n_support_ : ndarray of shape (1,), dtype=int32 + Number of support vectors. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + + support_ : ndarray of shape (n_SV,) + Indices of support vectors. + + support_vectors_ : ndarray of shape (n_SV, n_features) + Support vectors. + + See Also + -------- + NuSVC : Support Vector Machine for classification implemented with libsvm + with a parameter to control the number of support vectors. + + SVR : Epsilon Support Vector Machine for regression implemented with + libsvm. + + References + ---------- + .. [1] `LIBSVM: A Library for Support Vector Machines + `_ + + .. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector + Machines and Comparisons to Regularized Likelihood Methods" + `_ + + Examples + -------- + >>> from sklearn.svm import NuSVR + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> import numpy as np + >>> n_samples, n_features = 10, 5 + >>> np.random.seed(0) + >>> y = np.random.randn(n_samples) + >>> X = np.random.randn(n_samples, n_features) + >>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1)) + >>> regr.fit(X, y) + Pipeline(steps=[('standardscaler', StandardScaler()), + ('nusvr', NuSVR(nu=0.1))]) + """ + + _impl = "nu_svr" + + _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} + for unused_param in ["class_weight", "epsilon", "probability", "random_state"]: + _parameter_constraints.pop(unused_param) + + def __init__( + self, + *, + nu=0.5, + C=1.0, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + shrinking=True, + tol=1e-3, + cache_size=200, + verbose=False, + max_iter=-1, + ): + super().__init__( + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=C, + nu=nu, + epsilon=0.0, + shrinking=shrinking, + probability=False, + cache_size=cache_size, + class_weight=None, + verbose=verbose, + max_iter=max_iter, + random_state=None, + ) + + +class OneClassSVM(OutlierMixin, BaseLibSVM): + """Unsupervised Outlier Detection. + + Estimate the support of a high-dimensional distribution. + + The implementation is based on libsvm. + + Read more in the :ref:`User Guide `. 
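A minimal novelty-detection sketch (illustrative only; the toy data and the chosen nu/gamma values are assumptions, not part of the tested examples above): train on inlier samples, then label new points as +1 (inlier) or -1 (outlier).

    >>> import numpy as np
    >>> from sklearn.svm import OneClassSVM
    >>> rng = np.random.RandomState(0)
    >>> X_train = 0.3 * rng.randn(100, 2)           # inlier cloud around the origin
    >>> clf = OneClassSVM(nu=0.1, gamma='auto').fit(X_train)
    >>> pred = clf.predict(np.array([[0.1, 0.1], [4.0, 4.0]]))   # expect roughly [1, -1]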
+ + Parameters + ---------- + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ + default='rbf' + Specifies the kernel type to be used in the algorithm. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. + + degree : int, default=3 + Degree of the polynomial kernel function ('poly'). + Must be non-negative. Ignored by all other kernels. + + gamma : {'scale', 'auto'} or float, default='scale' + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features + - if float, must be non-negative. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. + + coef0 : float, default=0.0 + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + tol : float, default=1e-3 + Tolerance for stopping criterion. + + nu : float, default=0.5 + An upper bound on the fraction of training + errors and a lower bound of the fraction of support + vectors. Should be in the interval (0, 1]. By default 0.5 + will be taken. + + shrinking : bool, default=True + Whether to use the shrinking heuristic. + See the :ref:`User Guide `. + + cache_size : float, default=200 + Specify the size of the kernel cache (in MB). + + verbose : bool, default=False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, default=-1 + Hard limit on iterations within solver, or -1 for no limit. + + Attributes + ---------- + coef_ : ndarray of shape (1, n_features) + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_`. + + dual_coef_ : ndarray of shape (1, n_SV) + Coefficients of the support vectors in the decision function. + + fit_status_ : int + 0 if correctly fitted, 1 otherwise (will raise warning) + + intercept_ : ndarray of shape (1,) + Constant in the decision function. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + n_iter_ : int + Number of iterations run by the optimization routine to fit the model. + + .. versionadded:: 1.1 + + n_support_ : ndarray of shape (n_classes,), dtype=int32 + Number of support vectors for each class. + + offset_ : float + Offset used to define the decision function from the raw scores. + We have the relation: decision_function = score_samples - `offset_`. + The offset is the opposite of `intercept_` and is provided for + consistency with other outlier detection algorithms. + + .. versionadded:: 0.20 + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + + support_ : ndarray of shape (n_SV,) + Indices of support vectors. + + support_vectors_ : ndarray of shape (n_SV, n_features) + Support vectors. + + See Also + -------- + sklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using + Stochastic Gradient Descent. + sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using + Local Outlier Factor (LOF). 
+ sklearn.ensemble.IsolationForest : Isolation Forest Algorithm. + + Examples + -------- + >>> from sklearn.svm import OneClassSVM + >>> X = [[0], [0.44], [0.45], [0.46], [1]] + >>> clf = OneClassSVM(gamma='auto').fit(X) + >>> clf.predict(X) + array([-1, 1, 1, 1, -1]) + >>> clf.score_samples(X) + array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...]) + + For a more extended example, + see :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` + """ + + _impl = "one_class" + + _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} + for unused_param in ["C", "class_weight", "epsilon", "probability", "random_state"]: + _parameter_constraints.pop(unused_param) + + def __init__( + self, + *, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + tol=1e-3, + nu=0.5, + shrinking=True, + cache_size=200, + verbose=False, + max_iter=-1, + ): + super().__init__( + kernel, + degree, + gamma, + coef0, + tol, + 0.0, + nu, + 0.0, + shrinking, + False, + cache_size, + None, + verbose, + max_iter, + random_state=None, + ) + + def fit(self, X, y=None, sample_weight=None): + """Detect the soft boundary of the set of samples X. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Set of samples, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : Ignored + Not used, present for API consistency by convention. + + sample_weight : array-like of shape (n_samples,), default=None + Per-sample weights. Rescale C per sample. Higher weights + force the classifier to put more emphasis on these points. + + Returns + ------- + self : object + Fitted estimator. + + Notes + ----- + If X is not a C-ordered contiguous array it is copied. + """ + super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight) + self.offset_ = -self._intercept_ + return self + + def decision_function(self, X): + """Signed distance to the separating hyperplane. + + Signed distance is positive for an inlier and negative for an outlier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + + Returns + ------- + dec : ndarray of shape (n_samples,) + Returns the decision function of the samples. + """ + dec = self._decision_function(X).ravel() + return dec + + def score_samples(self, X): + """Raw scoring function of the samples. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + + Returns + ------- + score_samples : ndarray of shape (n_samples,) + Returns the (unshifted) scoring function of the samples. + """ + return self.decision_function(X) + self.offset_ + + def predict(self, X): + """Perform classification on samples in X. + + For a one-class model, +1 or -1 is returned. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) or \ + (n_samples_test, n_samples_train) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Class labels for samples in X. 
+ """ + y = super().predict(X) + return np.asarray(y, dtype=np.intp) diff --git a/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.lib b/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.lib new file mode 100644 index 0000000000000000000000000000000000000000..2cfbeaf2d4ca495e37b430e9b5f5acc2657d2b27 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.lib differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.pyd b/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..4b9ed10afedb70bcb17cdac0c6d2e921e83672de Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_liblinear.cp39-win_amd64.pyd differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_liblinear.pxi b/.venv/Lib/site-packages/sklearn/svm/_liblinear.pxi new file mode 100644 index 0000000000000000000000000000000000000000..af0d0b38c2c662275eee069ba2f56dd8617a3c78 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_liblinear.pxi @@ -0,0 +1,43 @@ +from ..utils._typedefs cimport intp_t + +cdef extern from "_cython_blas_helpers.h": + ctypedef double (*dot_func)(int, const double*, int, const double*, int) + ctypedef void (*axpy_func)(int, double, const double*, int, double*, int) + ctypedef void (*scal_func)(int, double, const double*, int) + ctypedef double (*nrm2_func)(int, const double*, int) + cdef struct BlasFunctions: + dot_func dot + axpy_func axpy + scal_func scal + nrm2_func nrm2 + + +cdef extern from "linear.h": + cdef struct feature_node + cdef struct problem + cdef struct model + cdef struct parameter + ctypedef problem* problem_const_ptr "problem const *" + ctypedef parameter* parameter_const_ptr "parameter const *" + ctypedef char* char_const_ptr "char const *" + char_const_ptr check_parameter(problem_const_ptr prob, parameter_const_ptr param) + model *train(problem_const_ptr prob, parameter_const_ptr param, BlasFunctions *blas_functions) nogil + int get_nr_feature (model *model) + int get_nr_class (model *model) + void get_n_iter (model *model, int *n_iter) + void free_and_destroy_model (model **) + void destroy_param (parameter *) + + +cdef extern from "liblinear_helper.c": + void copy_w(void *, model *, int) + parameter *set_parameter(int, double, double, int, char *, char *, int, int, double) + problem *set_problem (char *, int, int, int, int, double, char *, char *) + problem *csr_set_problem (char *, int, char *, char *, int, int, int, double, char *, char *) + + model *set_model(parameter *, char *, intp_t *, char *, double) + + double get_bias(model *) + void free_problem (problem *) + void free_parameter (parameter *) + void set_verbosity(int) diff --git a/.venv/Lib/site-packages/sklearn/svm/_liblinear.pyx b/.venv/Lib/site-packages/sklearn/svm/_liblinear.pyx new file mode 100644 index 0000000000000000000000000000000000000000..6360e705ba04ae3f4692bdcd6a2958bcc0478a66 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_liblinear.pyx @@ -0,0 +1,147 @@ +""" +Wrapper for liblinear + +Author: fabian.pedregosa@inria.fr +""" + +import numpy as np + +from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2 +from ..utils._typedefs cimport float32_t, float64_t, int32_t + +include "_liblinear.pxi" + + +def train_wrap( + object X, + const float64_t[::1] Y, + bint is_sparse, + int solver_type, + double eps, + double bias, + double C, + const float64_t[:] class_weight, + int max_iter, + unsigned random_seed, + double epsilon, + const 
float64_t[::1] sample_weight +): + cdef parameter *param + cdef problem *problem + cdef model *model + cdef char_const_ptr error_msg + cdef int len_w + cdef bint X_has_type_float64 = X.dtype == np.float64 + cdef char * X_data_bytes_ptr + cdef const float64_t[::1] X_data_64 + cdef const float32_t[::1] X_data_32 + cdef const int32_t[::1] X_indices + cdef const int32_t[::1] X_indptr + + if is_sparse: + X_indices = X.indices + X_indptr = X.indptr + if X_has_type_float64: + X_data_64 = X.data + X_data_bytes_ptr = &X_data_64[0] + else: + X_data_32 = X.data + X_data_bytes_ptr = &X_data_32[0] + + problem = csr_set_problem( + X_data_bytes_ptr, + X_has_type_float64, + &X_indices[0], + &X_indptr[0], + (X.shape[0]), + (X.shape[1]), + (X.nnz), + bias, + &sample_weight[0], + &Y[0] + ) + else: + X_as_1d_array = X.reshape(-1) + if X_has_type_float64: + X_data_64 = X_as_1d_array + X_data_bytes_ptr = &X_data_64[0] + else: + X_data_32 = X_as_1d_array + X_data_bytes_ptr = &X_data_32[0] + + problem = set_problem( + X_data_bytes_ptr, + X_has_type_float64, + (X.shape[0]), + (X.shape[1]), + (np.count_nonzero(X)), + bias, + &sample_weight[0], + &Y[0] + ) + + cdef int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.intc) + param = set_parameter( + solver_type, + eps, + C, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + max_iter, + random_seed, + epsilon + ) + + error_msg = check_parameter(problem, param) + if error_msg: + free_problem(problem) + free_parameter(param) + raise ValueError(error_msg) + + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + blas_functions.axpy = _axpy[double] + blas_functions.scal = _scal[double] + blas_functions.nrm2 = _nrm2[double] + + # early return + with nogil: + model = train(problem, param, &blas_functions) + + # FREE + free_problem(problem) + free_parameter(param) + # destroy_param(param) don't call this or it will destroy class_weight_label and class_weight + + # coef matrix holder created as fortran since that's what's used in liblinear + cdef float64_t[::1, :] w + cdef int nr_class = get_nr_class(model) + + cdef int labels_ = nr_class + if nr_class == 2: + labels_ = 1 + cdef int32_t[::1] n_iter = np.zeros(labels_, dtype=np.intc) + get_n_iter(model, &n_iter[0]) + + cdef int nr_feature = get_nr_feature(model) + if bias > 0: + nr_feature = nr_feature + 1 + if nr_class == 2 and solver_type != 4: # solver is not Crammer-Singer + w = np.empty((1, nr_feature), order='F') + copy_w(&w[0, 0], model, nr_feature) + else: + len_w = (nr_class) * nr_feature + w = np.empty((nr_class, nr_feature), order='F') + copy_w(&w[0, 0], model, len_w) + + free_and_destroy_model(&model) + + return w.base, n_iter.base + + +def set_verbosity_wrap(int verbosity): + """ + Control verbosity of libsvm library + """ + set_verbosity(verbosity) diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.lib b/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.lib new file mode 100644 index 0000000000000000000000000000000000000000..fee32a6a6507fe1a3388abe56979d2e3ad51d3bd Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.lib differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.pyd b/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..d8b6b575582bf79672d465f7c628722f05761d1e Binary files /dev/null and 
b/.venv/Lib/site-packages/sklearn/svm/_libsvm.cp39-win_amd64.pyd differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm.pxi b/.venv/Lib/site-packages/sklearn/svm/_libsvm.pxi new file mode 100644 index 0000000000000000000000000000000000000000..6eab92b9d7b375928771759d2b4c2bb1d5ed2677 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_libsvm.pxi @@ -0,0 +1,75 @@ +################################################################################ +# Includes +from ..utils._typedefs cimport intp_t + +cdef extern from "_svm_cython_blas_helpers.h": + ctypedef double (*dot_func)(int, const double*, int, const double*, int) + cdef struct BlasFunctions: + dot_func dot + + +cdef extern from "svm.h": + cdef struct svm_node + cdef struct svm_model + cdef struct svm_parameter: + int svm_type + int kernel_type + int degree # for poly + double gamma # for poly/rbf/sigmoid + double coef0 # for poly/sigmoid + + # these are for training only + double cache_size # in MB + double eps # stopping criteria + double C # for C_SVC, EPSILON_SVR and NU_SVR + int nr_weight # for C_SVC + int *weight_label # for C_SVC + double* weight # for C_SVC + double nu # for NU_SVC, ONE_CLASS, and NU_SVR + double p # for EPSILON_SVR + int shrinking # use the shrinking heuristics + int probability # do probability estimates + int max_iter # ceiling on Solver runtime + int random_seed # seed for random generator in probability estimation + + cdef struct svm_problem: + int l + double *y + svm_node *x + double *W # instance weights + + char *svm_check_parameter(svm_problem *, svm_parameter *) + svm_model *svm_train(svm_problem *, svm_parameter *, int *, BlasFunctions *) nogil + void svm_free_and_destroy_model(svm_model** model_ptr_ptr) + void svm_cross_validation(svm_problem *, svm_parameter *, int nr_fold, double *target, BlasFunctions *) nogil + + +cdef extern from "libsvm_helper.c": + # this file contains methods for accessing libsvm 'hidden' fields + svm_node **dense_to_sparse (char *, intp_t *) + void set_parameter (svm_parameter *, int , int , int , double, double , + double , double , double , double, + double, int, int, int, char *, char *, int, + int) + void set_problem (svm_problem *, char *, char *, char *, intp_t *, int) + + svm_model *set_model (svm_parameter *, int, char *, intp_t *, + char *, intp_t *, intp_t *, char *, + char *, char *, char *, char *) + + void copy_sv_coef (char *, svm_model *) + void copy_n_iter (char *, svm_model *) + void copy_intercept (char *, svm_model *, intp_t *) + void copy_SV (char *, svm_model *, intp_t *) + int copy_support (char *data, svm_model *model) + int copy_predict (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil + int copy_predict_proba (char *, svm_model *, intp_t *, char *, BlasFunctions *) nogil + int copy_predict_values(char *, svm_model *, intp_t *, char *, int, BlasFunctions *) nogil + void copy_nSV (char *, svm_model *) + void copy_probA (char *, svm_model *, intp_t *) + void copy_probB (char *, svm_model *, intp_t *) + intp_t get_l (svm_model *) + intp_t get_nr (svm_model *) + int free_problem (svm_problem *) + int free_model (svm_model *) + void set_verbosity(int) diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm.pyx b/.venv/Lib/site-packages/sklearn/svm/_libsvm.pyx new file mode 100644 index 0000000000000000000000000000000000000000..6abe5045972bf49c30d649c4675f13185d91b9a4 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_libsvm.pyx @@ -0,0 +1,917 @@ +""" +Binding for libsvm_skl +---------------------- + +These are the bindings 
for libsvm_skl, which is a fork of libsvm[1] +that adds to libsvm some capabilities, like index of support vectors +and efficient representation of dense matrices. + +These are low-level routines, but can be used for flexibility or +performance reasons. See sklearn.svm for a higher-level API. + +Low-level memory management is done in libsvm_helper.c. If we happen +to run out of memory a MemoryError will be raised. In practice this is +not very helpful since high chances are malloc fails inside svm.cpp, +where no sort of memory checks are done. + +[1] https://www.csie.ntu.edu.tw/~cjlin/libsvm/ + +Notes +----- +The signature mode='c' is somewhat superficial, since we already +check that arrays are C-contiguous in svm.py + +Authors +------- +2010: Fabian Pedregosa + Gael Varoquaux +""" + +import numpy as np +from libc.stdlib cimport free +from ..utils._cython_blas cimport _dot +from ..utils._typedefs cimport float64_t, int32_t, intp_t + +include "_libsvm.pxi" + +cdef extern from *: + ctypedef struct svm_parameter: + pass + + +################################################################################ +# Internal variables +LIBSVM_KERNEL_TYPES = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'] + + +################################################################################ +# Wrapper functions + +def fit( + const float64_t[:, ::1] X, + const float64_t[::1] Y, + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0.0, + double tol=1e-3, + double C=1.0, + double nu=0.5, + double epsilon=0.1, + const float64_t[::1] class_weight=np.empty(0), + const float64_t[::1] sample_weight=np.empty(0), + int shrinking=1, + int probability=0, + double cache_size=100., + int max_iter=-1, + int random_seed=0, +): + """ + Train the model using libsvm (low-level method) + + Parameters + ---------- + X : array-like, dtype=float64 of shape (n_samples, n_features) + + Y : array, dtype=float64 of shape (n_samples,) + target vector + + svm_type : {0, 1, 2, 3, 4}, default=0 + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR + respectively. + + kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf" + Kernel to use in the model: linear, polynomial, RBF, sigmoid + or precomputed. + + degree : int32, default=3 + Degree of the polynomial kernel (only relevant if kernel is + set to polynomial). + + gamma : float64, default=0.1 + Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other + kernels. + + coef0 : float64, default=0 + Independent parameter in poly/sigmoid kernel. + + tol : float64, default=1e-3 + Numeric stopping criterion (WRITEME). + + C : float64, default=1 + C parameter in C-Support Vector Classification. + + nu : float64, default=0.5 + An upper bound on the fraction of training errors and a lower bound of + the fraction of support vectors. Should be in the interval (0, 1]. + + epsilon : double, default=0.1 + Epsilon parameter in the epsilon-insensitive loss function. + + class_weight : array, dtype=float64, shape (n_classes,), \ + default=np.empty(0) + Set the parameter C of class i to class_weight[i]*C for + SVC. If not given, all classes are supposed to have + weight one. + + sample_weight : array, dtype=float64, shape (n_samples,), \ + default=np.empty(0) + Weights assigned to each sample. + + shrinking : int, default=1 + Whether to use the shrinking heuristic. + + probability : int, default=0 + Whether to enable probability estimates. 
+ + cache_size : float64, default=100 + Cache size for gram matrix columns (in megabytes). + + max_iter : int (-1 for no limit), default=-1 + Stop solver after this many iterations regardless of accuracy + (XXX Currently there is no API to know whether this kicked in.) + + random_seed : int, default=0 + Seed for the random number generator used for probability estimates. + + Returns + ------- + support : array of shape (n_support,) + Index of support vectors. + + support_vectors : array of shape (n_support, n_features) + Support vectors (equivalent to X[support]). Will return an + empty array in the case of precomputed kernel. + + n_class_SV : array of shape (n_class,) + Number of support vectors in each class. + + sv_coef : array of shape (n_class-1, n_support) + Coefficients of support vectors in decision function. + + intercept : array of shape (n_class*(n_class-1)/2,) + Intercept in decision function. + + probA, probB : array of shape (n_class*(n_class-1)/2,) + Probability estimates, empty array for probability=False. + + n_iter : ndarray of shape (max(1, (n_class * (n_class - 1) // 2)),) + Number of iterations run by the optimization routine to fit the model. + """ + + cdef svm_parameter param + cdef svm_problem problem + cdef svm_model *model + cdef const char *error_msg + cdef intp_t SV_len + + if len(sample_weight) == 0: + sample_weight = np.ones(X.shape[0], dtype=np.float64) + else: + assert sample_weight.shape[0] == X.shape[0], ( + f"sample_weight and X have incompatible shapes: sample_weight has " + f"{sample_weight.shape[0]} samples while X has {X.shape[0]}" + ) + + kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) + set_problem( + &problem, + &X[0, 0], + &Y[0], + &sample_weight[0], + X.shape, + kernel_index, + ) + if problem.x == NULL: + raise MemoryError("Seems we've run out of memory") + cdef int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) + set_parameter( + ¶m, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + epsilon, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + max_iter, + random_seed, + ) + + error_msg = svm_check_parameter(&problem, ¶m) + if error_msg: + # for SVR: epsilon is called p in libsvm + error_repl = error_msg.decode('utf-8').replace("p < 0", "epsilon < 0") + raise ValueError(error_repl) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + # this does the real work + cdef int fit_status = 0 + with nogil: + model = svm_train(&problem, ¶m, &fit_status, &blas_functions) + + # from here until the end, we just copy the data returned by + # svm_train + SV_len = get_l(model) + n_class = get_nr(model) + + cdef int[::1] n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc) + copy_n_iter( &n_iter[0], model) + + cdef float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) + copy_sv_coef( &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model) + + # the intercept is just model.rho but with sign changed + cdef float64_t[::1] intercept = np.empty( + int((n_class*(n_class-1))/2), dtype=np.float64 + ) + copy_intercept( &intercept[0], model, intercept.shape) + + cdef int32_t[::1] support = np.empty(SV_len, dtype=np.int32) + copy_support( &support[0] if support.size > 0 else NULL, model) + + # copy model.SV + cdef float64_t[:, ::1] support_vectors + if kernel_index == 4: + # precomputed kernel + support_vectors = 
np.empty((0, 0), dtype=np.float64) + else: + support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64) + copy_SV( + &support_vectors[0, 0] if support_vectors.size > 0 else NULL, + model, + support_vectors.shape, + ) + + cdef int32_t[::1] n_class_SV + if svm_type == 0 or svm_type == 1: + n_class_SV = np.empty(n_class, dtype=np.int32) + copy_nSV( &n_class_SV[0] if n_class_SV.size > 0 else NULL, model) + else: + # OneClass and SVR are considered to have 2 classes + n_class_SV = np.array([SV_len, SV_len], dtype=np.int32) + + cdef float64_t[::1] probA + cdef float64_t[::1] probB + if probability != 0: + if svm_type < 2: # SVC and NuSVC + probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) + probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) + copy_probB( &probB[0], model, probB.shape) + else: + probA = np.empty(1, dtype=np.float64) + probB = np.empty(0, dtype=np.float64) + copy_probA( &probA[0], model, probA.shape) + else: + probA = np.empty(0, dtype=np.float64) + probB = np.empty(0, dtype=np.float64) + + svm_free_and_destroy_model(&model) + free(problem.x) + + return ( + support.base, + support_vectors.base, + n_class_SV.base, + sv_coef.base, + intercept.base, + probA.base, + probB.base, + fit_status, + n_iter.base, + ) + + +cdef void set_predict_params( + svm_parameter *param, + int svm_type, + kernel, + int degree, + double gamma, + double coef0, + double cache_size, + int probability, + int nr_weight, + char *weight_label, + char *weight, +) except *: + """Fill param with prediction time-only parameters.""" + + # training-time only parameters + cdef double C = 0.0 + cdef double epsilon = 0.1 + cdef int max_iter = 0 + cdef double nu = 0.5 + cdef int shrinking = 0 + cdef double tol = 0.1 + cdef int random_seed = -1 + + kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) + + set_parameter( + param, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + epsilon, + shrinking, + probability, + nr_weight, + weight_label, + weight, + max_iter, + random_seed, + ) + + +def predict( + const float64_t[:, ::1] X, + const int32_t[::1] support, + const float64_t[:, ::1] SV, + const int32_t[::1] nSV, + const float64_t[:, ::1] sv_coef, + const float64_t[::1] intercept, + const float64_t[::1] probA=np.empty(0), + const float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0.0, + const float64_t[::1] class_weight=np.empty(0), + const float64_t[::1] sample_weight=np.empty(0), + double cache_size=100.0, +): + """ + Predict target values of X given a model (low-level method) + + Parameters + ---------- + X : array-like, dtype=float of shape (n_samples, n_features) + + support : array of shape (n_support,) + Index of support vectors in training set. + + SV : array of shape (n_support, n_features) + Support vectors. + + nSV : array of shape (n_class,) + Number of support vectors in each class. + + sv_coef : array of shape (n_class-1, n_support) + Coefficients of support vectors in decision function. + + intercept : array of shape (n_class*(n_class-1)/2) + Intercept in decision function. + + probA, probB : array of shape (n_class*(n_class-1)/2,) + Probability estimates. + + svm_type : {0, 1, 2, 3, 4}, default=0 + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR + respectively. + + kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf" + Kernel to use in the model: linear, polynomial, RBF, sigmoid + or precomputed. 
+ + degree : int32, default=3 + Degree of the polynomial kernel (only relevant if kernel is + set to polynomial). + + gamma : float64, default=0.1 + Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other + kernels. + + coef0 : float64, default=0.0 + Independent parameter in poly/sigmoid kernel. + + Returns + ------- + dec_values : array + Predicted values. + """ + cdef float64_t[::1] dec_values + cdef svm_parameter param + cdef svm_model *model + cdef int rv + + cdef int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) + + set_predict_params( + ¶m, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 0, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + ) + model = set_model( + ¶m, + nSV.shape[0], + &SV[0, 0] if SV.size > 0 else NULL, + SV.shape, + &support[0] if support.size > 0 else NULL, + support.shape, + sv_coef.strides, + &sv_coef[0, 0] if sv_coef.size > 0 else NULL, + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + # TODO: use check_model + try: + dec_values = np.empty(X.shape[0]) + with nogil: + rv = copy_predict( + &X[0, 0], + model, + X.shape, + &dec_values[0], + &blas_functions, + ) + if rv < 0: + raise MemoryError("We've run out of memory") + finally: + free_model(model) + + return dec_values.base + + +def predict_proba( + const float64_t[:, ::1] X, + const int32_t[::1] support, + const float64_t[:, ::1] SV, + const int32_t[::1] nSV, + float64_t[:, ::1] sv_coef, + float64_t[::1] intercept, + float64_t[::1] probA=np.empty(0), + float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0.0, + float64_t[::1] class_weight=np.empty(0), + float64_t[::1] sample_weight=np.empty(0), + double cache_size=100.0, +): + """ + Predict probabilities + + svm_model stores all parameters needed to predict a given value. + + For speed, all real work is done at the C level in function + copy_predict (libsvm_helper.c). + + We have to reconstruct model and parameters to make sure we stay + in sync with the python object. + + See sklearn.svm.predict for a complete list of parameters. + + Parameters + ---------- + X : array-like, dtype=float of shape (n_samples, n_features) + + support : array of shape (n_support,) + Index of support vectors in training set. + + SV : array of shape (n_support, n_features) + Support vectors. + + nSV : array of shape (n_class,) + Number of support vectors in each class. + + sv_coef : array of shape (n_class-1, n_support) + Coefficients of support vectors in decision function. + + intercept : array of shape (n_class*(n_class-1)/2,) + Intercept in decision function. + + probA, probB : array of shape (n_class*(n_class-1)/2,) + Probability estimates. + + svm_type : {0, 1, 2, 3, 4}, default=0 + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR + respectively. + + kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf" + Kernel to use in the model: linear, polynomial, RBF, sigmoid + or precomputed. + + degree : int32, default=3 + Degree of the polynomial kernel (only relevant if kernel is + set to polynomial). + + gamma : float64, default=0.1 + Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other + kernels. 
+ + coef0 : float64, default=0.0 + Independent parameter in poly/sigmoid kernel. + + Returns + ------- + dec_values : array + Predicted values. + """ + cdef float64_t[:, ::1] dec_values + cdef svm_parameter param + cdef svm_model *model + cdef int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) + cdef int rv + + set_predict_params( + ¶m, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 1, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + ) + model = set_model( + ¶m, + nSV.shape[0], + &SV[0, 0] if SV.size > 0 else NULL, + SV.shape, + &support[0], + support.shape, + sv_coef.strides, + &sv_coef[0, 0], + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + + cdef intp_t n_class = get_nr(model) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + try: + dec_values = np.empty((X.shape[0], n_class), dtype=np.float64) + with nogil: + rv = copy_predict_proba( + &X[0, 0], + model, + X.shape, + &dec_values[0, 0], + &blas_functions, + ) + if rv < 0: + raise MemoryError("We've run out of memory") + finally: + free_model(model) + + return dec_values.base + + +def decision_function( + const float64_t[:, ::1] X, + const int32_t[::1] support, + const float64_t[:, ::1] SV, + const int32_t[::1] nSV, + const float64_t[:, ::1] sv_coef, + const float64_t[::1] intercept, + const float64_t[::1] probA=np.empty(0), + const float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0.0, + const float64_t[::1] class_weight=np.empty(0), + const float64_t[::1] sample_weight=np.empty(0), + double cache_size=100.0, +): + """ + Predict margin (libsvm name for this is predict_values) + + We have to reconstruct model and parameters to make sure we stay + in sync with the python object. + + Parameters + ---------- + X : array-like, dtype=float, size=[n_samples, n_features] + + support : array, shape=[n_support] + Index of support vectors in training set. + + SV : array, shape=[n_support, n_features] + Support vectors. + + nSV : array, shape=[n_class] + Number of support vectors in each class. + + sv_coef : array, shape=[n_class-1, n_support] + Coefficients of support vectors in decision function. + + intercept : array, shape=[n_class*(n_class-1)/2] + Intercept in decision function. + + probA, probB : array, shape=[n_class*(n_class-1)/2] + Probability estimates. + + svm_type : {0, 1, 2, 3, 4}, optional + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR + respectively. 0 by default. + + kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, optional + Kernel to use in the model: linear, polynomial, RBF, sigmoid + or precomputed. 'rbf' by default. + + degree : int32, optional + Degree of the polynomial kernel (only relevant if kernel is + set to polynomial), 3 by default. + + gamma : float64, optional + Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other + kernels. 0.1 by default. + + coef0 : float64, optional + Independent parameter in poly/sigmoid kernel. 0 by default. + + Returns + ------- + dec_values : array + Predicted values. 
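Note (added for clarity, not in the original docstring): for classification models (svm_type 0 or 1) the returned array has one column per unordered pair of classes, i.e. n_class * (n_class - 1) / 2 columns (one-vs-one), while regression and one-class models (svm_type > 1) return a single column, as the code below computes. For example, with four classes:

    >>> n_class = 4
    >>> n_class * (n_class - 1) // 2
    6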
+ """ + cdef float64_t[:, ::1] dec_values + cdef svm_parameter param + cdef svm_model *model + cdef intp_t n_class + + cdef int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) + + cdef int rv + + set_predict_params( + ¶m, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 0, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + ) + + model = set_model( + ¶m, + nSV.shape[0], + &SV[0, 0] if SV.size > 0 else NULL, + SV.shape, + &support[0], + support.shape, + sv_coef.strides, + &sv_coef[0, 0], + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + + if svm_type > 1: + n_class = 1 + else: + n_class = get_nr(model) + n_class = n_class * (n_class - 1) // 2 + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + try: + dec_values = np.empty((X.shape[0], n_class), dtype=np.float64) + with nogil: + rv = copy_predict_values( + &X[0, 0], + model, + X.shape, + &dec_values[0, 0], + n_class, + &blas_functions, + ) + if rv < 0: + raise MemoryError("We've run out of memory") + finally: + free_model(model) + + return dec_values.base + + +def cross_validation( + const float64_t[:, ::1] X, + const float64_t[::1] Y, + int n_fold, + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0.0, + double tol=1e-3, + double C=1.0, + double nu=0.5, + double epsilon=0.1, + float64_t[::1] class_weight=np.empty(0), + float64_t[::1] sample_weight=np.empty(0), + int shrinking=0, + int probability=0, + double cache_size=100.0, + int max_iter=-1, + int random_seed=0, +): + """ + Binding of the cross-validation routine (low-level routine) + + Parameters + ---------- + + X : array-like, dtype=float of shape (n_samples, n_features) + + Y : array, dtype=float of shape (n_samples,) + target vector + + n_fold : int32 + Number of folds for cross validation. + + svm_type : {0, 1, 2, 3, 4}, default=0 + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR + respectively. + + kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default='rbf' + Kernel to use in the model: linear, polynomial, RBF, sigmoid + or precomputed. + + degree : int32, default=3 + Degree of the polynomial kernel (only relevant if kernel is + set to polynomial). + + gamma : float64, default=0.1 + Gamma parameter in rbf, poly and sigmoid kernels. Ignored by other + kernels. + + coef0 : float64, default=0.0 + Independent parameter in poly/sigmoid kernel. + + tol : float64, default=1e-3 + Numeric stopping criterion (WRITEME). + + C : float64, default=1 + C parameter in C-Support Vector Classification. + + nu : float64, default=0.5 + An upper bound on the fraction of training errors and a lower bound of + the fraction of support vectors. Should be in the interval (0, 1]. + + epsilon : double, default=0.1 + Epsilon parameter in the epsilon-insensitive loss function. + + class_weight : array, dtype=float64, shape (n_classes,), \ + default=np.empty(0) + Set the parameter C of class i to class_weight[i]*C for + SVC. If not given, all classes are supposed to have + weight one. + + sample_weight : array, dtype=float64, shape (n_samples,), \ + default=np.empty(0) + Weights assigned to each sample. + + shrinking : int, default=1 + Whether to use the shrinking heuristic. + + probability : int, default=0 + Whether to enable probability estimates. 
+ + cache_size : float64, default=100 + Cache size for gram matrix columns (in megabytes). + + max_iter : int (-1 for no limit), default=-1 + Stop solver after this many iterations regardless of accuracy + (XXX Currently there is no API to know whether this kicked in.) + + random_seed : int, default=0 + Seed for the random number generator used for probability estimates. + + Returns + ------- + target : array, float + + """ + + cdef svm_parameter param + cdef svm_problem problem + cdef const char *error_msg + + if len(sample_weight) == 0: + sample_weight = np.ones(X.shape[0], dtype=np.float64) + else: + assert sample_weight.shape[0] == X.shape[0], ( + f"sample_weight and X have incompatible shapes: sample_weight has " + f"{sample_weight.shape[0]} samples while X has {X.shape[0]}" + ) + + if X.shape[0] < n_fold: + raise ValueError("Number of samples is less than number of folds") + + # set problem + kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) + set_problem( + &problem, + &X[0, 0], + &Y[0], + &sample_weight[0] if sample_weight.size > 0 else NULL, + X.shape, + kernel_index, + ) + if problem.x == NULL: + raise MemoryError("Seems we've run out of memory") + cdef int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) + + # set parameters + set_parameter( + ¶m, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + tol, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + max_iter, + random_seed, + ) + + error_msg = svm_check_parameter(&problem, ¶m) + if error_msg: + raise ValueError(error_msg) + + cdef float64_t[::1] target + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + try: + target = np.empty((X.shape[0]), dtype=np.float64) + with nogil: + svm_cross_validation( + &problem, + ¶m, + n_fold, + &target[0], + &blas_functions, + ) + finally: + free(problem.x) + + return target.base + + +def set_verbosity_wrap(int verbosity): + """ + Control verbosity of libsvm library + """ + set_verbosity(verbosity) diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.lib b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.lib new file mode 100644 index 0000000000000000000000000000000000000000..41d212390667b6780c4a569477067426392223aa Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.lib differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.pyd b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..12402160f3240a3b7990a976fe349e367e153dd2 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.cp39-win_amd64.pyd differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.pyx b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.pyx new file mode 100644 index 0000000000000000000000000000000000000000..f1b5e8edf167ea86cb5f1b893bb61e371b822666 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_libsvm_sparse.pyx @@ -0,0 +1,550 @@ +import numpy as np +from scipy import sparse +from ..utils._cython_blas cimport _dot +from ..utils._typedefs cimport float64_t, int32_t, intp_t + +cdef extern from *: + ctypedef char* const_char_p "const char*" + +################################################################################ +# Includes + +cdef extern from 
"_svm_cython_blas_helpers.h": + ctypedef double (*dot_func)(int, const double*, int, const double*, int) + cdef struct BlasFunctions: + dot_func dot + +cdef extern from "svm.h": + cdef struct svm_csr_node + cdef struct svm_csr_model + cdef struct svm_parameter + cdef struct svm_csr_problem + char *svm_csr_check_parameter(svm_csr_problem *, svm_parameter *) + svm_csr_model *svm_csr_train(svm_csr_problem *, svm_parameter *, int *, BlasFunctions *) nogil + void svm_csr_free_and_destroy_model(svm_csr_model** model_ptr_ptr) + +cdef extern from "libsvm_sparse_helper.c": + # this file contains methods for accessing libsvm 'hidden' fields + svm_csr_problem * csr_set_problem ( + char *, intp_t *, char *, intp_t *, char *, char *, char *, int) + svm_csr_model *csr_set_model(svm_parameter *param, int nr_class, + char *SV_data, intp_t *SV_indices_dims, + char *SV_indices, intp_t *SV_intptr_dims, + char *SV_intptr, + char *sv_coef, char *rho, char *nSV, + char *probA, char *probB) + svm_parameter *set_parameter (int , int , int , double, double , + double , double , double , double, + double, int, int, int, char *, char *, int, + int) + void copy_sv_coef (char *, svm_csr_model *) + void copy_n_iter (char *, svm_csr_model *) + void copy_support (char *, svm_csr_model *) + void copy_intercept (char *, svm_csr_model *, intp_t *) + int copy_predict (char *, svm_csr_model *, intp_t *, char *, BlasFunctions *) + int csr_copy_predict_values (intp_t *data_size, char *data, intp_t *index_size, + char *index, intp_t *intptr_size, char *size, + svm_csr_model *model, char *dec_values, int nr_class, BlasFunctions *) + int csr_copy_predict (intp_t *data_size, char *data, intp_t *index_size, + char *index, intp_t *intptr_size, char *size, + svm_csr_model *model, char *dec_values, BlasFunctions *) nogil + int csr_copy_predict_proba (intp_t *data_size, char *data, intp_t *index_size, + char *index, intp_t *intptr_size, char *size, + svm_csr_model *model, char *dec_values, BlasFunctions *) nogil + + int copy_predict_values(char *, svm_csr_model *, intp_t *, char *, int, BlasFunctions *) + int csr_copy_SV (char *values, intp_t *n_indices, + char *indices, intp_t *n_indptr, char *indptr, + svm_csr_model *model, int n_features) + intp_t get_nonzero_SV (svm_csr_model *) + void copy_nSV (char *, svm_csr_model *) + void copy_probA (char *, svm_csr_model *, intp_t *) + void copy_probB (char *, svm_csr_model *, intp_t *) + intp_t get_l (svm_csr_model *) + intp_t get_nr (svm_csr_model *) + int free_problem (svm_csr_problem *) + int free_model (svm_csr_model *) + int free_param (svm_parameter *) + int free_model_SV(svm_csr_model *model) + void set_verbosity(int) + + +def libsvm_sparse_train (int n_features, + const float64_t[::1] values, + const int32_t[::1] indices, + const int32_t[::1] indptr, + const float64_t[::1] Y, + int svm_type, int kernel_type, int degree, double gamma, + double coef0, double eps, double C, + const float64_t[::1] class_weight, + const float64_t[::1] sample_weight, + double nu, double cache_size, double p, int + shrinking, int probability, int max_iter, + int random_seed): + """ + Wrap svm_train from libsvm using a scipy.sparse.csr matrix + + Work in progress. + + Parameters + ---------- + n_features : number of features. + XXX: can we retrieve this from any other parameter ? + + X : array-like, dtype=float, size=[N, D] + + Y : array, dtype=float, size=[N] + target vector + + ... + + Notes + ------------------- + See sklearn.svm.predict for a complete list of parameters. 
+ + """ + + cdef svm_parameter *param + cdef svm_csr_problem *problem + cdef svm_csr_model *model + cdef const_char_p error_msg + + if len(sample_weight) == 0: + sample_weight = np.ones(Y.shape[0], dtype=np.float64) + else: + assert sample_weight.shape[0] == indptr.shape[0] - 1, \ + "sample_weight and X have incompatible shapes: " + \ + "sample_weight has %s samples while X has %s" % \ + (sample_weight.shape[0], indptr.shape[0] - 1) + + # we should never end up here with a precomputed kernel matrix, + # as this is always dense. + assert(kernel_type != 4) + + # set libsvm problem + problem = csr_set_problem( + &values[0], + indices.shape, + &indices[0], + indptr.shape, + &indptr[0], + &Y[0], + &sample_weight[0], + kernel_type, + ) + + cdef int32_t[::1] \ + class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + + # set parameters + param = set_parameter( + svm_type, + kernel_type, + degree, + gamma, + coef0, + nu, + cache_size, + C, + eps, + p, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, max_iter, + random_seed, + ) + + # check parameters + if (param == NULL or problem == NULL): + raise MemoryError("Seems we've run out of memory") + error_msg = svm_csr_check_parameter(problem, param) + if error_msg: + free_problem(problem) + free_param(param) + raise ValueError(error_msg) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + # call svm_train, this does the real work + cdef int fit_status = 0 + with nogil: + model = svm_csr_train(problem, param, &fit_status, &blas_functions) + + cdef intp_t SV_len = get_l(model) + cdef intp_t n_class = get_nr(model) + + cdef int[::1] n_iter + n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc) + copy_n_iter( &n_iter[0], model) + + # copy model.sv_coef + # we create a new array instead of resizing, otherwise + # it would not erase previous information + cdef float64_t[::1] sv_coef_data + sv_coef_data = np.empty((n_class-1)*SV_len, dtype=np.float64) + copy_sv_coef ( &sv_coef_data[0] if sv_coef_data.size > 0 else NULL, model) + + cdef int32_t[::1] support + support = np.empty(SV_len, dtype=np.int32) + copy_support( &support[0] if support.size > 0 else NULL, model) + + # copy model.rho into the intercept + # the intercept is just model.rho but with sign changed + cdef float64_t[::1]intercept + intercept = np.empty(n_class*(n_class-1)//2, dtype=np.float64) + copy_intercept ( &intercept[0], model, intercept.shape) + + # copy model.SV + # we erase any previous information in SV + # TODO: custom kernel + cdef intp_t nonzero_SV + nonzero_SV = get_nonzero_SV (model) + + cdef float64_t[::1] SV_data + cdef int32_t[::1] SV_indices, SV_indptr + SV_data = np.empty(nonzero_SV, dtype=np.float64) + SV_indices = np.empty(nonzero_SV, dtype=np.int32) + SV_indptr = np.empty(SV_len + 1, dtype=np.int32) + csr_copy_SV( + &SV_data[0] if SV_data.size > 0 else NULL, + SV_indices.shape, + &SV_indices[0] if SV_indices.size > 0 else NULL, + SV_indptr.shape, + &SV_indptr[0] if SV_indptr.size > 0 else NULL, + model, + n_features, + ) + support_vectors_ = sparse.csr_matrix( + (SV_data, SV_indices, SV_indptr), (SV_len, n_features) + ) + + # copy model.nSV + # TODO: do only in classification + cdef int32_t[::1]n_class_SV + n_class_SV = np.empty(n_class, dtype=np.int32) + copy_nSV( &n_class_SV[0], model) + + # # copy probabilities + cdef float64_t[::1] probA, probB + if probability != 0: + if svm_type < 2: # SVC 
and NuSVC + probA = np.empty(n_class*(n_class-1)//2, dtype=np.float64) + probB = np.empty(n_class*(n_class-1)//2, dtype=np.float64) + copy_probB( &probB[0], model, probB.shape) + else: + probA = np.empty(1, dtype=np.float64) + probB = np.empty(0, dtype=np.float64) + copy_probA( &probA[0], model, probA.shape) + else: + probA = np.empty(0, dtype=np.float64) + probB = np.empty(0, dtype=np.float64) + + svm_csr_free_and_destroy_model (&model) + free_problem(problem) + free_param(param) + + return ( + support.base, + support_vectors_, + sv_coef_data.base, + intercept.base, + n_class_SV.base, + probA.base, + probB.base, + fit_status, + n_iter.base, + ) + + +def libsvm_sparse_predict (const float64_t[::1] T_data, + const int32_t[::1] T_indices, + const int32_t[::1] T_indptr, + const float64_t[::1] SV_data, + const int32_t[::1] SV_indices, + const int32_t[::1] SV_indptr, + const float64_t[::1] sv_coef, + const float64_t[::1] + intercept, int svm_type, int kernel_type, int + degree, double gamma, double coef0, double + eps, double C, + const float64_t[:] class_weight, + double nu, double p, int + shrinking, int probability, + const int32_t[::1] nSV, + const float64_t[::1] probA, + const float64_t[::1] probB): + """ + Predict values T given a model. + + For speed, all real work is done at the C level in function + copy_predict (libsvm_helper.c). + + We have to reconstruct model and parameters to make sure we stay + in sync with the python object. + + See sklearn.svm.predict for a complete list of parameters. + + Parameters + ---------- + X : array-like, dtype=float + Y : array + target vector + + Returns + ------- + dec_values : array + predicted values. + """ + cdef float64_t[::1] dec_values + cdef svm_parameter *param + cdef svm_csr_model *model + cdef int32_t[::1] \ + class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef int rv + param = set_parameter( + svm_type, + kernel_type, + degree, + gamma, + coef0, + nu, + 100.0, # cache size has no effect on predict + C, + eps, + p, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + -1, + -1, # random seed has no effect on predict either + ) + + model = csr_set_model( + param, nSV.shape[0], + &SV_data[0] if SV_data.size > 0 else NULL, + SV_indices.shape, + &SV_indices[0] if SV_indices.size > 0 else NULL, + SV_indptr.shape, + &SV_indptr[0] if SV_indptr.size > 0 else NULL, + &sv_coef[0] if sv_coef.size > 0 else NULL, + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + # TODO: use check_model + dec_values = np.empty(T_indptr.shape[0]-1) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + with nogil: + rv = csr_copy_predict( + T_data.shape, + &T_data[0], + T_indices.shape, + &T_indices[0], + T_indptr.shape, + &T_indptr[0], + model, + &dec_values[0], + &blas_functions, + ) + if rv < 0: + raise MemoryError("We've run out of memory") + # free model and param + free_model_SV(model) + free_model(model) + free_param(param) + return dec_values.base + + +def libsvm_sparse_predict_proba( + const float64_t[::1] T_data, + const int32_t[::1] T_indices, + const int32_t[::1] T_indptr, + const float64_t[::1] SV_data, + const int32_t[::1] SV_indices, + const int32_t[::1] SV_indptr, + const float64_t[::1] sv_coef, + const float64_t[::1] + intercept, int svm_type, int kernel_type, int + degree, double gamma, double coef0, double + eps, double C, 
+ const float64_t[:] class_weight, + double nu, double p, int shrinking, int probability, + const int32_t[::1] nSV, + const float64_t[::1] probA, + const float64_t[::1] probB, +): + """ + Predict values T given a model. + """ + cdef float64_t[:, ::1] dec_values + cdef svm_parameter *param + cdef svm_csr_model *model + cdef int32_t[::1] \ + class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + param = set_parameter( + svm_type, + kernel_type, + degree, + gamma, + coef0, + nu, + 100.0, # cache size has no effect on predict + C, + eps, + p, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + -1, + -1, # random seed has no effect on predict either + ) + + model = csr_set_model( + param, + nSV.shape[0], + &SV_data[0] if SV_data.size > 0 else NULL, + SV_indices.shape, + &SV_indices[0] if SV_indices.size > 0 else NULL, + SV_indptr.shape, + &SV_indptr[0] if SV_indptr.size > 0 else NULL, + &sv_coef[0] if sv_coef.size > 0 else NULL, + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + # TODO: use check_model + cdef intp_t n_class = get_nr(model) + cdef int rv + dec_values = np.empty((T_indptr.shape[0]-1, n_class), dtype=np.float64) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + with nogil: + rv = csr_copy_predict_proba( + T_data.shape, + &T_data[0], + T_indices.shape, + &T_indices[0], + T_indptr.shape, + &T_indptr[0], + model, + &dec_values[0, 0], + &blas_functions, + ) + if rv < 0: + raise MemoryError("We've run out of memory") + # free model and param + free_model_SV(model) + free_model(model) + free_param(param) + return dec_values.base + + +def libsvm_sparse_decision_function( + const float64_t[::1] T_data, + const int32_t[::1] T_indices, + const int32_t[::1] T_indptr, + const float64_t[::1] SV_data, + const int32_t[::1] SV_indices, + const int32_t[::1] SV_indptr, + const float64_t[::1] sv_coef, + const float64_t[::1] + intercept, int svm_type, int kernel_type, int + degree, double gamma, double coef0, double + eps, double C, + const float64_t[:] class_weight, + double nu, double p, int shrinking, int probability, + const int32_t[::1] nSV, + const float64_t[::1] probA, + const float64_t[::1] probB, +): + """ + Predict margin (libsvm name for this is predict_values) + + We have to reconstruct model and parameters to make sure we stay + in sync with the python object. 
+ """ + cdef float64_t[:, ::1] dec_values + cdef svm_parameter *param + cdef intp_t n_class + + cdef svm_csr_model *model + cdef int32_t[::1] \ + class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + param = set_parameter( + svm_type, + kernel_type, + degree, + gamma, + coef0, + nu, + 100.0, # cache size has no effect on predict + C, + eps, + p, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + -1, + -1, + ) + + model = csr_set_model( + param, + nSV.shape[0], + &SV_data[0] if SV_data.size > 0 else NULL, + SV_indices.shape, + &SV_indices[0] if SV_indices.size > 0 else NULL, + SV_indptr.shape, + &SV_indptr[0] if SV_indptr.size > 0 else NULL, + &sv_coef[0] if sv_coef.size > 0 else NULL, + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) + + if svm_type > 1: + n_class = 1 + else: + n_class = get_nr(model) + n_class = n_class * (n_class - 1) // 2 + + dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64) + cdef BlasFunctions blas_functions + blas_functions.dot = _dot[double] + if csr_copy_predict_values( + T_data.shape, + &T_data[0], + T_indices.shape, + &T_indices[0], + T_indptr.shape, + &T_indptr[0], + model, + &dec_values[0, 0], + n_class, + &blas_functions, + ) < 0: + raise MemoryError("We've run out of memory") + # free model and param + free_model_SV(model) + free_model(model) + free_param(param) + + return dec_values.base + + +def set_verbosity_wrap(int verbosity): + """ + Control verbosity of libsvm library + """ + set_verbosity(verbosity) diff --git a/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.lib b/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.lib new file mode 100644 index 0000000000000000000000000000000000000000..222817860b0a597634521664a21617f937f695f0 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.lib differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.pyd b/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..cf1ba27dff1ebaf46cd1cc34f6e4724a30979973 Binary files /dev/null and b/.venv/Lib/site-packages/sklearn/svm/_newrand.cp39-win_amd64.pyd differ diff --git a/.venv/Lib/site-packages/sklearn/svm/_newrand.pyx b/.venv/Lib/site-packages/sklearn/svm/_newrand.pyx new file mode 100644 index 0000000000000000000000000000000000000000..585a01fc88c12074f7b4b52d8db05d7a630fcdb9 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/_newrand.pyx @@ -0,0 +1,13 @@ +"""Wrapper for newrand.h""" + +cdef extern from "newrand.h": + void set_seed(unsigned int) + unsigned int bounded_rand_int(unsigned int) + + +def set_seed_wrap(unsigned int custom_seed): + set_seed(custom_seed) + + +def bounded_rand_int_wrap(unsigned int range_): + return bounded_rand_int(range_) diff --git a/.venv/Lib/site-packages/sklearn/svm/meson.build b/.venv/Lib/site-packages/sklearn/svm/meson.build new file mode 100644 index 0000000000000000000000000000000000000000..859b6ae5d8331ebb3e8526a5fb5f454df115eb5a --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/meson.build @@ -0,0 +1,53 @@ +newrand_include = include_directories('src/newrand') +libsvm_include = include_directories('src/libsvm') +liblinear_include = include_directories('src/liblinear') + +_newrand = py.extension_module( + '_newrand', + '_newrand.pyx', + 
override_options: ['cython_language=cpp'], + include_directories: [newrand_include], + cython_args: cython_args, + subdir: 'sklearn/svm', + install: true +) + +libsvm_skl = static_library( + 'libsvm-skl', + ['src/libsvm/libsvm_template.cpp'], +) + +py.extension_module( + '_libsvm', + ['_libsvm.pyx', utils_cython_tree], + include_directories: [newrand_include, libsvm_include], + link_with: libsvm_skl, + cython_args: cython_args, + subdir: 'sklearn/svm', + install: true +) + +py.extension_module( + '_libsvm_sparse', + ['_libsvm_sparse.pyx', utils_cython_tree], + include_directories: [newrand_include, libsvm_include], + link_with: libsvm_skl, + cython_args: cython_args, + subdir: 'sklearn/svm', + install: true +) + +liblinear_skl = static_library( + 'liblinear-skl', + ['src/liblinear/linear.cpp', 'src/liblinear/tron.cpp'], +) + +py.extension_module( + '_liblinear', + ['_liblinear.pyx', utils_cython_tree], + include_directories: [newrand_include, liblinear_include], + link_with: [liblinear_skl], + cython_args: cython_args, + subdir: 'sklearn/svm', + install: true +) diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/COPYRIGHT b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/COPYRIGHT new file mode 100644 index 0000000000000000000000000000000000000000..9f4fdcf69d75e59d7ad9cd15a47742768c1c9032 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/COPYRIGHT @@ -0,0 +1,31 @@ + +Copyright (c) 2007-2014 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
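The _newrand extension module built by the meson.build above is a thin wrapper over set_seed and bounded_rand_int from newrand.h (see _newrand.pyx earlier in this diff). As a minimal sketch, assuming scikit-learn has been built so that sklearn.svm._newrand is importable from the active environment, the wrappers can be exercised like this:

    # Sanity-check sketch for the _newrand wrappers shown above
    # (assumes a built scikit-learn; not part of the diffed sources).
    from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap

    set_seed_wrap(0)  # seed the random generator shared by the SVM solvers
    draws = [bounded_rand_int_wrap(10) for _ in range(5)]
    assert all(0 <= d < 10 for d in draws)  # bounded_rand_int stays in [0, range_)
    print(draws)

bounded_rand_int applies the Lemire-style bounded-integer post-processing on top of the Mersenne twister, which is what the 2020 modification note in linear.cpp below refers to.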
diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/_cython_blas_helpers.h b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/_cython_blas_helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..a28468112399c0e4a52a5ee87f65b990b9b9a276 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/_cython_blas_helpers.h @@ -0,0 +1,16 @@ +#ifndef _CYTHON_BLAS_HELPERS_H +#define _CYTHON_BLAS_HELPERS_H + +typedef double (*dot_func)(int, const double*, int, const double*, int); +typedef void (*axpy_func)(int, double, const double*, int, double*, int); +typedef void (*scal_func)(int, double, const double*, int); +typedef double (*nrm2_func)(int, const double*, int); + +typedef struct BlasFunctions{ + dot_func dot; + axpy_func axpy; + scal_func scal; + nrm2_func nrm2; +} BlasFunctions; + +#endif diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/liblinear_helper.c b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/liblinear_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..c7a05315f87ded23005059a581dce5ed369a1d7c --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/liblinear_helper.c @@ -0,0 +1,236 @@ +#include +#define PY_SSIZE_T_CLEAN +#include +#include "linear.h" + + +/* + * Convert matrix to sparse representation suitable for liblinear. x is + * expected to be an array of length n_samples*n_features. + * + * Whether the matrix is densely or sparsely populated, the fastest way to + * convert it to liblinear's sparse format is to calculate the amount of memory + * needed and allocate a single big block. + * + * Special care must be taken with indices, since liblinear indices start at 1 + * and not at 0. + * + * If bias is > 0, we append an item at the end. + */ +static struct feature_node **dense_to_sparse(char *x, int double_precision, + int n_samples, int n_features, int n_nonzero, double bias) +{ + float *x32 = (float *)x; + double *x64 = (double *)x; + struct feature_node **sparse; + int i, j; /* number of nonzero elements in row i */ + struct feature_node *T; /* pointer to the top of the stack */ + int have_bias = (bias > 0); + + sparse = malloc (n_samples * sizeof(struct feature_node *)); + if (sparse == NULL) + return NULL; + + n_nonzero += (have_bias+1) * n_samples; + T = malloc (n_nonzero * sizeof(struct feature_node)); + if (T == NULL) { + free(sparse); + return NULL; + } + + for (i=0; ivalue = *x64; + T->index = j; + ++ T; + } + ++ x64; /* go to next element */ + } else { + if (*x32 != 0) { + T->value = *x32; + T->index = j; + ++ T; + } + ++ x32; /* go to next element */ + } + } + + /* set bias element */ + if (have_bias) { + T->value = bias; + T->index = j; + ++ T; + } + + /* set sentinel */ + T->index = -1; + ++ T; + } + + return sparse; +} + + +/* + * Convert scipy.sparse.csr to liblinear's sparse data structure + */ +static struct feature_node **csr_to_sparse(char *x, int double_precision, + int *indices, int *indptr, int n_samples, int n_features, int n_nonzero, + double bias) +{ + float *x32 = (float *)x; + double *x64 = (double *)x; + struct feature_node **sparse; + int i, j=0, k=0, n; + struct feature_node *T; + int have_bias = (bias > 0); + + sparse = malloc (n_samples * sizeof(struct feature_node *)); + if (sparse == NULL) + return NULL; + + n_nonzero += (have_bias+1) * n_samples; + T = malloc (n_nonzero * sizeof(struct feature_node)); + if (T == NULL) { + free(sparse); + return NULL; + } + + for (i=0; ivalue = double_precision ? 
x64[k] : x32[k]; + T->index = indices[k] + 1; /* liblinear uses 1-based indexing */ + ++T; + ++k; + } + + if (have_bias) { + T->value = bias; + T->index = n_features + 1; + ++T; + ++j; + } + + /* set sentinel */ + T->index = -1; + ++T; + } + + return sparse; +} + +struct problem * set_problem(char *X, int double_precision_X, int n_samples, + int n_features, int n_nonzero, double bias, char* sample_weight, + char *Y) +{ + struct problem *problem; + /* not performant but simple */ + problem = malloc(sizeof(struct problem)); + if (problem == NULL) return NULL; + problem->l = n_samples; + problem->n = n_features + (bias > 0); + problem->y = (double *) Y; + problem->W = (double *) sample_weight; + problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features, + n_nonzero, bias); + problem->bias = bias; + + if (problem->x == NULL) { + free(problem); + return NULL; + } + + return problem; +} + +struct problem * csr_set_problem (char *X, int double_precision_X, + char *indices, char *indptr, int n_samples, int n_features, + int n_nonzero, double bias, char *sample_weight, char *Y) +{ + struct problem *problem; + problem = malloc (sizeof (struct problem)); + if (problem == NULL) return NULL; + problem->l = n_samples; + problem->n = n_features + (bias > 0); + problem->y = (double *) Y; + problem->W = (double *) sample_weight; + problem->x = csr_to_sparse(X, double_precision_X, (int *) indices, + (int *) indptr, n_samples, n_features, n_nonzero, bias); + problem->bias = bias; + + if (problem->x == NULL) { + free(problem); + return NULL; + } + + return problem; +} + + +/* Create a parameter struct with and return it */ +struct parameter *set_parameter(int solver_type, double eps, double C, + Py_ssize_t nr_weight, char *weight_label, + char *weight, int max_iter, unsigned seed, + double epsilon) +{ + struct parameter *param = malloc(sizeof(struct parameter)); + if (param == NULL) + return NULL; + + set_seed(seed); + param->solver_type = solver_type; + param->eps = eps; + param->C = C; + param->p = epsilon; // epsilon for epsilon-SVR + param->nr_weight = (int) nr_weight; + param->weight_label = (int *) weight_label; + param->weight = (double *) weight; + param->max_iter = max_iter; + return param; +} + +void copy_w(void *data, struct model *model, int len) +{ + memcpy(data, model->w, len * sizeof(double)); +} + +double get_bias(struct model *model) +{ + return model->bias; +} + +void free_problem(struct problem *problem) +{ + free(problem->x[0]); + free(problem->x); + free(problem); +} + +void free_parameter(struct parameter *param) +{ + free(param); +} + +/* rely on built-in facility to control verbose output */ +static void print_null(const char *s) {} + +static void print_string_stdout(const char *s) +{ + fputs(s ,stdout); + fflush(stdout); +} + +/* provide convenience wrapper */ +void set_verbosity(int verbosity_flag){ + if (verbosity_flag) + set_print_string_function(&print_string_stdout); + else + set_print_string_function(&print_null); +} diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.cpp b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0286d9f1c53fe6c360190892a0cd60d43d441626 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.cpp @@ -0,0 +1,3075 @@ +/* + Modified 2011: + + - Make labels sorted in group_classes, Dan Yamins. 
+ + Modified 2012: + + - Changes roles of +1 and -1 to match scikit API, Andreas Mueller + See issue 546: https://github.com/scikit-learn/scikit-learn/pull/546 + - Also changed roles for pairwise class weights, Andreas Mueller + See issue 1491: https://github.com/scikit-learn/scikit-learn/pull/1491 + + Modified 2014: + + - Remove the hard-coded value of max_iter (1000), that allows max_iter + to be passed as a parameter from the classes LogisticRegression and + LinearSVC, Manoj Kumar + - Added function get_n_iter that exposes the number of iterations. + See issue 3499: https://github.com/scikit-learn/scikit-learn/issues/3499 + See pull 3501: https://github.com/scikit-learn/scikit-learn/pull/3501 + + Modified 2015: + - Patched liblinear for sample_weights - Manoj Kumar + See https://github.com/scikit-learn/scikit-learn/pull/5274 + + Modified 2020: + - Improved random number generator by using a mersenne twister + tweaked + lemire postprocessor. This fixed a convergence issue on windows targets. + Sylvain Marie, Schneider Electric + See + + */ + +#include +#include +#include +#include +#include +#include +#include "linear.h" +#include "tron.h" +#include +#include +#include "../newrand/newrand.h" + +typedef signed char schar; +template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif +template static inline void clone(T*& dst, S* src, int n) +{ + dst = new T[n]; + memcpy((void *)dst,(void *)src,sizeof(T)*n); +} +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) +#define INF HUGE_VAL + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} + +static void (*liblinear_print_string) (const char *) = &print_string_stdout; + +#if 1 +static void info(const char *fmt,...) +{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*liblinear_print_string)(buf); +} +#else +static void info(const char *fmt,...) 
{} +#endif + +class l2r_lr_fun: public function +{ +public: + l2r_lr_fun(const problem *prob, double *C); + ~l2r_lr_fun(); + + double fun(double *w); + void grad(double *w, double *g); + void Hv(double *s, double *Hs); + + int get_nr_variable(void); + +private: + void Xv(double *v, double *Xv); + void XTv(double *v, double *XTv); + + double *C; + double *z; + double *D; + const problem *prob; +}; + +l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C) +{ + int l=prob->l; + + this->prob = prob; + + z = new double[l]; + D = new double[l]; + this->C = C; +} + +l2r_lr_fun::~l2r_lr_fun() +{ + delete[] z; + delete[] D; +} + + +double l2r_lr_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + Xv(w, z); + + for(i=0;i= 0) + f += C[i]*log(1 + exp(-yz)); + else + f += C[i]*(-yz+log(1 + exp(yz))); + } + + return(f); +} + +void l2r_lr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + for(i=0;in; +} + +void l2r_lr_fun::Hv(double *s, double *Hs) +{ + int i; + int l=prob->l; + int w_size=get_nr_variable(); + double *wa = new double[l]; + + Xv(s, wa); + for(i=0;il; + feature_node **x=prob->x; + + for(i=0;iindex!=-1) + { + Xv[i]+=v[s->index-1]*s->value; + s++; + } + } +} + +void l2r_lr_fun::XTv(double *v, double *XTv) +{ + int i; + int l=prob->l; + int w_size=get_nr_variable(); + feature_node **x=prob->x; + + for(i=0;iindex!=-1) + { + XTv[s->index-1]+=v[i]*s->value; + s++; + } + } +} + +class l2r_l2_svc_fun: public function +{ +public: + l2r_l2_svc_fun(const problem *prob, double *C); + ~l2r_l2_svc_fun(); + + double fun(double *w); + void grad(double *w, double *g); + void Hv(double *s, double *Hs); + + int get_nr_variable(void); + +protected: + void Xv(double *v, double *Xv); + void subXv(double *v, double *Xv); + void subXTv(double *v, double *XTv); + + double *C; + double *z; + double *D; + int *I; + int sizeI; + const problem *prob; +}; + +l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C) +{ + int l=prob->l; + + this->prob = prob; + + z = new double[l]; + D = new double[l]; + I = new int[l]; + this->C = C; +} + +l2r_l2_svc_fun::~l2r_l2_svc_fun() +{ + delete[] z; + delete[] D; + delete[] I; +} + +double l2r_l2_svc_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + Xv(w, z); + + for(i=0;i 0) + f += C[i]*d*d; + } + + return(f); +} + +void l2r_l2_svc_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + sizeI = 0; + for (i=0;in; +} + +void l2r_l2_svc_fun::Hv(double *s, double *Hs) +{ + int i; + int w_size=get_nr_variable(); + double *wa = new double[sizeI]; + + subXv(s, wa); + for(i=0;il; + feature_node **x=prob->x; + + for(i=0;iindex!=-1) + { + Xv[i]+=v[s->index-1]*s->value; + s++; + } + } +} + +void l2r_l2_svc_fun::subXv(double *v, double *Xv) +{ + int i; + feature_node **x=prob->x; + + for(i=0;iindex!=-1) + { + Xv[i]+=v[s->index-1]*s->value; + s++; + } + } +} + +void l2r_l2_svc_fun::subXTv(double *v, double *XTv) +{ + int i; + int w_size=get_nr_variable(); + feature_node **x=prob->x; + + for(i=0;iindex!=-1) + { + XTv[s->index-1]+=v[i]*s->value; + s++; + } + } +} + +class l2r_l2_svr_fun: public l2r_l2_svc_fun +{ +public: + l2r_l2_svr_fun(const problem *prob, double *C, double p); + + double fun(double *w); + void grad(double *w, double *g); + +private: + double p; +}; + +l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, 
double *C, double p): + l2r_l2_svc_fun(prob, C) +{ + this->p = p; +} + +double l2r_l2_svr_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + double d; + + Xv(w, z); + + for(i=0;i p) + f += C[i]*(d-p)*(d-p); + } + + return(f); +} + +void l2r_l2_svr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + double d; + + sizeI = 0; + for(i=0;i p) + { + z[sizeI] = C[i]*(d-p); + I[sizeI] = i; + sizeI++; + } + + } + subXTv(z, g); + + for(i=0;iw_size = prob->n; + this->l = prob->l; + this->nr_class = nr_class; + this->eps = eps; + this->max_iter = max_iter; + this->prob = prob; + this->B = new double[nr_class]; + this->G = new double[nr_class]; + this->C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + this->C[i] = prob->W[i] * weighted_C[(int)prob->y[i]]; +} + +Solver_MCSVM_CS::~Solver_MCSVM_CS() +{ + delete[] B; + delete[] G; + delete[] C; +} + +int compare_double(const void *a, const void *b) +{ + if(*(double *)a > *(double *)b) + return -1; + if(*(double *)a < *(double *)b) + return 1; + return 0; +} + +void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) +{ + int r; + double *D; + + clone(D, B, active_i); + if(yi < active_i) + D[yi] += A_i*C_yi; + qsort(D, active_i, sizeof(double), compare_double); + + double beta = D[0] - A_i*C_yi; + for(r=1;ry[i] == m + // alpha[i*nr_class+m] <= 0 if prob->y[i] != m + // If initial alpha isn't zero, uncomment the for loop below to initialize w + for(i=0;ix[i]; + QD[i] = 0; + while(xi->index != -1) + { + double val = xi->value; + QD[i] += val*val; + + // Uncomment the for loop if initial alpha isn't zero + // for(m=0; mindex-1)*nr_class+m] += alpha[i*nr_class+m]*val; + xi++; + } + active_size_i[i] = nr_class; + y_index[i] = (int)prob->y[i]; + index[i] = i; + } + + while(iter < max_iter) + { + double stopping = -INF; + for(i=0;i 0) + { + for(m=0;mx[i]; + while(xi->index!= -1) + { + double *w_i = &w[(xi->index-1)*nr_class]; + for(m=0;mvalue); + xi++; + } + + double minG = INF; + double maxG = -INF; + for(m=0;m maxG) + maxG = G[m]; + } + if(y_index[i] < active_size_i[i]) + if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG) + minG = G[y_index[i]]; + + for(m=0;mm) + { + if(!be_shrunk(i, active_size_i[i], y_index[i], + alpha_i[alpha_index_i[active_size_i[i]]], minG)) + { + swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); + swap(G[m], G[active_size_i[i]]); + if(y_index[i] == active_size_i[i]) + y_index[i] = m; + else if(y_index[i] == m) + y_index[i] = active_size_i[i]; + break; + } + active_size_i[i]--; + } + } + } + + if(active_size_i[i] <= 1) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + + if(maxG-minG <= 1e-12) + continue; + else + stopping = max(maxG - minG, stopping); + + for(m=0;m= 1e-12) + { + d_ind[nz_d] = alpha_index_i[m]; + d_val[nz_d] = d; + nz_d++; + } + } + + xi = prob->x[i]; + while(xi->index != -1) + { + double *w_i = &w[(xi->index-1)*nr_class]; + for(m=0;mvalue; + xi++; + } + } + } + + iter++; + if(iter % 10 == 0) + { + info("."); + } + + if(stopping < eps_shrink) + { + if(stopping < eps && start_from_all == true) + break; + else + { + active_size = l; + for(i=0;i= max_iter) + info("\nWARNING: reaching max number of iterations\n"); + + // calculate objective value + double v = 0; + int nSV = 0; + for(i=0;i 0) + nSV++; + } + for(i=0;iy[i]]; + info("Objective value = %lf\n",v); + info("nSV = %d\n",nSV); 
+ + delete [] alpha; + delete [] alpha_new; + delete [] index; + delete [] QD; + delete [] d_ind; + delete [] d_val; + delete [] alpha_index; + delete [] y_index; + delete [] active_size_i; + return iter; +} + +// A coordinate descent algorithm for +// L1-loss and L2-loss SVM dual problems +// +// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, +// s.t. 0 <= \alpha_i <= upper_bound_i, +// +// where Qij = yi yj xi^T xj and +// D is a diagonal matrix +// +// In L1-SVM case: +// upper_bound_i = Cp if y_i = 1 +// upper_bound_i = Cn if y_i = -1 +// D_ii = 0 +// In L2-SVM case: +// upper_bound_i = INF +// D_ii = 1/(2*Cp) if y_i = 1 +// D_ii = 1/(2*Cn) if y_i = -1 +// +// Given: +// x, y, Cp, Cn +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Algorithm 3 of Hsieh et al., ICML 2008 + +#undef GETI +#define GETI(i) (i) +// To support weights for instances, use GETI(i) (i) + +static int solve_l2r_l1l2_svc( + const problem *prob, double *w, double eps, + double Cp, double Cn, int solver_type, int max_iter) +{ + int l = prob->l; + int w_size = prob->n; + int i, s, iter = 0; + double C, d, G; + double *QD = new double[l]; + int *index = new int[l]; + double *alpha = new double[l]; + schar *y = new schar[l]; + int active_size = l; + + // PG: projected gradient, for shrinking and stopping + double PG; + double PGmax_old = INF; + double PGmin_old = -INF; + double PGmax_new, PGmin_new; + + // default solver_type: L2R_L2LOSS_SVC_DUAL + double *diag = new double[l]; + double *upper_bound = new double[l]; + double *C_ = new double[l]; + for(i=0; iy[i]>0) + C_[i] = prob->W[i] * Cp; + else + C_[i] = prob->W[i] * Cn; + diag[i] = 0.5/C_[i]; + upper_bound[i] = INF; + } + if(solver_type == L2R_L1LOSS_SVC_DUAL) + { + for(i=0; iy[i] > 0) + { + y[i] = +1; + } + else + { + y[i] = -1; + } + } + + // Initial alpha can be set here. 
Note that + // 0 <= alpha[i] <= upper_bound[GETI(i)] + for(i=0; ix[i]; + while (xi->index != -1) + { + double val = xi->value; + QD[i] += val*val; + w[xi->index-1] += y[i]*alpha[i]*val; + xi++; + } + index[i] = i; + } + + while (iter < max_iter) + { + PGmax_new = -INF; + PGmin_new = INF; + + for (i=0; ix[i]; + while(xi->index!= -1) + { + G += w[xi->index-1]*(xi->value); + xi++; + } + G = G*yi-1; + + C = upper_bound[GETI(i)]; + G += alpha[i]*diag[GETI(i)]; + + PG = 0; + if (alpha[i] == 0) + { + if (G > PGmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + else if (G < 0) + PG = G; + } + else if (alpha[i] == C) + { + if (G < PGmin_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + else if (G > 0) + PG = G; + } + else + PG = G; + + PGmax_new = max(PGmax_new, PG); + PGmin_new = min(PGmin_new, PG); + + if(fabs(PG) > 1.0e-12) + { + double alpha_old = alpha[i]; + alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); + d = (alpha[i] - alpha_old)*yi; + xi = prob->x[i]; + while (xi->index != -1) + { + w[xi->index-1] += d*xi->value; + xi++; + } + } + } + + iter++; + if(iter % 10 == 0) + info("."); + + if(PGmax_new - PGmin_new <= eps) + { + if(active_size == l) + break; + else + { + active_size = l; + info("*"); + PGmax_old = INF; + PGmin_old = -INF; + continue; + } + } + PGmax_old = PGmax_new; + PGmin_old = PGmin_new; + if (PGmax_old <= 0) + PGmax_old = INF; + if (PGmin_old >= 0) + PGmin_old = -INF; + } + + info("\noptimization finished, #iter = %d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); + + // calculate objective value + + double v = 0; + int nSV = 0; + for(i=0; i 0) + ++nSV; + } + info("Objective value = %lf\n",v/2); + info("nSV = %d\n",nSV); + + delete [] QD; + delete [] alpha; + delete [] y; + delete [] index; + delete [] diag; + delete [] upper_bound; + delete [] C_; + return iter; +} + + +// A coordinate descent algorithm for +// L1-loss and L2-loss epsilon-SVR dual problem +// +// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i, +// s.t. 
-upper_bound_i <= \beta_i <= upper_bound_i, +// +// where Qij = xi^T xj and +// D is a diagonal matrix +// +// In L1-SVM case: +// upper_bound_i = C +// lambda_i = 0 +// In L2-SVM case: +// upper_bound_i = INF +// lambda_i = 1/(2*C) +// +// Given: +// x, y, p, C +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Algorithm 4 of Ho and Lin, 2012 + +#undef GETI +#define GETI(i) (i) +// To support weights for instances, use GETI(i) (i) + +static int solve_l2r_l1l2_svr( + const problem *prob, double *w, const parameter *param, + int solver_type, int max_iter) +{ + int l = prob->l; + double C = param->C; + double p = param->p; + int w_size = prob->n; + double eps = param->eps; + int i, s, iter = 0; + int active_size = l; + int *index = new int[l]; + + double d, G, H; + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double *beta = new double[l]; + double *QD = new double[l]; + double *y = prob->y; + + // L2R_L2LOSS_SVR_DUAL + double *lambda = new double[l]; + double *upper_bound = new double[l]; + double *C_ = new double[l]; + for (i=0; iW[i] * C; + lambda[i] = 0.5/C_[i]; + upper_bound[i] = INF; + } + if(solver_type == L2R_L1LOSS_SVR_DUAL) + { + for (i=0; ix[i]; + while(xi->index != -1) + { + double val = xi->value; + QD[i] += val*val; + w[xi->index-1] += beta[i]*val; + xi++; + } + + index[i] = i; + } + + + while(iter < max_iter) + { + Gmax_new = 0; + Gnorm1_new = 0; + + for(i=0; ix[i]; + while(xi->index != -1) + { + int ind = xi->index-1; + double val = xi->value; + G += val*w[ind]; + xi++; + } + + double Gp = G+p; + double Gn = G-p; + double violation = 0; + if(beta[i] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old && Gn<-Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] >= upper_bound[GETI(i)]) + { + if(Gp > 0) + violation = Gp; + else if(Gp < -Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] <= -upper_bound[GETI(i)]) + { + if(Gn < 0) + violation = -Gn; + else if(Gn > Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + + // obtain Newton direction d + if(Gp < H*beta[i]) + d = -Gp/H; + else if(Gn > H*beta[i]) + d = -Gn/H; + else + d = -beta[i]; + + if(fabs(d) < 1.0e-12) + continue; + + double beta_old = beta[i]; + beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]); + d = beta[i]-beta_old; + + if(d != 0) + { + xi = prob->x[i]; + while(xi->index != -1) + { + w[xi->index-1] += d*xi->value; + xi++; + } + } + } + + if(iter == 0) + Gnorm1_init = Gnorm1_new; + iter++; + if(iter % 10 == 0) + info("."); + + if(Gnorm1_new <= eps*Gnorm1_init) + { + if(active_size == l) + break; + else + { + active_size = l; + info("*"); + Gmax_old = INF; + continue; + } + } + + Gmax_old = Gmax_new; + } + + info("\noptimization finished, #iter = %d\n", iter); + if(iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); + + // calculate objective value + double v = 0; + int nSV = 0; + for(i=0; il; + int w_size = prob->n; + int i, s, iter = 0; + double *xTx = new double[l]; + int *index = new int[l]; + double *alpha = new double[2*l]; // store alpha and C - alpha + 
schar *y = new schar[l]; + int max_inner_iter = 100; // for inner Newton + double innereps = 1e-2; + double innereps_min = min(1e-8, eps); + double *upper_bound = new double [l]; + + for(i=0; iy[i] > 0) + { + upper_bound[i] = prob->W[i] * Cp; + y[i] = +1; + } + else + { + upper_bound[i] = prob->W[i] * Cn; + y[i] = -1; + } + } + + // Initial alpha can be set here. Note that + // 0 < alpha[i] < upper_bound[GETI(i)] + // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] + for(i=0; ix[i]; + while (xi->index != -1) + { + double val = xi->value; + xTx[i] += val*val; + w[xi->index-1] += y[i]*alpha[2*i]*val; + xi++; + } + index[i] = i; + } + + while (iter < max_iter) + { + for (i=0; ix[i]; + while (xi->index != -1) + { + ywTx += w[xi->index-1]*xi->value; + xi++; + } + ywTx *= y[i]; + double a = xisq, b = ywTx; + + // Decide to minimize g_1(z) or g_2(z) + int ind1 = 2*i, ind2 = 2*i+1, sign = 1; + if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) + { + ind1 = 2*i+1; + ind2 = 2*i; + sign = -1; + } + + // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) + double alpha_old = alpha[ind1]; + double z = alpha_old; + if(C - z < 0.5 * C) + z = 0.1*z; + double gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + Gmax = max(Gmax, fabs(gp)); + + // Newton method on the sub-problem + const double eta = 0.1; // xi in the paper + int inner_iter = 0; + while (inner_iter <= max_inner_iter) + { + if(fabs(gp) < innereps) + break; + double gpp = a + C/(C-z)/z; + double tmpz = z - gp/gpp; + if(tmpz <= 0) + z *= eta; + else // tmpz in (0, C) + z = tmpz; + gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + newton_iter++; + inner_iter++; + } + + if(inner_iter > 0) // update w + { + alpha[ind1] = z; + alpha[ind2] = C-z; + xi = prob->x[i]; + while (xi->index != -1) + { + w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value; + xi++; + } + } + } + + iter++; + if(iter % 10 == 0) + info("."); + + if(Gmax < eps) + break; + + if(newton_iter <= l/10) + innereps = max(innereps_min, 0.1*innereps); + + } + + info("\noptimization finished, #iter = %d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); + + // calculate objective value + + double v = 0; + for(i=0; il; + int w_size = prob_col->n; + int j, s, iter = 0; + int active_size = w_size; + int max_num_linesearch = 20; + + double sigma = 0.01; + double d, G_loss, G, H; + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double d_old, d_diff; + double loss_old, loss_new; + double appxcond, cond; + + int *index = new int[w_size]; + schar *y = new schar[l]; + double *b = new double[l]; // b = 1-ywTx + double *xj_sq = new double[w_size]; + feature_node *x; + + double *C = new double[l]; + + // Initial w can be set here. 
+ for(j=0; jy[j] > 0) + { + y[j] = 1; + C[j] = prob_col->W[j] * Cp; + } + else + { + y[j] = -1; + C[j] = prob_col->W[j] * Cn; + } + } + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + x->value *= y[ind]; // x->value stores yi*xij + double val = x->value; + b[ind] -= w[j]*val; + xj_sq[j] += C[GETI(ind)]*val*val; + x++; + } + } + + while(iter < max_iter) + { + Gmax_new = 0; + Gnorm1_new = 0; + + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + if(b[ind] > 0) + { + double val = x->value; + double tmp = C[GETI(ind)]*val; + G_loss -= tmp*b[ind]; + H += tmp*val; + } + x++; + } + G_loss *= 2; + + G = G_loss; + H *= 2; + H = max(H, 1e-12); + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + + // obtain Newton direction d + if(Gp < H*w[j]) + d = -Gp/H; + else if(Gn > H*w[j]) + d = -Gn/H; + else + d = -w[j]; + + if(fabs(d) < 1.0e-12) + continue; + + double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; + d_old = 0; + int num_linesearch; + for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) + { + d_diff = d_old - d; + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; + + appxcond = xj_sq[j]*d*d + G_loss*d + cond; + if(appxcond <= 0) + { + x = prob_col->x[j]; + while(x->index != -1) + { + b[x->index-1] += d_diff*x->value; + x++; + } + break; + } + + if(num_linesearch == 0) + { + loss_old = 0; + loss_new = 0; + x = prob_col->x[j]; + while(x->index != -1) + { + int ind = x->index-1; + if(b[ind] > 0) + loss_old += C[GETI(ind)]*b[ind]*b[ind]; + double b_new = b[ind] + d_diff*x->value; + b[ind] = b_new; + if(b_new > 0) + loss_new += C[GETI(ind)]*b_new*b_new; + x++; + } + } + else + { + loss_new = 0; + x = prob_col->x[j]; + while(x->index != -1) + { + int ind = x->index-1; + double b_new = b[ind] + d_diff*x->value; + b[ind] = b_new; + if(b_new > 0) + loss_new += C[GETI(ind)]*b_new*b_new; + x++; + } + } + + cond = cond + loss_new - loss_old; + if(cond <= 0) + break; + else + { + d_old = d; + d *= 0.5; + delta *= 0.5; + } + } + + w[j] += d; + + // recompute b[] if line search takes too many steps + if(num_linesearch >= max_num_linesearch) + { + info("#"); + for(int i=0; ix[i]; + while(x->index != -1) + { + b[x->index-1] -= w[i]*x->value; + x++; + } + } + } + } + + if(iter == 0) + Gnorm1_init = Gnorm1_new; + iter++; + if(iter % 10 == 0) + info("."); + + if(Gnorm1_new <= eps*Gnorm1_init) + { + if(active_size == w_size) + break; + else + { + active_size = w_size; + info("*"); + Gmax_old = INF; + continue; + } + } + + Gmax_old = Gmax_new; + } + + info("\noptimization finished, #iter = %d\n", iter); + if(iter >= max_iter) + info("\nWARNING: reaching max number of iterations\n"); + + // calculate objective value + + double v = 0; + int nnz = 0; + for(j=0; jx[j]; + while(x->index != -1) + { + x->value *= prob_col->y[x->index-1]; // restore x->value + x++; + } + if(w[j] != 0) + { + v += fabs(w[j]); + nnz++; + } + } + for(j=0; j 0) + v += C[GETI(j)]*b[j]*b[j]; + + info("Objective value = %lf\n", v); + info("#nonzeros/#features = %d/%d\n", nnz, w_size); + + delete [] index; + delete [] y; + delete [] b; + delete [] xj_sq; + delete [] C; + return iter; +} + +// A coordinate descent algorithm for 
+// L1-regularized logistic regression problems +// +// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), +// +// Given: +// x, y, Cp, Cn +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) + +#undef GETI +#define GETI(i) (i) +// To support weights for instances, use GETI(i) (i) + +static int solve_l1r_lr( + const problem *prob_col, double *w, double eps, + double Cp, double Cn, int max_newton_iter) +{ + int l = prob_col->l; + int w_size = prob_col->n; + int j, s, newton_iter=0, iter=0; + int max_iter = 1000; + int max_num_linesearch = 20; + int active_size; + int QP_active_size; + int QP_no_change = 0; + + double nu = 1e-12; + double inner_eps = 1; + double sigma = 0.01; + double w_norm, w_norm_new; + double z, G, H; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double QP_Gmax_old = INF; + double QP_Gmax_new, QP_Gnorm1_new; + double delta, negsum_xTd, cond; + + int *index = new int[w_size]; + schar *y = new schar[l]; + double *Hdiag = new double[w_size]; + double *Grad = new double[w_size]; + double *wpd = new double[w_size]; + double *xjneg_sum = new double[w_size]; + double *xTd = new double[l]; + double *exp_wTx = new double[l]; + double *exp_wTx_new = new double[l]; + double *tau = new double[l]; + double *D = new double[l]; + feature_node *x; + + double *C = new double[l]; + + // Initial w can be set here. + for(j=0; jy[j] > 0) + { + y[j] = 1; + C[j] = prob_col->W[j] * Cp; + } + else + { + y[j] = -1; + C[j] = prob_col->W[j] * Cn; + } + + exp_wTx[j] = 0; + } + + w_norm = 0; + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + double val = x->value; + exp_wTx[ind] += w[j]*val; + if(y[ind] == -1) + xjneg_sum[j] += C[GETI(ind)]*val; + x++; + } + } + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + Hdiag[j] += x->value*x->value*D[ind]; + tmp += x->value*tau[ind]; + x++; + } + Grad[j] = -tmp + xjneg_sum[j]; + + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; + double violation = 0; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //outer-level shrinking + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + } + + if(newton_iter == 0) + Gnorm1_init = Gnorm1_new; + + // Break outer-loop if the accumulated violation is small. + // Also break if no update in QP inner-loop ten times in a row. 
+ if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10) + break; + + QP_no_change++; + + iter = 0; + QP_Gmax_old = INF; + QP_active_size = active_size; + + for(int i=0; ix[j]; + G = Grad[j] + (wpd[j]-w[j])*nu; + while(x->index != -1) + { + int ind = x->index-1; + G += x->value*D[ind]*xTd[ind]; + x++; + } + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) + { + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; + } + } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + // obtain solution of one-variable problem + if(Gp < H*wpd[j]) + z = -Gp/H; + else if(Gn > H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + + if(fabs(z) < 1.0e-12) + continue; + z = min(max(z,-10.0),10.0); + + QP_no_change = 0; + QP_Gmax_new = max(QP_Gmax_new, violation); + QP_Gnorm1_new += violation; + + wpd[j] += z; + + x = prob_col->x[j]; + while(x->index != -1) + { + int ind = x->index-1; + xTd[ind] += x->value*z; + x++; + } + } + + iter++; + + if(QP_Gnorm1_new <= inner_eps*Gnorm1_init) + { + //inner stopping + if(QP_active_size == active_size) + break; + //active set reactivation + else + { + QP_active_size = active_size; + QP_Gmax_old = INF; + continue; + } + } + + QP_Gmax_old = QP_Gmax_new; + } + + if(iter >= max_iter) + info("WARNING: reaching max number of inner iterations\n"); + + delta = 0; + w_norm_new = 0; + for(j=0; j= max_num_linesearch) + { + for(int i=0; ix[i]; + while(x->index != -1) + { + exp_wTx[x->index-1] += w[i]*x->value; + x++; + } + } + + for(int i=0; i= max_newton_iter) + info("WARNING: reaching max number of iterations\n"); + + // calculate objective value + + double v = 0; + int nnz = 0; + for(j=0; jl; + int n = prob->n; + size_t nnz = 0; + size_t *col_ptr = new size_t [n+1]; + feature_node *x_space; + prob_col->l = l; + prob_col->n = n; + prob_col->y = new double[l]; + prob_col->x = new feature_node*[n]; + prob_col->W = new double[l]; + + for(i=0; iy[i] = prob->y[i]; + prob_col->W[i] = prob->W[i]; + } + + for(i=0; ix[i]; + while(x->index != -1) + { + nnz++; + col_ptr[x->index]++; + x++; + } + } + for(i=1; ix[i] = &x_space[col_ptr[i]]; + + for(i=0; ix[i]; + while(x->index != -1) + { + int ind = x->index-1; + x_space[col_ptr[ind]].index = i+1; // starts from 1 + x_space[col_ptr[ind]].value = x->value; + col_ptr[ind]++; + x++; + } + } + for(i=0; il; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + int *count = Malloc(int,max_nr_class); + int *data_label = Malloc(int,l); + int i; + + for(i=0;iy[i]; + int j; + for(j=0;j=0 && label[i] > this_label) + { + label[i+1] = label[i]; + count[i+1] = count[i]; + i--; + } + label[i+1] = this_label; + count[i+1] = this_count; + } + + for (i=0; i y[i]; + while(this_label != label[j]) + { + j++; + } + data_label[i] = j; + + } + + /* END MOD */ + +#if 0 + // + // Labels are ordered by their first occurrence in the training set. + // However, for two-class sets with -1/+1 labels and -1 appears first, + // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances. 
+ // + if (nr_class == 2 && label[0] == -1 && label[1] == 1) + { + swap(label[0],label[1]); + swap(count[0],count[1]); + for(i=0;ieps; + int max_iter=param->max_iter; + int pos = 0; + int neg = 0; + int n_iter = -1; + for(int i=0;il;i++) + if(prob->y[i] > 0) + pos++; + neg = prob->l - pos; + + double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l; + + function *fun_obj=NULL; + switch(param->solver_type) + { + case L2R_LR: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + { + if(prob->y[i] > 0) + C[i] = prob->W[i] * Cp; + else + C[i] = prob->W[i] * Cn; + } + + fun_obj=new l2r_lr_fun(prob, C); + TRON tron_obj(fun_obj, primal_solver_tol, max_iter, blas_functions); + tron_obj.set_print_string(liblinear_print_string); + n_iter=tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + } + case L2R_L2LOSS_SVC: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + { + if(prob->y[i] > 0) + C[i] = prob->W[i] * Cp; + else + C[i] = prob->W[i] * Cn; + } + fun_obj=new l2r_l2_svc_fun(prob, C); + TRON tron_obj(fun_obj, primal_solver_tol, max_iter, blas_functions); + tron_obj.set_print_string(liblinear_print_string); + n_iter=tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + } + case L2R_L2LOSS_SVC_DUAL: + n_iter=solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL, max_iter); + break; + case L2R_L1LOSS_SVC_DUAL: + n_iter=solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL, max_iter); + break; + case L1R_L2LOSS_SVC: + { + problem prob_col; + feature_node *x_space = NULL; + transpose(prob, &x_space ,&prob_col); + n_iter=solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, max_iter); + delete [] prob_col.y; + delete [] prob_col.x; + delete [] prob_col.W; + delete [] x_space; + break; + } + case L1R_LR: + { + problem prob_col; + feature_node *x_space = NULL; + transpose(prob, &x_space ,&prob_col); + n_iter=solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, max_iter); + delete [] prob_col.y; + delete [] prob_col.x; + delete [] prob_col.W; + delete [] x_space; + break; + } + case L2R_LR_DUAL: + n_iter=solve_l2r_lr_dual(prob, w, eps, Cp, Cn, max_iter); + break; + case L2R_L2LOSS_SVR: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + C[i] = prob->W[i] * param->C; + + fun_obj=new l2r_l2_svr_fun(prob, C, param->p); + TRON tron_obj(fun_obj, param->eps, max_iter, blas_functions); + tron_obj.set_print_string(liblinear_print_string); + n_iter=tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + + } + case L2R_L1LOSS_SVR_DUAL: + n_iter=solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL, max_iter); + break; + case L2R_L2LOSS_SVR_DUAL: + n_iter=solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL, max_iter); + break; + default: + fprintf(stderr, "ERROR: unknown solver_type\n"); + break; + } + return n_iter; +} + +// +// Remove zero weighed data as libsvm and some liblinear solvers require C > 0. 
+// +static void remove_zero_weight(problem *newprob, const problem *prob) +{ + int i; + int l = 0; + for(i=0;il;i++) + if(prob->W[i] > 0) l++; + *newprob = *prob; + newprob->l = l; + newprob->x = Malloc(feature_node*,l); + newprob->y = Malloc(double,l); + newprob->W = Malloc(double,l); + + int j = 0; + for(i=0;il;i++) + if(prob->W[i] > 0) + { + newprob->x[j] = prob->x[i]; + newprob->y[j] = prob->y[i]; + newprob->W[j] = prob->W[i]; + j++; + } +} + +// +// Interface functions +// +model* train(const problem *prob, const parameter *param, BlasFunctions *blas_functions) +{ + problem newprob; + remove_zero_weight(&newprob, prob); + prob = &newprob; + int i,j; + int l = prob->l; + int n = prob->n; + int w_size = prob->n; + model *model_ = Malloc(model,1); + + if(prob->bias>=0) + model_->nr_feature=n-1; + else + model_->nr_feature=n; + model_->param = *param; + model_->bias = prob->bias; + + if(check_regression_model(model_)) + { + model_->w = Malloc(double, w_size); + model_->n_iter = Malloc(int, 1); + model_->nr_class = 2; + model_->label = NULL; + model_->n_iter[0] =train_one(prob, param, &model_->w[0], 0, 0, blas_functions); + } + else + { + int nr_class; + int *label = NULL; + int *start = NULL; + int *count = NULL; + int *perm = Malloc(int,l); + + // group training data of the same class + group_classes(prob,&nr_class,&label,&start,&count,perm); + + model_->nr_class=nr_class; + model_->label = Malloc(int,nr_class); + for(i=0;ilabel[i] = label[i]; + + // calculate weighted C + double *weighted_C = Malloc(double, nr_class); + for(i=0;iC; + for(i=0;inr_weight;i++) + { + for(j=0;jweight_label[i] == label[j]) + break; + if(j == nr_class) + fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); + else + weighted_C[j] *= param->weight[i]; + } + + // constructing the subproblem + feature_node **x = Malloc(feature_node *,l); + for(i=0;ix[perm[i]]; + + int k; + problem sub_prob; + sub_prob.l = l; + sub_prob.n = n; + sub_prob.x = Malloc(feature_node *,sub_prob.l); + sub_prob.y = Malloc(double,sub_prob.l); + sub_prob.W = Malloc(double,sub_prob.l); + for(k=0; kW[perm[k]]; + } + + // multi-class svm by Crammer and Singer + if(param->solver_type == MCSVM_CS) + { + model_->w=Malloc(double, n*nr_class); + model_->n_iter=Malloc(int, 1); + for(i=0;ieps); + model_->n_iter[0]=Solver.Solve(model_->w); + } + else + { + if(nr_class == 2) + { + model_->w=Malloc(double, w_size); + model_->n_iter=Malloc(int, 1); + int e0 = start[0]+count[0]; + k=0; + for(; kn_iter[0]=train_one(&sub_prob, param, &model_->w[0], weighted_C[1], weighted_C[0], blas_functions); + } + else + { + model_->w=Malloc(double, w_size*nr_class); + double *w=Malloc(double, w_size); + model_->n_iter=Malloc(int, nr_class); + for(i=0;in_iter[i]=train_one(&sub_prob, param, w, weighted_C[i], param->C, blas_functions); + + for(int j=0;jw[j*nr_class+i] = w[j]; + } + free(w); + } + + } + + free(x); + free(label); + free(start); + free(count); + free(perm); + free(sub_prob.x); + free(sub_prob.y); + free(sub_prob.W); + free(weighted_C); + free(newprob.x); + free(newprob.y); + free(newprob.W); + } + return model_; +} + +#if 0 +void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target) +{ + int i; + int *fold_start; + int l = prob->l; + int *perm = Malloc(int,l); + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); + for(i=0;ibias; + subprob.n = prob->n; + subprob.l = l-(end-begin); + subprob.x = Malloc(struct feature_node*,subprob.l); + subprob.y = Malloc(double,subprob.l); + + k=0; + for(j=0;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + struct model *submodel = train(&subprob,param); + for(j=begin;jx[perm[j]]); + free_and_destroy_model(&submodel); + free(subprob.x); + free(subprob.y); + } + free(fold_start); + free(perm); +} + +double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) +{ + int idx; + int n; + if(model_->bias>=0) + n=model_->nr_feature+1; + else + n=model_->nr_feature; + double *w=model_->w; + int nr_class=model_->nr_class; + int i; + int nr_w; + if(nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w = 1; + else + nr_w = nr_class; + + const feature_node *lx=x; + for(i=0;iindex)!=-1; lx++) + { + // the dimension of testing data may exceed that of training + if(idx<=n) + for(i=0;ivalue; + } + + if(nr_class==2) + { + if(check_regression_model(model_)) + return dec_values[0]; + else + return (dec_values[0]>0)?model_->label[0]:model_->label[1]; + } + else + { + int dec_max_idx = 0; + for(i=1;i dec_values[dec_max_idx]) + dec_max_idx = i; + } + return model_->label[dec_max_idx]; + } +} + +double predict(const model *model_, const feature_node *x) +{ + double *dec_values = Malloc(double, model_->nr_class); + double label=predict_values(model_, x, dec_values); + free(dec_values); + return label; +} + +double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) +{ + if(check_probability_model(model_)) + { + int i; + int nr_class=model_->nr_class; + int nr_w; + if(nr_class==2) + nr_w = 1; + else + nr_w = nr_class; + + double label=predict_values(model_, x, prob_estimates); + for(i=0;inr_feature; + int n; + const parameter& param = model_->param; + + if(model_->bias>=0) + n=nr_feature+1; + else + n=nr_feature; + int w_size = n; + FILE *fp = fopen(model_file_name,"w"); + if(fp==NULL) return -1; + + char *old_locale = strdup(setlocale(LC_ALL, NULL)); + setlocale(LC_ALL, "C"); + + int nr_w; + if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w=1; + else + nr_w=model_->nr_class; + + fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); + fprintf(fp, "nr_class %d\n", model_->nr_class); + + if(model_->label) + { + fprintf(fp, "label"); + for(i=0; inr_class; i++) + fprintf(fp, " %d", model_->label[i]); + fprintf(fp, "\n"); + } + + fprintf(fp, "nr_feature %d\n", nr_feature); + + fprintf(fp, "bias %.16g\n", model_->bias); + + fprintf(fp, "w\n"); + for(i=0; iw[i*nr_w+j]); + fprintf(fp, "\n"); + } + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) return -1; + else return 0; +} + +struct model *load_model(const char *model_file_name) +{ + FILE *fp = fopen(model_file_name,"r"); + if(fp==NULL) return NULL; + + int i; + int nr_feature; + int n; + int nr_class; + double bias; + model *model_ = Malloc(model,1); + parameter& param = model_->param; + + model_->label = NULL; + + char *old_locale = strdup(setlocale(LC_ALL, NULL)); + setlocale(LC_ALL, "C"); + + char cmd[81]; + while(1) + { + fscanf(fp,"%80s",cmd); + if(strcmp(cmd,"solver_type")==0) + { + fscanf(fp,"%80s",cmd); + int i; + for(i=0;solver_type_table[i];i++) + { + 
if(strcmp(solver_type_table[i],cmd)==0) + { + param.solver_type=i; + break; + } + } + if(solver_type_table[i] == NULL) + { + fprintf(stderr,"unknown solver type.\n"); + + setlocale(LC_ALL, old_locale); + free(model_->label); + free(model_); + free(old_locale); + return NULL; + } + } + else if(strcmp(cmd,"nr_class")==0) + { + fscanf(fp,"%d",&nr_class); + model_->nr_class=nr_class; + } + else if(strcmp(cmd,"nr_feature")==0) + { + fscanf(fp,"%d",&nr_feature); + model_->nr_feature=nr_feature; + } + else if(strcmp(cmd,"bias")==0) + { + fscanf(fp,"%lf",&bias); + model_->bias=bias; + } + else if(strcmp(cmd,"w")==0) + { + break; + } + else if(strcmp(cmd,"label")==0) + { + int nr_class = model_->nr_class; + model_->label = Malloc(int,nr_class); + for(int i=0;ilabel[i]); + } + else + { + fprintf(stderr,"unknown text in model file: [%s]\n",cmd); + setlocale(LC_ALL, old_locale); + free(model_->label); + free(model_); + free(old_locale); + return NULL; + } + } + + nr_feature=model_->nr_feature; + if(model_->bias>=0) + n=nr_feature+1; + else + n=nr_feature; + int w_size = n; + int nr_w; + if(nr_class==2 && param.solver_type != MCSVM_CS) + nr_w = 1; + else + nr_w = nr_class; + + model_->w=Malloc(double, w_size*nr_w); + for(i=0; iw[i*nr_w+j]); + fscanf(fp, "\n"); + } + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) return NULL; + + return model_; +} +#endif + +int get_nr_feature(const model *model_) +{ + return model_->nr_feature; +} + +int get_nr_class(const model *model_) +{ + return model_->nr_class; +} + +void get_labels(const model *model_, int* label) +{ + if (model_->label != NULL) + for(int i=0;inr_class;i++) + label[i] = model_->label[i]; +} + +void get_n_iter(const model *model_, int* n_iter) +{ + int labels; + labels = model_->nr_class; + if (labels == 2) + labels = 1; + + if (model_->n_iter != NULL) + for(int i=0;in_iter[i]; +} + +#if 0 +// use inline here for better performance (around 20% faster than the non-inline one) +static inline double get_w_value(const struct model *model_, int idx, int label_idx) +{ + int nr_class = model_->nr_class; + int solver_type = model_->param.solver_type; + const double *w = model_->w; + + if(idx < 0 || idx > model_->nr_feature) + return 0; + if(check_regression_model(model_)) + return w[idx]; + else + { + if(label_idx < 0 || label_idx >= nr_class) + return 0; + if(nr_class == 2 && solver_type != MCSVM_CS) + { + if(label_idx == 0) + return w[idx]; + else + return -w[idx]; + } + else + return w[idx*nr_class+label_idx]; + } +} + +// feat_idx: starting from 1 to nr_feature +// label_idx: starting from 0 to nr_class-1 for classification models; +// for regression models, label_idx is ignored. 
+double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx) +{ + if(feat_idx > model_->nr_feature) + return 0; + return get_w_value(model_, feat_idx-1, label_idx); +} + +double get_decfun_bias(const struct model *model_, int label_idx) +{ + int bias_idx = model_->nr_feature; + double bias = model_->bias; + if(bias <= 0) + return 0; + else + return bias*get_w_value(model_, bias_idx, label_idx); +} +#endif + +void free_model_content(struct model *model_ptr) +{ + if(model_ptr->w != NULL) + free(model_ptr->w); + if(model_ptr->label != NULL) + free(model_ptr->label); + if(model_ptr->n_iter != NULL) + free(model_ptr->n_iter); +} + +void free_and_destroy_model(struct model **model_ptr_ptr) +{ + struct model *model_ptr = *model_ptr_ptr; + if(model_ptr != NULL) + { + free_model_content(model_ptr); + free(model_ptr); + } +} + +void destroy_param(parameter* param) +{ + if(param->weight_label != NULL) + free(param->weight_label); + if(param->weight != NULL) + free(param->weight); +} + +const char *check_parameter(const problem *prob, const parameter *param) +{ + if(param->eps <= 0) + return "eps <= 0"; + + if(param->C <= 0) + return "C <= 0"; + + if(param->p < 0) + return "p < 0"; + + if(param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC_DUAL + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L2R_L1LOSS_SVC_DUAL + && param->solver_type != MCSVM_CS + && param->solver_type != L1R_L2LOSS_SVC + && param->solver_type != L1R_LR + && param->solver_type != L2R_LR_DUAL + && param->solver_type != L2R_L2LOSS_SVR + && param->solver_type != L2R_L2LOSS_SVR_DUAL + && param->solver_type != L2R_L1LOSS_SVR_DUAL) + return "unknown solver type"; + + return NULL; +} + +#if 0 +int check_probability_model(const struct model *model_) +{ + return (model_->param.solver_type==L2R_LR || + model_->param.solver_type==L2R_LR_DUAL || + model_->param.solver_type==L1R_LR); +} +#endif + +int check_regression_model(const struct model *model_) +{ + return (model_->param.solver_type==L2R_L2LOSS_SVR || + model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || + model_->param.solver_type==L2R_L2LOSS_SVR_DUAL); +} + +void set_print_string_function(void (*print_func)(const char*)) +{ + if (print_func == NULL) + liblinear_print_string = &print_string_stdout; + else + liblinear_print_string = print_func; +} diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.h b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.h new file mode 100644 index 0000000000000000000000000000000000000000..d85f2cb8941840a11f63bf6237a6f584940f1429 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/linear.h @@ -0,0 +1,87 @@ +#ifndef _LIBLINEAR_H +#define _LIBLINEAR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "_cython_blas_helpers.h" + +struct feature_node +{ + int index; + double value; +}; + +struct problem +{ + int l, n; + double *y; + struct feature_node **x; + double bias; /* < 0 if no bias term */ + double *W; +}; + +enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ + +struct parameter +{ + int solver_type; + + /* these are for training only */ + double eps; /* stopping criteria */ + double C; + int nr_weight; + int *weight_label; + double* weight; + int max_iter; + double p; +}; + +struct model +{ + struct parameter param; + int nr_class; /* number of classes */ + int nr_feature; + double *w; + int *label; 
/* label of each class */ + double bias; + int *n_iter; /* no. of iterations of each class */ +}; + +void set_seed(unsigned seed); + +struct model* train(const struct problem *prob, const struct parameter *param, BlasFunctions *blas_functions); +void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); + +double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); +double predict(const struct model *model_, const struct feature_node *x); +double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); + +int save_model(const char *model_file_name, const struct model *model_); +struct model *load_model(const char *model_file_name); + +int get_nr_feature(const struct model *model_); +int get_nr_class(const struct model *model_); +void get_labels(const struct model *model_, int* label); +void get_n_iter(const struct model *model_, int* n_iter); +#if 0 +double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx); +double get_decfun_bias(const struct model *model_, int label_idx); +#endif + +void free_model_content(struct model *model_ptr); +void free_and_destroy_model(struct model **model_ptr_ptr); +void destroy_param(struct parameter *param); + +const char *check_parameter(const struct problem *prob, const struct parameter *param); +int check_probability_model(const struct model *model); +int check_regression_model(const struct model *model); +void set_print_string_function(void (*print_func) (const char*)); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBLINEAR_H */ + diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.cpp b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a59cb292c4a4017e002f3784e3a2bee03b39039 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.cpp @@ -0,0 +1,223 @@ +#include +#include +#include +#include +#include "tron.h" + +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif + +static void default_print(const char *buf) +{ + fputs(buf,stdout); + fflush(stdout); +} + +void TRON::info(const char *fmt,...) +{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*tron_print_string)(buf); +} + +TRON::TRON(const function *fun_obj, double eps, int max_iter, BlasFunctions *blas) +{ + this->fun_obj=const_cast(fun_obj); + this->eps=eps; + this->max_iter=max_iter; + this->blas=blas; + tron_print_string = default_print; +} + +TRON::~TRON() +{ +} + +int TRON::tron(double *w) +{ + // Parameters for updating the iterates. + double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; + + // Parameters for updating the trust region size delta. 
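+	// (descriptive note: delta is reduced when the actual reduction actred is
+	//  small relative to the predicted reduction prered -- the eta0/eta1/eta2
+	//  ratio tests further below -- and for a successful step it may grow up
+	//  to min(alpha*snorm, sigma3*delta).)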
+ double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; + + int n = fun_obj->get_nr_variable(); + int i, cg_iter; + double delta, snorm; + double alpha, f, fnew, prered, actred, gs; + int search = 1, iter = 1, inc = 1; + double *s = new double[n]; + double *r = new double[n]; + double *w_new = new double[n]; + double *g = new double[n]; + + for (i=0; ifun(w); + fun_obj->grad(w, g); + delta = blas->nrm2(n, g, inc); + double gnorm1 = delta; + double gnorm = gnorm1; + + if (gnorm <= eps*gnorm1) + search = 0; + + iter = 1; + + while (iter <= max_iter && search) + { + cg_iter = trcg(delta, g, s, r); + + memcpy(w_new, w, sizeof(double)*n); + blas->axpy(n, 1.0, s, inc, w_new, inc); + + gs = blas->dot(n, g, inc, s, inc); + prered = -0.5*(gs - blas->dot(n, s, inc, r, inc)); + fnew = fun_obj->fun(w_new); + + // Compute the actual reduction. + actred = f - fnew; + + // On the first iteration, adjust the initial step bound. + snorm = blas->nrm2(n, s, inc); + if (iter == 1) + delta = min(delta, snorm); + + // Compute prediction alpha*snorm of the step. + if (fnew - f - gs <= 0) + alpha = sigma3; + else + alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); + + // Update the trust region bound according to the ratio of actual to predicted reduction. + if (actred < eta0*prered) + delta = min(max(alpha, sigma1)*snorm, sigma2*delta); + else if (actred < eta1*prered) + delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); + else if (actred < eta2*prered) + delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); + else + delta = max(delta, min(alpha*snorm, sigma3*delta)); + + info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); + + if (actred > eta0*prered) + { + iter++; + memcpy(w, w_new, sizeof(double)*n); + f = fnew; + fun_obj->grad(w, g); + + gnorm = blas->nrm2(n, g, inc); + if (gnorm <= eps*gnorm1) + break; + } + if (f < -1.0e+32) + { + info("WARNING: f < -1.0e+32\n"); + break; + } + if (fabs(actred) <= 0 && prered <= 0) + { + info("WARNING: actred and prered <= 0\n"); + break; + } + if (fabs(actred) <= 1.0e-12*fabs(f) && + fabs(prered) <= 1.0e-12*fabs(f)) + { + info("WARNING: actred and prered too small\n"); + break; + } + } + + delete[] g; + delete[] r; + delete[] w_new; + delete[] s; + return --iter; +} + +int TRON::trcg(double delta, double *g, double *s, double *r) +{ + int i, inc = 1; + int n = fun_obj->get_nr_variable(); + double *d = new double[n]; + double *Hd = new double[n]; + double rTr, rnewTrnew, alpha, beta, cgtol; + + for (i=0; inrm2(n, g, inc); + + int cg_iter = 0; + rTr = blas->dot(n, r, inc, r, inc); + while (1) + { + if (blas->nrm2(n, r, inc) <= cgtol) + break; + cg_iter++; + fun_obj->Hv(d, Hd); + + alpha = rTr / blas->dot(n, d, inc, Hd, inc); + blas->axpy(n, alpha, d, inc, s, inc); + if (blas->nrm2(n, s, inc) > delta) + { + info("cg reaches trust region boundary\n"); + alpha = -alpha; + blas->axpy(n, alpha, d, inc, s, inc); + + double std = blas->dot(n, s, inc, d, inc); + double sts = blas->dot(n, s, inc, s, inc); + double dtd = blas->dot(n, d, inc, d, inc); + double dsq = delta*delta; + double rad = sqrt(std*std + dtd*(dsq-sts)); + if (std >= 0) + alpha = (dsq - sts)/(std + rad); + else + alpha = (rad - std)/dtd; + blas->axpy(n, alpha, d, inc, s, inc); + alpha = -alpha; + blas->axpy(n, alpha, Hd, inc, r, inc); + break; + } + alpha = -alpha; + blas->axpy(n, alpha, Hd, inc, r, inc); + rnewTrnew = blas->dot(n, r, inc, r, inc); + beta = rnewTrnew/rTr; + blas->scal(n, beta, d, inc); + blas->axpy(n, 1.0, r, inc, d, 
inc); + rTr = rnewTrnew; + } + + delete[] d; + delete[] Hd; + + return(cg_iter); +} + +double TRON::norm_inf(int n, double *x) +{ + double dmax = fabs(x[0]); + for (int i=1; i= dmax) + dmax = fabs(x[i]); + return(dmax); +} + +void TRON::set_print_string(void (*print_string) (const char *buf)) +{ + tron_print_string = print_string; +} diff --git a/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.h b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.h new file mode 100644 index 0000000000000000000000000000000000000000..91aca703f9830267c5686d302cf1271cc1786453 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/liblinear/tron.h @@ -0,0 +1,37 @@ +#ifndef _TRON_H +#define _TRON_H + +#include "_cython_blas_helpers.h" + +class function +{ +public: + virtual double fun(double *w) = 0 ; + virtual void grad(double *w, double *g) = 0 ; + virtual void Hv(double *s, double *Hs) = 0 ; + + virtual int get_nr_variable(void) = 0 ; + virtual ~function(void){} +}; + +class TRON +{ +public: + TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0); + ~TRON(); + + int tron(double *w); + void set_print_string(void (*i_print) (const char *buf)); + +private: + int trcg(double delta, double *g, double *s, double *r); + double norm_inf(int n, double *x); + + double eps; + int max_iter; + function *fun_obj; + BlasFunctions *blas; + void info(const char *fmt,...); + void (*tron_print_string)(const char *buf); +}; +#endif diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/LIBSVM_CHANGES b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/LIBSVM_CHANGES new file mode 100644 index 0000000000000000000000000000000000000000..082fa1de598876acbcc2ace038cfc4c8cc0ba76d --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/LIBSVM_CHANGES @@ -0,0 +1,11 @@ +Changes to Libsvm + +This is here mainly as checklist for incorporation of new versions of libsvm. + + * Add copyright to files svm.cpp and svm.h + * Add random_seed support and call to srand in fit function + * Improved random number generator (fix on windows, enhancement on other + platforms). See + * invoke scipy blas api for svm kernel function to improve performance with speedup rate of 1.5X to 2X for dense data only. See + * Expose the number of iterations run in optimization. See +The changes made with respect to upstream are detailed in the heading of svm.cpp diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..cd6270e693f7cfe5a18153e5b76ffb25ae650c5f --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h @@ -0,0 +1,9 @@ +#ifndef _SVM_CYTHON_BLAS_HELPERS_H +#define _SVM_CYTHON_BLAS_HELPERS_H + +typedef double (*dot_func)(int, const double*, int, const double*, int); +typedef struct BlasFunctions{ + dot_func dot; +} BlasFunctions; + +#endif diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_helper.c b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..b75415c5929da90bd3d34da8bd4036ec07ef6867 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_helper.c @@ -0,0 +1,425 @@ +#include +#define PY_SSIZE_T_CLEAN +#include +#include "svm.h" +#include "_svm_cython_blas_helpers.h" + + +#ifndef MAX + #define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) +#endif + + +/* + * Some helper methods for libsvm bindings. + * + * We need to access from python some parameters stored in svm_model + * but libsvm does not expose this structure, so we define it here + * along some utilities to convert from numpy arrays. + * + * License: BSD 3 clause + * + * Author: 2010 Fabian Pedregosa + */ + + +/* + * Convert matrix to sparse representation suitable for libsvm. x is + * expected to be an array of length nrow*ncol. + * + * Typically the matrix will be dense, so we speed up the routine for + * this case. We create a temporary array temp that collects non-zero + * elements and after we just memcpy that to the proper array. + * + * Special care must be taken with indinces, since libsvm indices start + * at 1 and not at 0. + * + * Strictly speaking, the C standard does not require that structs are + * contiguous, but in practice its a reasonable assumption. + * + */ +struct svm_node *dense_to_libsvm (double *x, Py_ssize_t *dims) +{ + struct svm_node *node; + Py_ssize_t len_row = dims[1]; + double *tx = x; + int i; + + node = malloc (dims[0] * sizeof(struct svm_node)); + + if (node == NULL) return NULL; + for (i=0; isvm_type = svm_type; + param->kernel_type = kernel_type; + param->degree = degree; + param->coef0 = coef0; + param->nu = nu; + param->cache_size = cache_size; + param->C = C; + param->eps = eps; + param->p = p; + param->shrinking = shrinking; + param->probability = probability; + param->nr_weight = nr_weight; + param->weight_label = (int *) weight_label; + param->weight = (double *) weight; + param->gamma = gamma; + param->max_iter = max_iter; + param->random_seed = random_seed; +} + +/* + * Fill an svm_problem struct. problem->x will be malloc'd. + */ +void set_problem(struct svm_problem *problem, char *X, char *Y, char *sample_weight, Py_ssize_t *dims, int kernel_type) +{ + if (problem == NULL) return; + problem->l = (int) dims[0]; /* number of samples */ + problem->y = (double *) Y; + problem->x = dense_to_libsvm((double *) X, dims); /* implicit call to malloc */ + problem->W = (double *) sample_weight; +} + +/* + * Create and return an instance of svm_model. + * + * The copy of model->sv_coef should be straightforward, but + * unfortunately to represent a matrix numpy and libsvm use different + * approaches, so it requires some iteration. + * + * Possible issue: on 64 bits, the number of columns that numpy can + * store is a long, but libsvm enforces this number (model->l) to be + * an int, so we might have numpy matrices that do not fit into libsvm's + * data structure. + * + */ +struct svm_model *set_model(struct svm_parameter *param, int nr_class, + char *SV, Py_ssize_t *SV_dims, + char *support, Py_ssize_t *support_dims, + Py_ssize_t *sv_coef_strides, + char *sv_coef, char *rho, char *nSV, + char *probA, char *probB) +{ + struct svm_model *model; + double *dsv_coef = (double *) sv_coef; + int i, m; + + m = nr_class * (nr_class-1)/2; + + if ((model = malloc(sizeof(struct svm_model))) == NULL) + goto model_error; + if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL) + goto nsv_error; + if ((model->label = malloc(nr_class * sizeof(int))) == NULL) + goto label_error; + if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL) + goto sv_coef_error; + if ((model->rho = malloc( m * sizeof(double))) == NULL) + goto rho_error; + + // This is only allocated in dynamic memory while training. 
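+    // (note: set_model rebuilds a model from arrays already computed on the
+    //  Python side, so n_iter stays NULL here; copy_n_iter further below is
+    //  only meaningful for models returned by svm_train itself.)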
+ model->n_iter = NULL; + + model->nr_class = nr_class; + model->param = *param; + model->l = (int) support_dims[0]; + + if (param->kernel_type == PRECOMPUTED) { + if ((model->SV = malloc ((model->l) * sizeof(struct svm_node))) == NULL) + goto SV_error; + for (i=0; il; ++i) { + model->SV[i].ind = ((int *) support)[i]; + model->SV[i].values = NULL; + } + } else { + model->SV = dense_to_libsvm((double *) SV, SV_dims); + } + /* + * regression and one-class does not use nSV, label. + * TODO: does this provoke memory leaks (we just malloc'ed them)? + */ + if (param->svm_type < 2) { + memcpy(model->nSV, nSV, model->nr_class * sizeof(int)); + for(i=0; i < model->nr_class; i++) + model->label[i] = i; + } + + for (i=0; i < model->nr_class-1; i++) { + model->sv_coef[i] = dsv_coef + i*(model->l); + } + + for (i=0; irho)[i] = -((double *) rho)[i]; + } + + /* + * just to avoid segfaults, these features are not wrapped but + * svm_destroy_model will try to free them. + */ + + if (param->probability) { + if ((model->probA = malloc(m * sizeof(double))) == NULL) + goto probA_error; + memcpy(model->probA, probA, m * sizeof(double)); + if ((model->probB = malloc(m * sizeof(double))) == NULL) + goto probB_error; + memcpy(model->probB, probB, m * sizeof(double)); + } else { + model->probA = NULL; + model->probB = NULL; + } + + /* We'll free SV ourselves */ + model->free_sv = 0; + return model; + +probB_error: + free(model->probA); +probA_error: + free(model->SV); +SV_error: + free(model->rho); +rho_error: + free(model->sv_coef); +sv_coef_error: + free(model->label); +label_error: + free(model->nSV); +nsv_error: + free(model); +model_error: + return NULL; +} + + + +/* + * Get the number of support vectors in a model. + */ +Py_ssize_t get_l(struct svm_model *model) +{ + return (Py_ssize_t) model->l; +} + +/* + * Get the number of classes in a model, = 2 in regression/one class + * svm. + */ +Py_ssize_t get_nr(struct svm_model *model) +{ + return (Py_ssize_t) model->nr_class; +} + +/* + * Get the number of iterations run in optimization + */ +void copy_n_iter(char *data, struct svm_model *model) +{ + const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2); + memcpy(data, model->n_iter, n_models * sizeof(int)); +} + +/* + * Some helpers to convert from libsvm sparse data structures + * model->sv_coef is a double **, whereas data is just a double *, + * so we have to do some stupid copying. + */ +void copy_sv_coef(char *data, struct svm_model *model) +{ + int i, len = model->nr_class-1; + double *temp = (double *) data; + for(i=0; isv_coef[i], sizeof(double) * model->l); + temp += model->l; + } +} + +void copy_intercept(char *data, struct svm_model *model, Py_ssize_t *dims) +{ + /* intercept = -rho */ + Py_ssize_t i, n = dims[0]; + double t, *ddata = (double *) data; + for (i=0; irho[i]; + /* we do this to avoid ugly -0.0 */ + *ddata = (t != 0) ? -t : 0; + ++ddata; + } +} + +/* + * This is a bit more complex since SV are stored as sparse + * structures, so we have to do the conversion on the fly and also + * iterate fast over data. 
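+ * (in the dense representation each svm_node carries a dim/values pair, so
+ *  the copy below is a straight memcpy of dim doubles per support vector)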
+ */ +void copy_SV(char *data, struct svm_model *model, Py_ssize_t *dims) +{ + int i, n = model->l; + double *tdata = (double *) data; + int dim = model->SV[0].dim; + for (i=0; iSV[i].values, dim * sizeof(double)); + tdata += dim; + } +} + +void copy_support (char *data, struct svm_model *model) +{ + memcpy (data, model->sv_ind, (model->l) * sizeof(int)); +} + +/* + * copy svm_model.nSV, an array with the number of SV for each class + * will be NULL in the case of SVR, OneClass + */ +void copy_nSV(char *data, struct svm_model *model) +{ + if (model->label == NULL) return; + memcpy(data, model->nSV, model->nr_class * sizeof(int)); +} + +void copy_probA(char *data, struct svm_model *model, Py_ssize_t * dims) +{ + memcpy(data, model->probA, dims[0] * sizeof(double)); +} + +void copy_probB(char *data, struct svm_model *model, Py_ssize_t * dims) +{ + memcpy(data, model->probB, dims[0] * sizeof(double)); +} + +/* + * Predict using model. + * + * It will return -1 if we run out of memory. + */ +int copy_predict(char *predict, struct svm_model *model, Py_ssize_t *predict_dims, + char *dec_values, BlasFunctions *blas_functions) +{ + double *t = (double *) dec_values; + struct svm_node *predict_nodes; + Py_ssize_t i; + + predict_nodes = dense_to_libsvm((double *) predict, predict_dims); + + if (predict_nodes == NULL) + return -1; + for(i=0; inr_class; + predict_nodes = dense_to_libsvm((double *) predict, predict_dims); + if (predict_nodes == NULL) + return -1; + for(i=0; iSV); + + /* We don't free sv_ind and n_iter, since we did not create them in + set_model */ + /* free(model->sv_ind); + * free(model->n_iter); + */ + free(model->sv_coef); + free(model->rho); + free(model->label); + free(model->probA); + free(model->probB); + free(model->nSV); + free(model); + + return 0; +} + +int free_param(struct svm_parameter *param) +{ + if (param == NULL) return -1; + free(param); + return 0; +} + + +/* borrowed from original libsvm code */ +static void print_null(const char *s) {} + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} + +/* provide convenience wrapper */ +void set_verbosity(int verbosity_flag){ + if (verbosity_flag) + svm_set_print_string_function(&print_string_stdout); + else + svm_set_print_string_function(&print_null); +} diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_sparse_helper.c b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_sparse_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..2e6bed578866b8f1572f8c74e05b983bceafd5c7 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_sparse_helper.c @@ -0,0 +1,472 @@ +#include +#define PY_SSIZE_T_CLEAN +#include +#include "svm.h" +#include "_svm_cython_blas_helpers.h" + + +#ifndef MAX + #define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) +#endif + + +/* + * Convert scipy.sparse.csr to libsvm's sparse data structure + */ +struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, int n_samples) +{ + struct svm_csr_node **sparse, *temp; + int i, j=0, k=0, n; + sparse = malloc (n_samples * sizeof(struct svm_csr_node *)); + + if (sparse == NULL) + return NULL; + + for (i=0; isvm_type = svm_type; + param->kernel_type = kernel_type; + param->degree = degree; + param->coef0 = coef0; + param->nu = nu; + param->cache_size = cache_size; + param->C = C; + param->eps = eps; + param->p = p; + param->shrinking = shrinking; + param->probability = probability; + param->nr_weight = nr_weight; + param->weight_label = (int *) weight_label; + param->weight = (double *) weight; + param->gamma = gamma; + param->max_iter = max_iter; + param->random_seed = random_seed; + return param; +} + + +/* + * Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix. It is + * up to the user to free resulting structure. + * + * TODO: precomputed kernel. + */ +struct svm_csr_problem * csr_set_problem (char *values, Py_ssize_t *n_indices, + char *indices, Py_ssize_t *n_indptr, char *indptr, char *Y, + char *sample_weight, int kernel_type) { + + struct svm_csr_problem *problem; + problem = malloc (sizeof (struct svm_csr_problem)); + if (problem == NULL) return NULL; + problem->l = (int) n_indptr[0] - 1; + problem->y = (double *) Y; + problem->x = csr_to_libsvm((double *) values, (int *) indices, + (int *) indptr, problem->l); + /* should be removed once we implement weighted samples */ + problem->W = (double *) sample_weight; + + if (problem->x == NULL) { + free(problem); + return NULL; + } + return problem; +} + + +struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class, + char *SV_data, Py_ssize_t *SV_indices_dims, + char *SV_indices, Py_ssize_t *SV_indptr_dims, + char *SV_intptr, + char *sv_coef, char *rho, char *nSV, + char *probA, char *probB) +{ + struct svm_csr_model *model; + double *dsv_coef = (double *) sv_coef; + int i, m; + + m = nr_class * (nr_class-1)/2; + + if ((model = malloc(sizeof(struct svm_csr_model))) == NULL) + goto model_error; + if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL) + goto nsv_error; + if ((model->label = malloc(nr_class * sizeof(int))) == NULL) + goto label_error; + if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL) + goto sv_coef_error; + if ((model->rho = malloc( m * sizeof(double))) == NULL) + goto rho_error; + + // This is only allocated in dynamic memory while training. + model->n_iter = NULL; + + /* in the case of precomputed kernels we do not use + dense_to_precomputed because we don't want the leading 0. As + indices start at 1 (not at 0) this will work */ + model->l = (int) SV_indptr_dims[0] - 1; + model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices, + (int *) SV_intptr, model->l); + model->nr_class = nr_class; + model->param = *param; + + /* + * regression and one-class does not use nSV, label. + */ + if (param->svm_type < 2) { + memcpy(model->nSV, nSV, model->nr_class * sizeof(int)); + for(i=0; i < model->nr_class; i++) + model->label[i] = i; + } + + for (i=0; i < model->nr_class-1; i++) { + /* + * We cannot squash all this mallocs in a single call since + * svm_destroy_model will free each element of the array. 
+ */ + if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) { + int j; + for (j=0; jsv_coef[j]); + goto sv_coef_i_error; + } + memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double)); + dsv_coef += model->l; + } + + for (i=0; irho)[i] = -((double *) rho)[i]; + } + + /* + * just to avoid segfaults, these features are not wrapped but + * svm_destroy_model will try to free them. + */ + + if (param->probability) { + if ((model->probA = malloc(m * sizeof(double))) == NULL) + goto probA_error; + memcpy(model->probA, probA, m * sizeof(double)); + if ((model->probB = malloc(m * sizeof(double))) == NULL) + goto probB_error; + memcpy(model->probB, probB, m * sizeof(double)); + } else { + model->probA = NULL; + model->probB = NULL; + } + + /* We'll free SV ourselves */ + model->free_sv = 0; + return model; + +probB_error: + free(model->probA); +probA_error: + for (i=0; i < model->nr_class-1; i++) + free(model->sv_coef[i]); +sv_coef_i_error: + free(model->rho); +rho_error: + free(model->sv_coef); +sv_coef_error: + free(model->label); +label_error: + free(model->nSV); +nsv_error: + free(model); +model_error: + return NULL; +} + + +/* + * Copy support vectors into a scipy.sparse.csr matrix + */ +int csr_copy_SV (char *data, Py_ssize_t *n_indices, + char *indices, Py_ssize_t *n_indptr, char *indptr, + struct svm_csr_model *model, int n_features) +{ + int i, j, k=0, index; + double *dvalues = (double *) data; + int *iindices = (int *) indices; + int *iindptr = (int *) indptr; + iindptr[0] = 0; + for (i=0; il; ++i) { /* iterate over support vectors */ + index = model->SV[i][0].index; + for(j=0; index >=0 ; ++j) { + iindices[k] = index - 1; + dvalues[k] = model->SV[i][j].value; + index = model->SV[i][j+1].index; + ++k; + } + iindptr[i+1] = k; + } + + return 0; +} + +/* get number of nonzero coefficients in support vectors */ +Py_ssize_t get_nonzero_SV (struct svm_csr_model *model) { + int i, j; + Py_ssize_t count=0; + for (i=0; il; ++i) { + j = 0; + while (model->SV[i][j].index != -1) { + ++j; + ++count; + } + } + return count; +} + + +/* + * Predict using a model, where data is expected to be encoded into a csr matrix. 
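+ * (each row is converted to a temporary svm_csr_node array, passed to
+ *  svm_csr_predict, and freed right away; -1 is returned if the CSR
+ *  conversion runs out of memory)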
+ */ +int csr_copy_predict (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, + char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, + char *dec_values, BlasFunctions *blas_functions) { + double *t = (double *) dec_values; + struct svm_csr_node **predict_nodes; + Py_ssize_t i; + + predict_nodes = csr_to_libsvm((double *) data, (int *) index, + (int *) intptr, intptr_size[0]-1); + + if (predict_nodes == NULL) + return -1; + for(i=0; i < intptr_size[0] - 1; ++i) { + *t = svm_csr_predict(model, predict_nodes[i], blas_functions); + free(predict_nodes[i]); + ++t; + } + free(predict_nodes); + return 0; +} + +int csr_copy_predict_values (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, + char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, + char *dec_values, int nr_class, BlasFunctions *blas_functions) { + struct svm_csr_node **predict_nodes; + Py_ssize_t i; + + predict_nodes = csr_to_libsvm((double *) data, (int *) index, + (int *) intptr, intptr_size[0]-1); + + if (predict_nodes == NULL) + return -1; + for(i=0; i < intptr_size[0] - 1; ++i) { + svm_csr_predict_values(model, predict_nodes[i], + ((double *) dec_values) + i*nr_class, + blas_functions); + free(predict_nodes[i]); + } + free(predict_nodes); + + return 0; +} + +int csr_copy_predict_proba (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, + char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, + char *dec_values, BlasFunctions *blas_functions) { + + struct svm_csr_node **predict_nodes; + Py_ssize_t i; + int m = model->nr_class; + + predict_nodes = csr_to_libsvm((double *) data, (int *) index, + (int *) intptr, intptr_size[0]-1); + + if (predict_nodes == NULL) + return -1; + for(i=0; i < intptr_size[0] - 1; ++i) { + svm_csr_predict_probability( + model, predict_nodes[i], ((double *) dec_values) + i*m, blas_functions); + free(predict_nodes[i]); + } + free(predict_nodes); + return 0; +} + + +Py_ssize_t get_nr(struct svm_csr_model *model) +{ + return (Py_ssize_t) model->nr_class; +} + +void copy_intercept(char *data, struct svm_csr_model *model, Py_ssize_t *dims) +{ + /* intercept = -rho */ + Py_ssize_t i, n = dims[0]; + double t, *ddata = (double *) data; + for (i=0; irho[i]; + /* we do this to avoid ugly -0.0 */ + *ddata = (t != 0) ? -t : 0; + ++ddata; + } +} + +void copy_support (char *data, struct svm_csr_model *model) +{ + memcpy (data, model->sv_ind, (model->l) * sizeof(int)); +} + +/* + * Some helpers to convert from libsvm sparse data structures + * model->sv_coef is a double **, whereas data is just a double *, + * so we have to do some stupid copying. + */ +void copy_sv_coef(char *data, struct svm_csr_model *model) +{ + int i, len = model->nr_class-1; + double *temp = (double *) data; + for(i=0; isv_coef[i], sizeof(double) * model->l); + temp += model->l; + } +} + +/* + * Get the number of iterations run in optimization + */ +void copy_n_iter(char *data, struct svm_csr_model *model) +{ + const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2); + memcpy(data, model->n_iter, n_models * sizeof(int)); +} + +/* + * Get the number of support vectors in a model. 
+ */ +Py_ssize_t get_l(struct svm_csr_model *model) +{ + return (Py_ssize_t) model->l; +} + +void copy_nSV(char *data, struct svm_csr_model *model) +{ + if (model->label == NULL) return; + memcpy(data, model->nSV, model->nr_class * sizeof(int)); +} + +/* + * same as above with model->label + * TODO: merge in the cython layer + */ +void copy_label(char *data, struct svm_csr_model *model) +{ + if (model->label == NULL) return; + memcpy(data, model->label, model->nr_class * sizeof(int)); +} + +void copy_probA(char *data, struct svm_csr_model *model, Py_ssize_t * dims) +{ + memcpy(data, model->probA, dims[0] * sizeof(double)); +} + +void copy_probB(char *data, struct svm_csr_model *model, Py_ssize_t * dims) +{ + memcpy(data, model->probB, dims[0] * sizeof(double)); +} + + +/* + * Some free routines. Some of them are nontrivial since a lot of + * sharing happens across objects (they *must* be called in the + * correct order) + */ +int free_problem(struct svm_csr_problem *problem) +{ + int i; + if (problem == NULL) return -1; + for (i=0; il; ++i) + free (problem->x[i]); + free (problem->x); + free (problem); + return 0; +} + +int free_model(struct svm_csr_model *model) +{ + /* like svm_free_and_destroy_model, but does not free sv_coef[i] */ + /* We don't free n_iter, since we did not create them in set_model. */ + if (model == NULL) return -1; + free(model->SV); + free(model->sv_coef); + free(model->rho); + free(model->label); + free(model->probA); + free(model->probB); + free(model->nSV); + free(model); + + return 0; +} + +int free_param(struct svm_parameter *param) +{ + if (param == NULL) return -1; + free(param); + return 0; +} + + +int free_model_SV(struct svm_csr_model *model) +{ + int i; + for (i=model->l-1; i>=0; --i) free(model->SV[i]); + /* svn_destroy_model frees model->SV */ + for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]); + /* svn_destroy_model frees model->sv_coef */ + return 0; +} + + +/* borrowed from original libsvm code */ +static void print_null(const char *s) {} + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} + +/* provide convenience wrapper */ +void set_verbosity(int verbosity_flag){ + if (verbosity_flag) + svm_set_print_string_function(&print_string_stdout); + else + svm_set_print_string_function(&print_null); +} diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_template.cpp b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_template.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b4a485cf2b7703b474b2097c72eb7bd6c56bf61b --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/libsvm_template.cpp @@ -0,0 +1,8 @@ + +/* this is a hack to generate libsvm with both sparse and dense + methods in the same binary*/ + +#define _DENSE_REP +#include "svm.cpp" +#undef _DENSE_REP +#include "svm.cpp" diff --git a/.venv/Lib/site-packages/sklearn/svm/src/libsvm/svm.cpp b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/svm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5c266e1d9a0a3f3c26983838d7d6089674816b70 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/src/libsvm/svm.cpp @@ -0,0 +1,3187 @@ +/* +Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + Modified 2010: + + - Support for dense data by Ming-Fang Weng + + - Return indices for support vectors, Fabian Pedregosa + + + - Fixes to avoid name collision, Fabian Pedregosa + + - Add support for instance weights, Fabian Pedregosa based on work + by Ming-Wei Chang, Hsuan-Tien Lin, Ming-Hen Tsai, Chia-Hua Ho and + Hsiang-Fu Yu, + . + + - Make labels sorted in svm_group_classes, Fabian Pedregosa. + + Modified 2020: + + - Improved random number generator by using a mersenne twister + tweaked + lemire postprocessor. This fixed a convergence issue on windows targets. + Sylvain Marie, Schneider Electric + see + + Modified 2021: + + - Exposed number of iterations run in optimization, Juan Martín Loyola. + See + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "svm.h" +#include "_svm_cython_blas_helpers.h" +#include "../newrand/newrand.h" + + +#ifndef _LIBSVM_CPP +typedef float Qfloat; +typedef signed char schar; +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif +template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } +template static inline void clone(T*& dst, S* src, int n) +{ + dst = new T[n]; + memcpy((void *)dst,(void *)src,sizeof(T)*n); +} +static inline double powi(double base, int times) +{ + double tmp = base, ret = 1.0; + + for(int t=times; t>0; t/=2) + { + if(t%2==1) ret*=tmp; + tmp = tmp * tmp; + } + return ret; +} +#define INF HUGE_VAL +#define TAU 1e-12 +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} +static void (*svm_print_string) (const char *) = &print_string_stdout; + +static void info(const char *fmt,...) +{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*svm_print_string)(buf); +} +#endif +#define _LIBSVM_CPP + + +/* yeah, this is ugly. 
It helps us to have unique names for both sparse +and dense versions of this library */ +#ifdef _DENSE_REP + #ifdef PREFIX + #undef PREFIX + #endif + #ifdef NAMESPACE + #undef NAMESPACE + #endif + #define PREFIX(name) svm_##name + #define NAMESPACE svm + namespace svm { +#else + /* sparse representation */ + #ifdef PREFIX + #undef PREFIX + #endif + #ifdef NAMESPACE + #undef NAMESPACE + #endif + #define PREFIX(name) svm_csr_##name + #define NAMESPACE svm_csr + namespace svm_csr { +#endif + + +// +// Kernel Cache +// +// l is the number of total data items +// size is the cache size limit in bytes +// +class Cache +{ +public: + Cache(int l,long int size); + ~Cache(); + + // request data [0,len) + // return some position p where [p,len) need to be filled + // (p >= len if nothing needs to be filled) + int get_data(const int index, Qfloat **data, int len); + void swap_index(int i, int j); +private: + int l; + long int size; + struct head_t + { + head_t *prev, *next; // a circular list + Qfloat *data; + int len; // data[0,len) is cached in this entry + }; + + head_t *head; + head_t lru_head; + void lru_delete(head_t *h); + void lru_insert(head_t *h); +}; + +Cache::Cache(int l_,long int size_):l(l_),size(size_) +{ + head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 + size /= sizeof(Qfloat); + size -= l * sizeof(head_t) / sizeof(Qfloat); + size = max(size, 2 * (long int) l); // cache must be large enough for two columns + lru_head.next = lru_head.prev = &lru_head; +} + +Cache::~Cache() +{ + for(head_t *h = lru_head.next; h != &lru_head; h=h->next) + free(h->data); + free(head); +} + +void Cache::lru_delete(head_t *h) +{ + // delete from current location + h->prev->next = h->next; + h->next->prev = h->prev; +} + +void Cache::lru_insert(head_t *h) +{ + // insert to last position + h->next = &lru_head; + h->prev = lru_head.prev; + h->prev->next = h; + h->next->prev = h; +} + +int Cache::get_data(const int index, Qfloat **data, int len) +{ + head_t *h = &head[index]; + if(h->len) lru_delete(h); + int more = len - h->len; + + if(more > 0) + { + // free old space + while(size < more) + { + head_t *old = lru_head.next; + lru_delete(old); + free(old->data); + size += old->len; + old->data = 0; + old->len = 0; + } + + // allocate new space + h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); + size -= more; + swap(h->len,len); + } + + lru_insert(h); + *data = h->data; + return len; +} + +void Cache::swap_index(int i, int j) +{ + if(i==j) return; + + if(head[i].len) lru_delete(&head[i]); + if(head[j].len) lru_delete(&head[j]); + swap(head[i].data,head[j].data); + swap(head[i].len,head[j].len); + if(head[i].len) lru_insert(&head[i]); + if(head[j].len) lru_insert(&head[j]); + + if(i>j) swap(i,j); + for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) + { + if(h->len > i) + { + if(h->len > j) + swap(h->data[i],h->data[j]); + else + { + // give up + lru_delete(h); + free(h->data); + size += h->len; + h->data = 0; + h->len = 0; + } + } + } +} + +// +// Kernel evaluation +// +// the static method k_function is for doing single kernel evaluation +// the constructor of Kernel prepares to calculate the l*l kernel matrix +// the member function get_Q is for getting one column from the Q Matrix +// +class QMatrix { +public: + virtual Qfloat *get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void swap_index(int i, int j) const = 0; + virtual ~QMatrix() {} +}; + +class Kernel: public QMatrix { +public: +#ifdef _DENSE_REP + Kernel(int l, PREFIX(node) * x, const 
svm_parameter& param, BlasFunctions *blas_functions); +#else + Kernel(int l, PREFIX(node) * const * x, const svm_parameter& param, BlasFunctions *blas_functions); +#endif + virtual ~Kernel(); + + static double k_function(const PREFIX(node) *x, const PREFIX(node) *y, + const svm_parameter& param, BlasFunctions *blas_functions); + virtual Qfloat *get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void swap_index(int i, int j) const // no so const... + { + swap(x[i],x[j]); + if(x_square) swap(x_square[i],x_square[j]); + } +protected: + + double (Kernel::*kernel_function)(int i, int j) const; + +private: +#ifdef _DENSE_REP + PREFIX(node) *x; +#else + const PREFIX(node) **x; +#endif + double *x_square; + // scipy blas pointer + BlasFunctions *m_blas; + + // svm_parameter + const int kernel_type; + const int degree; + const double gamma; + const double coef0; + + static double dot(const PREFIX(node) *px, const PREFIX(node) *py, BlasFunctions *blas_functions); +#ifdef _DENSE_REP + static double dot(const PREFIX(node) &px, const PREFIX(node) &py, BlasFunctions *blas_functions); +#endif + + double kernel_linear(int i, int j) const + { + return dot(x[i],x[j],m_blas); + } + double kernel_poly(int i, int j) const + { + return powi(gamma*dot(x[i],x[j],m_blas)+coef0,degree); + } + double kernel_rbf(int i, int j) const + { + return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j],m_blas))); + } + double kernel_sigmoid(int i, int j) const + { + return tanh(gamma*dot(x[i],x[j],m_blas)+coef0); + } + double kernel_precomputed(int i, int j) const + { +#ifdef _DENSE_REP + return (x+i)->values[x[j].ind]; +#else + return x[i][(int)(x[j][0].value)].value; +#endif + } +}; + +#ifdef _DENSE_REP +Kernel::Kernel(int l, PREFIX(node) * x_, const svm_parameter& param, BlasFunctions *blas_functions) +#else +Kernel::Kernel(int l, PREFIX(node) * const * x_, const svm_parameter& param, BlasFunctions *blas_functions) +#endif +:kernel_type(param.kernel_type), degree(param.degree), + gamma(param.gamma), coef0(param.coef0) +{ + m_blas = blas_functions; + switch(kernel_type) + { + case LINEAR: + kernel_function = &Kernel::kernel_linear; + break; + case POLY: + kernel_function = &Kernel::kernel_poly; + break; + case RBF: + kernel_function = &Kernel::kernel_rbf; + break; + case SIGMOID: + kernel_function = &Kernel::kernel_sigmoid; + break; + case PRECOMPUTED: + kernel_function = &Kernel::kernel_precomputed; + break; + } + + clone(x,x_,l); + + if(kernel_type == RBF) + { + x_square = new double[l]; + for(int i=0;idim, py->dim); + sum = blas_functions->dot(dim, px->values, 1, py->values, 1); + return sum; +} + +double Kernel::dot(const PREFIX(node) &px, const PREFIX(node) &py, BlasFunctions *blas_functions) +{ + double sum = 0; + + int dim = min(px.dim, py.dim); + sum = blas_functions->dot(dim, px.values, 1, py.values, 1); + return sum; +} +#else +double Kernel::dot(const PREFIX(node) *px, const PREFIX(node) *py, BlasFunctions *blas_functions) +{ + double sum = 0; + while(px->index != -1 && py->index != -1) + { + if(px->index == py->index) + { + sum += px->value * py->value; + ++px; + ++py; + } + else + { + if(px->index > py->index) + ++py; + else + ++px; + } + } + return sum; +} +#endif + +double Kernel::k_function(const PREFIX(node) *x, const PREFIX(node) *y, + const svm_parameter& param, BlasFunctions *blas_functions) +{ + switch(param.kernel_type) + { + case LINEAR: + return dot(x,y,blas_functions); + case POLY: + return powi(param.gamma*dot(x,y,blas_functions)+param.coef0,param.degree); + 
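+	// (descriptive note: the RBF case below accumulates ||x-y||^2; in the
+	//  dense build the overlapping dimensions use a BLAS dot product on the
+	//  difference vector and the leftover tail of the longer vector is added
+	//  term by term, while the sparse build walks both index lists.)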
case RBF: + { + double sum = 0; +#ifdef _DENSE_REP + int dim = min(x->dim, y->dim), i; + double* m_array = (double*)malloc(sizeof(double)*dim); + for (i = 0; i < dim; i++) + { + m_array[i] = x->values[i] - y->values[i]; + } + sum = blas_functions->dot(dim, m_array, 1, m_array, 1); + free(m_array); + for (; i < x->dim; i++) + sum += x->values[i] * x->values[i]; + for (; i < y->dim; i++) + sum += y->values[i] * y->values[i]; +#else + while(x->index != -1 && y->index !=-1) + { + if(x->index == y->index) + { + double d = x->value - y->value; + sum += d*d; + ++x; + ++y; + } + else + { + if(x->index > y->index) + { + sum += y->value * y->value; + ++y; + } + else + { + sum += x->value * x->value; + ++x; + } + } + } + + while(x->index != -1) + { + sum += x->value * x->value; + ++x; + } + + while(y->index != -1) + { + sum += y->value * y->value; + ++y; + } +#endif + return exp(-param.gamma*sum); + } + case SIGMOID: + return tanh(param.gamma*dot(x,y,blas_functions)+param.coef0); + case PRECOMPUTED: //x: test (validation), y: SV + { +#ifdef _DENSE_REP + return x->values[y->ind]; +#else + return x[(int)(y->value)].value; +#endif + } + default: + return 0; // Unreachable + } +} +// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918 +// Solves: +// +// min 0.5(\alpha^T Q \alpha) + p^T \alpha +// +// y^T \alpha = \delta +// y_i = +1 or -1 +// 0 <= alpha_i <= Cp for y_i = 1 +// 0 <= alpha_i <= Cn for y_i = -1 +// +// Given: +// +// Q, p, y, Cp, Cn, and an initial feasible point \alpha +// l is the size of vectors and matrices +// eps is the stopping tolerance +// +// solution will be put in \alpha, objective value will be put in obj +// + +class Solver { +public: + Solver() {}; + virtual ~Solver() {}; + + struct SolutionInfo { + double obj; + double rho; + double *upper_bound; + double r; // for Solver_NU + bool solve_timed_out; + int n_iter; + }; + + void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, + double *alpha_, const double *C_, double eps, + SolutionInfo* si, int shrinking, int max_iter); +protected: + int active_size; + schar *y; + double *G; // gradient of objective function + enum { LOWER_BOUND, UPPER_BOUND, FREE }; + char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE + double *alpha; + const QMatrix *Q; + const double *QD; + double eps; + double Cp,Cn; + double *C; + double *p; + int *active_set; + double *G_bar; // gradient, if we treat free variables as 0 + int l; + bool unshrink; // XXX + + double get_C(int i) + { + return C[i]; + } + void update_alpha_status(int i) + { + if(alpha[i] >= get_C(i)) + alpha_status[i] = UPPER_BOUND; + else if(alpha[i] <= 0) + alpha_status[i] = LOWER_BOUND; + else alpha_status[i] = FREE; + } + bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } + bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } + bool is_free(int i) { return alpha_status[i] == FREE; } + void swap_index(int i, int j); + void reconstruct_gradient(); + virtual int select_working_set(int &i, int &j); + virtual double calculate_rho(); + virtual void do_shrinking(); +private: + bool be_shrunk(int i, double Gmax1, double Gmax2); +}; + +void Solver::swap_index(int i, int j) +{ + Q->swap_index(i,j); + swap(y[i],y[j]); + swap(G[i],G[j]); + swap(alpha_status[i],alpha_status[j]); + swap(alpha[i],alpha[j]); + swap(p[i],p[j]); + swap(active_set[i],active_set[j]); + swap(G_bar[i],G_bar[j]); + swap(C[i], C[j]); +} + +void Solver::reconstruct_gradient() +{ + // reconstruct inactive elements of G from G_bar and free variables + + 
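+	// (descriptive note: for an inactive variable j the full gradient is
+	//  recovered as G[j] = G_bar[j] + p[j] + sum over free i of alpha[i]*Q_ij,
+	//  which is what the two loops below compute; the branch only picks
+	//  whichever iteration order touches fewer kernel entries.)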
if(active_size == l) return; + + int i,j; + int nr_free = 0; + + for(j=active_size;j 2*active_size*(l-active_size)) + { + for(i=active_size;iget_Q(i,active_size); + for(j=0;jget_Q(i,l); + double alpha_i = alpha[i]; + for(j=active_size;jl = l; + this->Q = &Q; + QD=Q.get_QD(); + clone(p, p_,l); + clone(y, y_,l); + clone(alpha,alpha_,l); + clone(C, C_, l); + this->eps = eps; + unshrink = false; + si->solve_timed_out = false; + + // initialize alpha_status + { + alpha_status = new char[l]; + for(int i=0;i= max_iter)) { + info("WARN: libsvm Solver reached max_iter"); + si->solve_timed_out = true; + break; + } + + // show progress and do shrinking + + if(--counter == 0) + { + counter = min(l,1000); + if(shrinking) do_shrinking(); + info("."); + } + + int i,j; + if(select_working_set(i,j)!=0) + { + // reconstruct the whole gradient + reconstruct_gradient(); + // reset active set size and check + active_size = l; + info("*"); + if(select_working_set(i,j)!=0) + break; + else + counter = 1; // do shrinking next iteration + } + + ++iter; + + // update alpha[i] and alpha[j], handle bounds carefully + + const Qfloat *Q_i = Q.get_Q(i,active_size); + const Qfloat *Q_j = Q.get_Q(j,active_size); + + double C_i = get_C(i); + double C_j = get_C(j); + + double old_alpha_i = alpha[i]; + double old_alpha_j = alpha[j]; + + if(y[i]!=y[j]) + { + double quad_coef = QD[i]+QD[j]+2*Q_i[j]; + if (quad_coef <= 0) + quad_coef = TAU; + double delta = (-G[i]-G[j])/quad_coef; + double diff = alpha[i] - alpha[j]; + alpha[i] += delta; + alpha[j] += delta; + + if(diff > 0) + { + if(alpha[j] < 0) + { + alpha[j] = 0; + alpha[i] = diff; + } + } + else + { + if(alpha[i] < 0) + { + alpha[i] = 0; + alpha[j] = -diff; + } + } + if(diff > C_i - C_j) + { + if(alpha[i] > C_i) + { + alpha[i] = C_i; + alpha[j] = C_i - diff; + } + } + else + { + if(alpha[j] > C_j) + { + alpha[j] = C_j; + alpha[i] = C_j + diff; + } + } + } + else + { + double quad_coef = QD[i]+QD[j]-2*Q_i[j]; + if (quad_coef <= 0) + quad_coef = TAU; + double delta = (G[i]-G[j])/quad_coef; + double sum = alpha[i] + alpha[j]; + alpha[i] -= delta; + alpha[j] += delta; + + if(sum > C_i) + { + if(alpha[i] > C_i) + { + alpha[i] = C_i; + alpha[j] = sum - C_i; + } + } + else + { + if(alpha[j] < 0) + { + alpha[j] = 0; + alpha[i] = sum; + } + } + if(sum > C_j) + { + if(alpha[j] > C_j) + { + alpha[j] = C_j; + alpha[i] = sum - C_j; + } + } + else + { + if(alpha[i] < 0) + { + alpha[i] = 0; + alpha[j] = sum; + } + } + } + + // update G + + double delta_alpha_i = alpha[i] - old_alpha_i; + double delta_alpha_j = alpha[j] - old_alpha_j; + + for(int k=0;krho = calculate_rho(); + + // calculate objective value + { + double v = 0; + int i; + for(i=0;iobj = v/2; + } + + // put back the solution + { + for(int i=0;iupper_bound[i] = C[i]; + + // store number of iterations + si->n_iter = iter; + + info("\noptimization finished, #iter = %d\n",iter); + + delete[] p; + delete[] y; + delete[] alpha; + delete[] alpha_status; + delete[] active_set; + delete[] G; + delete[] G_bar; + delete[] C; +} + +// return 1 if already optimal, return 0 otherwise +int Solver::select_working_set(int &out_i, int &out_j) +{ + // return i,j such that + // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) + // j: minimizes the decrease of obj value + // (if quadratic coefficient <= 0, replace it with tau) + // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) + + double Gmax = -INF; + double Gmax2 = -INF; + int Gmax_idx = -1; + int Gmin_idx = -1; + double obj_diff_min = INF; + + for(int t=0;t= Gmax) + { + Gmax = -G[t]; + 
Gmax_idx = t; + } + } + else + { + if(!is_lower_bound(t)) + if(G[t] >= Gmax) + { + Gmax = G[t]; + Gmax_idx = t; + } + } + + int i = Gmax_idx; + const Qfloat *Q_i = NULL; + if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 + Q_i = Q->get_Q(i,active_size); + + for(int j=0;j= Gmax2) + Gmax2 = G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + else + { + if (!is_upper_bound(j)) + { + double grad_diff= Gmax-G[j]; + if (-G[j] >= Gmax2) + Gmax2 = -G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + } + + if(Gmax+Gmax2 < eps || Gmin_idx == -1) + return 1; + + out_i = Gmax_idx; + out_j = Gmin_idx; + return 0; +} + +bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) +{ + if(is_upper_bound(i)) + { + if(y[i]==+1) + return(-G[i] > Gmax1); + else + return(-G[i] > Gmax2); + } + else if(is_lower_bound(i)) + { + if(y[i]==+1) + return(G[i] > Gmax2); + else + return(G[i] > Gmax1); + } + else + return(false); +} + +void Solver::do_shrinking() +{ + int i; + double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } + double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } + + // find maximal violating pair first + for(i=0;i= Gmax1) + Gmax1 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(G[i] >= Gmax2) + Gmax2 = G[i]; + } + } + else + { + if(!is_upper_bound(i)) + { + if(-G[i] >= Gmax2) + Gmax2 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(G[i] >= Gmax1) + Gmax1 = G[i]; + } + } + } + + if(unshrink == false && Gmax1 + Gmax2 <= eps*10) + { + unshrink = true; + reconstruct_gradient(); + active_size = l; + info("*"); + } + + for(i=0;i i) + { + if (!be_shrunk(active_size, Gmax1, Gmax2)) + { + swap_index(i,active_size); + break; + } + active_size--; + } + } +} + +double Solver::calculate_rho() +{ + double r; + int nr_free = 0; + double ub = INF, lb = -INF, sum_free = 0; + for(int i=0;i0) + r = sum_free/nr_free; + else + r = (ub+lb)/2; + + return r; +} + +// +// Solver for nu-svm classification and regression +// +// additional constraint: e^T \alpha = constant +// +class Solver_NU : public Solver +{ +public: + Solver_NU() {} + void Solve(int l, const QMatrix& Q, const double *p, const schar *y, + double *alpha, const double *C_, double eps, + SolutionInfo* si, int shrinking, int max_iter) + { + this->si = si; + Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking,max_iter); + } +private: + SolutionInfo *si; + int select_working_set(int &i, int &j); + double calculate_rho(); + bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); + void do_shrinking(); +}; + +// return 1 if already optimal, return 0 otherwise +int Solver_NU::select_working_set(int &out_i, int &out_j) +{ + // return i,j such that y_i = y_j and + // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) + // j: minimizes the decrease of obj value + // (if quadratic coefficient <= 0, replace it with tau) + // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) + + double Gmaxp = -INF; + double Gmaxp2 = -INF; + int Gmaxp_idx = -1; + + double Gmaxn = -INF; + double Gmaxn2 = -INF; + int Gmaxn_idx 
= -1; + + int Gmin_idx = -1; + double obj_diff_min = INF; + + for(int t=0;t= Gmaxp) + { + Gmaxp = -G[t]; + Gmaxp_idx = t; + } + } + else + { + if(!is_lower_bound(t)) + if(G[t] >= Gmaxn) + { + Gmaxn = G[t]; + Gmaxn_idx = t; + } + } + + int ip = Gmaxp_idx; + int in = Gmaxn_idx; + const Qfloat *Q_ip = NULL; + const Qfloat *Q_in = NULL; + if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 + Q_ip = Q->get_Q(ip,active_size); + if(in != -1) + Q_in = Q->get_Q(in,active_size); + + for(int j=0;j= Gmaxp2) + Gmaxp2 = G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[ip]+QD[j]-2*Q_ip[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + else + { + if (!is_upper_bound(j)) + { + double grad_diff=Gmaxn-G[j]; + if (-G[j] >= Gmaxn2) + Gmaxn2 = -G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[in]+QD[j]-2*Q_in[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + } + + if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1) + return 1; + + if (y[Gmin_idx] == +1) + out_i = Gmaxp_idx; + else + out_i = Gmaxn_idx; + out_j = Gmin_idx; + + return 0; +} + +bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) +{ + if(is_upper_bound(i)) + { + if(y[i]==+1) + return(-G[i] > Gmax1); + else + return(-G[i] > Gmax4); + } + else if(is_lower_bound(i)) + { + if(y[i]==+1) + return(G[i] > Gmax2); + else + return(G[i] > Gmax3); + } + else + return(false); +} + +void Solver_NU::do_shrinking() +{ + double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } + double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } + double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } + double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } + + // find maximal violating pair first + int i; + for(i=0;i Gmax1) Gmax1 = -G[i]; + } + else if(-G[i] > Gmax4) Gmax4 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(y[i]==+1) + { + if(G[i] > Gmax2) Gmax2 = G[i]; + } + else if(G[i] > Gmax3) Gmax3 = G[i]; + } + } + + if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) + { + unshrink = true; + reconstruct_gradient(); + active_size = l; + } + + for(i=0;i i) + { + if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) + { + swap_index(i,active_size); + break; + } + active_size--; + } + } +} + +double Solver_NU::calculate_rho() +{ + int nr_free1 = 0,nr_free2 = 0; + double ub1 = INF, ub2 = INF; + double lb1 = -INF, lb2 = -INF; + double sum_free1 = 0, sum_free2 = 0; + + for(int i=0;i 0) + r1 = sum_free1/nr_free1; + else + r1 = (ub1+lb1)/2; + + if(nr_free2 > 0) + r2 = sum_free2/nr_free2; + else + r2 = (ub2+lb2)/2; + + si->r = (r1+r2)/2; + return (r1-r2)/2; +} + +// +// Q matrices for various formulations +// +class SVC_Q: public Kernel +{ +public: + SVC_Q(const PREFIX(problem)& prob, const svm_parameter& param, const schar *y_, BlasFunctions *blas_functions) + :Kernel(prob.l, prob.x, param, blas_functions) + { + clone(y,y_,prob.l); + cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); + QD = new double[prob.l]; + for(int i=0;i*kernel_function)(i,i); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int start, j; + 
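+		// (descriptive note: Cache::get_data returns the first row index that
+		//  still has to be filled, so only entries [start,len) are recomputed
+		//  through kernel_function; earlier entries come from the LRU cache.)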
if((start = cache->get_data(i,&data,len)) < len) + { + for(j=start;j*kernel_function)(i,j)); + } + return data; + } + + double *get_QD() const + { + return QD; + } + + void swap_index(int i, int j) const + { + cache->swap_index(i,j); + Kernel::swap_index(i,j); + swap(y[i],y[j]); + swap(QD[i],QD[j]); + } + + ~SVC_Q() + { + delete[] y; + delete cache; + delete[] QD; + } +private: + schar *y; + Cache *cache; + double *QD; +}; + +class ONE_CLASS_Q: public Kernel +{ +public: + ONE_CLASS_Q(const PREFIX(problem)& prob, const svm_parameter& param, BlasFunctions *blas_functions) + :Kernel(prob.l, prob.x, param, blas_functions) + { + cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); + QD = new double[prob.l]; + for(int i=0;i*kernel_function)(i,i); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int start, j; + if((start = cache->get_data(i,&data,len)) < len) + { + for(j=start;j*kernel_function)(i,j); + } + return data; + } + + double *get_QD() const + { + return QD; + } + + void swap_index(int i, int j) const + { + cache->swap_index(i,j); + Kernel::swap_index(i,j); + swap(QD[i],QD[j]); + } + + ~ONE_CLASS_Q() + { + delete cache; + delete[] QD; + } +private: + Cache *cache; + double *QD; +}; + +class SVR_Q: public Kernel +{ +public: + SVR_Q(const PREFIX(problem)& prob, const svm_parameter& param, BlasFunctions *blas_functions) + :Kernel(prob.l, prob.x, param, blas_functions) + { + l = prob.l; + cache = new Cache(l,(long int)(param.cache_size*(1<<20))); + QD = new double[2*l]; + sign = new schar[2*l]; + index = new int[2*l]; + for(int k=0;k*kernel_function)(k,k); + QD[k+l] = QD[k]; + } + buffer[0] = new Qfloat[2*l]; + buffer[1] = new Qfloat[2*l]; + next_buffer = 0; + } + + void swap_index(int i, int j) const + { + swap(sign[i],sign[j]); + swap(index[i],index[j]); + swap(QD[i],QD[j]); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int j, real_i = index[i]; + if(cache->get_data(real_i,&data,l) < l) + { + for(j=0;j*kernel_function)(real_i,j); + } + + // reorder and copy + Qfloat *buf = buffer[next_buffer]; + next_buffer = 1 - next_buffer; + schar si = sign[i]; + for(j=0;jl; + double *minus_ones = new double[l]; + schar *y = new schar[l]; + double *C = new double[l]; + + int i; + + for(i=0;iy[i] > 0) + { + y[i] = +1; + C[i] = prob->W[i]*Cp; + } + else + { + y[i] = -1; + C[i] = prob->W[i]*Cn; + } + } + + Solver s; + s.Solve(l, SVC_Q(*prob,*param,y, blas_functions), minus_ones, y, + alpha, C, param->eps, si, param->shrinking, + param->max_iter); + + /* + double sum_alpha=0; + for(i=0;il)); + */ + + for(i=0;il; + double nu = param->nu; + + schar *y = new schar[l]; + double *C = new double[l]; + + for(i=0;iy[i]>0) + y[i] = +1; + else + y[i] = -1; + + C[i] = prob->W[i]; + } + + double nu_l = 0; + for(i=0;ieps, si, param->shrinking, param->max_iter); + double r = si->r; + + info("C = %f\n",1/r); + + for(i=0;iupper_bound[i] /= r; + } + + si->rho /= r; + si->obj /= (r*r); + + delete[] C; + delete[] y; + delete[] zeros; +} + +static void solve_one_class( + const PREFIX(problem) *prob, const svm_parameter *param, + double *alpha, Solver::SolutionInfo* si, BlasFunctions *blas_functions) +{ + int l = prob->l; + double *zeros = new double[l]; + schar *ones = new schar[l]; + double *C = new double[l]; + int i; + + double nu_l = 0; + + for(i=0;iW[i]; + nu_l += C[i] * param->nu; + } + + i = 0; + while(nu_l > 0) + { + alpha[i] = min(C[i],nu_l); + nu_l -= alpha[i]; + ++i; + } + for(;ieps, si, param->shrinking, param->max_iter); + + delete[] C; + delete[] zeros; + delete[] 
ones; +} + +static void solve_epsilon_svr( + const PREFIX(problem) *prob, const svm_parameter *param, + double *alpha, Solver::SolutionInfo* si, BlasFunctions *blas_functions) +{ + int l = prob->l; + double *alpha2 = new double[2*l]; + double *linear_term = new double[2*l]; + schar *y = new schar[2*l]; + double *C = new double[2*l]; + int i; + + for(i=0;ip - prob->y[i]; + y[i] = 1; + C[i] = prob->W[i]*param->C; + + alpha2[i+l] = 0; + linear_term[i+l] = param->p + prob->y[i]; + y[i+l] = -1; + C[i+l] = prob->W[i]*param->C; + } + + Solver s; + s.Solve(2*l, SVR_Q(*prob,*param,blas_functions), linear_term, y, + alpha2, C, param->eps, si, param->shrinking, param->max_iter); + + double sum_alpha = 0; + for(i=0;il; + double *C = new double[2*l]; + double *alpha2 = new double[2*l]; + double *linear_term = new double[2*l]; + schar *y = new schar[2*l]; + int i; + + double sum = 0; + for(i=0;iW[i]*param->C; + sum += C[i] * param->nu; + } + sum /= 2; + + for(i=0;iy[i]; + y[i] = 1; + + linear_term[i+l] = prob->y[i]; + y[i+l] = -1; + } + + Solver_NU s; + s.Solve(2*l, SVR_Q(*prob,*param,blas_functions), linear_term, y, + alpha2, C, param->eps, si, param->shrinking, param->max_iter); + + info("epsilon = %f\n",-si->r); + + for(i=0;il); + Solver::SolutionInfo si; + switch(param->svm_type) + { + case C_SVC: + si.upper_bound = Malloc(double,prob->l); + solve_c_svc(prob,param,alpha,&si,Cp,Cn,blas_functions); + break; + case NU_SVC: + si.upper_bound = Malloc(double,prob->l); + solve_nu_svc(prob,param,alpha,&si,blas_functions); + break; + case ONE_CLASS: + si.upper_bound = Malloc(double,prob->l); + solve_one_class(prob,param,alpha,&si,blas_functions); + break; + case EPSILON_SVR: + si.upper_bound = Malloc(double,2*prob->l); + solve_epsilon_svr(prob,param,alpha,&si,blas_functions); + break; + case NU_SVR: + si.upper_bound = Malloc(double,2*prob->l); + solve_nu_svr(prob,param,alpha,&si,blas_functions); + break; + } + + *status |= si.solve_timed_out; + + info("obj = %f, rho = %f\n",si.obj,si.rho); + + // output SVs + + int nSV = 0; + int nBSV = 0; + for(int i=0;il;i++) + { + if(fabs(alpha[i]) > 0) + { + ++nSV; + if(prob->y[i] > 0) + { + if(fabs(alpha[i]) >= si.upper_bound[i]) + ++nBSV; + } + else + { + if(fabs(alpha[i]) >= si.upper_bound[i]) + ++nBSV; + } + } + } + + free(si.upper_bound); + + info("nSV = %d, nBSV = %d\n",nSV,nBSV); + + decision_function f; + f.alpha = alpha; + f.rho = si.rho; + f.n_iter = si.n_iter; + return f; +} + +// Platt's binary SVM Probabilistic Output: an improvement from Lin et al. 
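+// Fits P(y=1|f) = 1/(1+exp(A*f+B)) by maximum likelihood: Newton steps on (A,B)
+// with a backtracking line search, using the smoothed targets
+// hiTarget = (prior1+1)/(prior1+2) and loTarget = 1/(prior0+2) instead of hard 0/1 labels.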
+static void sigmoid_train( + int l, const double *dec_values, const double *labels, + double& A, double& B) +{ + double prior1=0, prior0 = 0; + int i; + + for (i=0;i 0) prior1+=1; + else prior0+=1; + + int max_iter=100; // Maximal number of iterations + double min_step=1e-10; // Minimal step taken in line search + double sigma=1e-12; // For numerically strict PD of Hessian + double eps=1e-5; + double hiTarget=(prior1+1.0)/(prior1+2.0); + double loTarget=1/(prior0+2.0); + double *t=Malloc(double,l); + double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize; + double newA,newB,newf,d1,d2; + int iter; + + // Initial Point and Initial Fun Value + A=0.0; B=log((prior0+1.0)/(prior1+1.0)); + double fval = 0.0; + + for (i=0;i0) t[i]=hiTarget; + else t[i]=loTarget; + fApB = dec_values[i]*A+B; + if (fApB>=0) + fval += t[i]*fApB + log(1+exp(-fApB)); + else + fval += (t[i] - 1)*fApB +log(1+exp(fApB)); + } + for (iter=0;iter= 0) + { + p=exp(-fApB)/(1.0+exp(-fApB)); + q=1.0/(1.0+exp(-fApB)); + } + else + { + p=1.0/(1.0+exp(fApB)); + q=exp(fApB)/(1.0+exp(fApB)); + } + d2=p*q; + h11+=dec_values[i]*dec_values[i]*d2; + h22+=d2; + h21+=dec_values[i]*d2; + d1=t[i]-p; + g1+=dec_values[i]*d1; + g2+=d1; + } + + // Stopping Criteria + if (fabs(g1)= min_step) + { + newA = A + stepsize * dA; + newB = B + stepsize * dB; + + // New function value + newf = 0.0; + for (i=0;i= 0) + newf += t[i]*fApB + log(1+exp(-fApB)); + else + newf += (t[i] - 1)*fApB +log(1+exp(fApB)); + } + // Check sufficient decrease + if (newf=max_iter) + info("Reaching maximal iterations in two-class probability estimates\n"); + free(t); +} + +static double sigmoid_predict(double decision_value, double A, double B) +{ + double fApB = decision_value*A+B; + // 1-p used later; avoid catastrophic cancellation + if (fApB >= 0) + return exp(-fApB)/(1.0+exp(-fApB)); + else + return 1.0/(1+exp(fApB)) ; +} + +// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng +static void multiclass_probability(int k, double **r, double *p) +{ + int t,j; + int iter = 0, max_iter=max(100,k); + double **Q=Malloc(double *,k); + double *Qp=Malloc(double,k); + double pQp, eps=0.005/k; + + for (t=0;tmax_error) + max_error=error; + } + if (max_error=max_iter) + info("Exceeds max_iter in multiclass_prob\n"); + for(t=0;tl); + double *dec_values = Malloc(double,prob->l); + + // random shuffle + for(i=0;il;i++) perm[i]=i; + for(i=0;il;i++) + { + int j = i+bounded_rand_int(prob->l-i); + swap(perm[i],perm[j]); + } + for(i=0;il/nr_fold; + int end = (i+1)*prob->l/nr_fold; + int j,k; + struct PREFIX(problem) subprob; + + subprob.l = prob->l-(end-begin); +#ifdef _DENSE_REP + subprob.x = Malloc(struct PREFIX(node),subprob.l); +#else + subprob.x = Malloc(struct PREFIX(node)*,subprob.l); +#endif + subprob.y = Malloc(double,subprob.l); + subprob.W = Malloc(double,subprob.l); + + k=0; + for(j=0;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + subprob.W[k] = prob->W[perm[j]]; + ++k; + } + for(j=end;jl;j++) + { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + subprob.W[k] = prob->W[perm[j]]; + ++k; + } + int p_count=0,n_count=0; + for(j=0;j0) + p_count++; + else + n_count++; + + if(p_count==0 && n_count==0) + for(j=begin;j 0 && n_count == 0) + for(j=begin;j 0) + for(j=begin;jx+perm[j]),&(dec_values[perm[j]]), blas_functions); +#else + PREFIX(predict_values)(submodel,prob->x[perm[j]],&(dec_values[perm[j]]), blas_functions); +#endif + // ensure +1 -1 order; reason not using CV subroutine + dec_values[perm[j]] *= submodel->label[0]; + } + 
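// Done with this fold: release the trained submodel and its parameter copy. +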
PREFIX(free_and_destroy_model)(&submodel); + PREFIX(destroy_param)(&subparam); + } + free(subprob.x); + free(subprob.y); + free(subprob.W); + } + sigmoid_train(prob->l,dec_values,prob->y,probA,probB); + free(dec_values); + free(perm); +} + +// Return parameter of a Laplace distribution +static double svm_svr_probability( + const PREFIX(problem) *prob, const svm_parameter *param, BlasFunctions *blas_functions) +{ + int i; + int nr_fold = 5; + double *ymv = Malloc(double,prob->l); + double mae = 0; + + svm_parameter newparam = *param; + newparam.probability = 0; + newparam.random_seed = -1; // This is called from train, which already sets + // the seed. + PREFIX(cross_validation)(prob,&newparam,nr_fold,ymv, blas_functions); + for(i=0;il;i++) + { + ymv[i]=prob->y[i]-ymv[i]; + mae += fabs(ymv[i]); + } + mae /= prob->l; + double std=sqrt(2*mae*mae); + int count=0; + mae=0; + for(i=0;il;i++) + if (fabs(ymv[i]) > 5*std) + count=count+1; + else + mae+=fabs(ymv[i]); + mae /= (prob->l-count); + info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae); + free(ymv); + return mae; +} + + + +// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data +// perm, length l, must be allocated before calling this subroutine +static void svm_group_classes(const PREFIX(problem) *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) +{ + int l = prob->l; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + int *count = Malloc(int,max_nr_class); + int *data_label = Malloc(int,l); + int i, j, this_label, this_count; + + for(i=0;iy[i]; + for(j=0;j=0 && label[i] > this_label) + { + label[i+1] = label[i]; + count[i+1] = count[i]; + i--; + } + label[i+1] = this_label; + count[i+1] = this_count; + } + + for (i=0; iy[i]; + while(this_label != label[j]){ + j ++; + } + data_label[i] = j; + } + + int *start = Malloc(int,nr_class); + start[0] = 0; + for(i=1;i 0. 
+// +static void remove_zero_weight(PREFIX(problem) *newprob, const PREFIX(problem) *prob) +{ + int i; + int l = 0; + for(i=0;il;i++) + if(prob->W[i] > 0) l++; + *newprob = *prob; + newprob->l = l; +#ifdef _DENSE_REP + newprob->x = Malloc(PREFIX(node),l); +#else + newprob->x = Malloc(PREFIX(node) *,l); +#endif + newprob->y = Malloc(double,l); + newprob->W = Malloc(double,l); + + int j = 0; + for(i=0;il;i++) + if(prob->W[i] > 0) + { + newprob->x[j] = prob->x[i]; + newprob->y[j] = prob->y[i]; + newprob->W[j] = prob->W[i]; + j++; + } +} + +// +// Interface functions +// +PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *param, + int *status, BlasFunctions *blas_functions) +{ + PREFIX(problem) newprob; + remove_zero_weight(&newprob, prob); + prob = &newprob; + + PREFIX(model) *model = Malloc(PREFIX(model),1); + model->param = *param; + model->free_sv = 0; // XXX + + if(param->random_seed >= 0) + { + set_seed(param->random_seed); + } + + if(param->svm_type == ONE_CLASS || + param->svm_type == EPSILON_SVR || + param->svm_type == NU_SVR) + { + // regression or one-class-svm + model->nr_class = 2; + model->label = NULL; + model->nSV = NULL; + model->probA = NULL; model->probB = NULL; + model->sv_coef = Malloc(double *,1); + + if(param->probability && + (param->svm_type == EPSILON_SVR || + param->svm_type == NU_SVR)) + { + model->probA = Malloc(double,1); + model->probA[0] = NAMESPACE::svm_svr_probability(prob,param,blas_functions); + } + + NAMESPACE::decision_function f = NAMESPACE::svm_train_one(prob,param,0,0, status,blas_functions); + model->rho = Malloc(double,1); + model->rho[0] = f.rho; + model->n_iter = Malloc(int,1); + model->n_iter[0] = f.n_iter; + + int nSV = 0; + int i; + for(i=0;il;i++) + if(fabs(f.alpha[i]) > 0) ++nSV; + model->l = nSV; +#ifdef _DENSE_REP + model->SV = Malloc(PREFIX(node),nSV); +#else + model->SV = Malloc(PREFIX(node) *,nSV); +#endif + model->sv_ind = Malloc(int, nSV); + model->sv_coef[0] = Malloc(double, nSV); + int j = 0; + for(i=0;il;i++) + if(fabs(f.alpha[i]) > 0) + { + model->SV[j] = prob->x[i]; + model->sv_ind[j] = i; + model->sv_coef[0][j] = f.alpha[i]; + ++j; + } + + free(f.alpha); + } + else + { + // classification + int l = prob->l; + int nr_class; + int *label = NULL; + int *start = NULL; + int *count = NULL; + int *perm = Malloc(int,l); + + // group training data of the same class + NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm); +#ifdef _DENSE_REP + PREFIX(node) *x = Malloc(PREFIX(node),l); +#else + PREFIX(node) **x = Malloc(PREFIX(node) *,l); +#endif + double *W = Malloc(double, l); + + int i; + for(i=0;ix[perm[i]]; + W[i] = prob->W[perm[i]]; + } + + // calculate weighted C + + double *weighted_C = Malloc(double, nr_class); + for(i=0;iC; + for(i=0;inr_weight;i++) + { + int j; + for(j=0;jweight_label[i] == label[j]) + break; + if(j == nr_class) + fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]); + else + weighted_C[j] *= param->weight[i]; + } + + // train k*(k-1)/2 models + + bool *nonzero = Malloc(bool,l); + for(i=0;iprobability) + { + probA=Malloc(double,nr_class*(nr_class-1)/2); + probB=Malloc(double,nr_class*(nr_class-1)/2); + } + + int p = 0; + for(i=0;iprobability) + NAMESPACE::svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p], status, blas_functions); + + f[p] = NAMESPACE::svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j], status, blas_functions); + for(k=0;k 0) + nonzero[si+k] = true; + for(k=0;k 
0) + nonzero[sj+k] = true; + free(sub_prob.x); + free(sub_prob.y); + free(sub_prob.W); + ++p; + } + + // build output + + model->nr_class = nr_class; + + model->label = Malloc(int,nr_class); + for(i=0;ilabel[i] = label[i]; + + model->rho = Malloc(double,nr_class*(nr_class-1)/2); + model->n_iter = Malloc(int,nr_class*(nr_class-1)/2); + for(i=0;irho[i] = f[i].rho; + model->n_iter[i] = f[i].n_iter; + } + + if(param->probability) + { + model->probA = Malloc(double,nr_class*(nr_class-1)/2); + model->probB = Malloc(double,nr_class*(nr_class-1)/2); + for(i=0;iprobA[i] = probA[i]; + model->probB[i] = probB[i]; + } + } + else + { + model->probA=NULL; + model->probB=NULL; + } + + int total_sv = 0; + int *nz_count = Malloc(int,nr_class); + model->nSV = Malloc(int,nr_class); + for(i=0;inSV[i] = nSV; + nz_count[i] = nSV; + } + + info("Total nSV = %d\n",total_sv); + + model->l = total_sv; + model->sv_ind = Malloc(int, total_sv); +#ifdef _DENSE_REP + model->SV = Malloc(PREFIX(node),total_sv); +#else + model->SV = Malloc(PREFIX(node) *,total_sv); +#endif + p = 0; + for(i=0;iSV[p] = x[i]; + model->sv_ind[p] = perm[i]; + ++p; + } + } + + int *nz_start = Malloc(int,nr_class); + nz_start[0] = 0; + for(i=1;isv_coef = Malloc(double *,nr_class-1); + for(i=0;isv_coef[i] = Malloc(double,total_sv); + + p = 0; + for(i=0;isv_coef[j-1][q++] = f[p].alpha[k]; + q = nz_start[j]; + for(k=0;ksv_coef[i][q++] = f[p].alpha[ci+k]; + ++p; + } + + free(label); + free(probA); + free(probB); + free(count); + free(perm); + free(start); + free(W); + free(x); + free(weighted_C); + free(nonzero); + for(i=0;il; + int *perm = Malloc(int,l); + int nr_class; + if(param->random_seed >= 0) + { + set_seed(param->random_seed); + } + + // stratified cv may not give leave-one-out rate + // Each class to l folds -> some folds may have zero elements + if((param->svm_type == C_SVC || + param->svm_type == NU_SVC) && nr_fold < l) + { + int *start = NULL; + int *label = NULL; + int *count = NULL; + NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm); + + // random shuffle and then data grouped by fold using the array perm + int *fold_count = Malloc(int,nr_fold); + int c; + int *index = Malloc(int,l); + for(i=0;ix[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + subprob.W[k] = prob->W[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + subprob.W[k] = prob->W[perm[j]]; + ++k; + } + int dummy_status = 0; // IGNORES TIMEOUT ERRORS + struct PREFIX(model) *submodel = PREFIX(train)(&subprob,param, &dummy_status, blas_functions); + if(param->probability && + (param->svm_type == C_SVC || param->svm_type == NU_SVC)) + { + double *prob_estimates=Malloc(double, PREFIX(get_nr_class)(submodel)); + for(j=begin;jx + perm[j]),prob_estimates, blas_functions); +#else + target[perm[j]] = PREFIX(predict_probability)(submodel,prob->x[perm[j]],prob_estimates, blas_functions); +#endif + free(prob_estimates); + } + else + for(j=begin;jx+perm[j],blas_functions); +#else + target[perm[j]] = PREFIX(predict)(submodel,prob->x[perm[j]],blas_functions); +#endif + PREFIX(free_and_destroy_model)(&submodel); + free(subprob.x); + free(subprob.y); + free(subprob.W); + } + free(fold_start); + free(perm); +} + + +int PREFIX(get_svm_type)(const PREFIX(model) *model) +{ + return model->param.svm_type; +} + +int PREFIX(get_nr_class)(const PREFIX(model) *model) +{ + return model->nr_class; +} + +void PREFIX(get_labels)(const PREFIX(model) *model, int* label) +{ + if (model->label != NULL) + for(int i=0;inr_class;i++) + label[i] = model->label[i]; 
+} + +double PREFIX(get_svr_probability)(const PREFIX(model) *model) +{ + if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && + model->probA!=NULL) + return model->probA[0]; + else + { + fprintf(stderr,"Model doesn't contain information for SVR probability inference\n"); + return 0; + } +} + +double PREFIX(predict_values)(const PREFIX(model) *model, const PREFIX(node) *x, double* dec_values, BlasFunctions *blas_functions) +{ + int i; + if(model->param.svm_type == ONE_CLASS || + model->param.svm_type == EPSILON_SVR || + model->param.svm_type == NU_SVR) + { + double *sv_coef = model->sv_coef[0]; + double sum = 0; + + for(i=0;il;i++) +#ifdef _DENSE_REP + sum += sv_coef[i] * NAMESPACE::Kernel::k_function(x,model->SV+i,model->param,blas_functions); +#else + sum += sv_coef[i] * NAMESPACE::Kernel::k_function(x,model->SV[i],model->param,blas_functions); +#endif + sum -= model->rho[0]; + *dec_values = sum; + + if(model->param.svm_type == ONE_CLASS) + return (sum>0)?1:-1; + else + return sum; + } + else + { + int nr_class = model->nr_class; + int l = model->l; + + double *kvalue = Malloc(double,l); + for(i=0;iSV+i,model->param,blas_functions); +#else + kvalue[i] = NAMESPACE::Kernel::k_function(x,model->SV[i],model->param,blas_functions); +#endif + + int *start = Malloc(int,nr_class); + start[0] = 0; + for(i=1;inSV[i-1]; + + int *vote = Malloc(int,nr_class); + for(i=0;inSV[i]; + int cj = model->nSV[j]; + + int k; + double *coef1 = model->sv_coef[j-1]; + double *coef2 = model->sv_coef[i]; + for(k=0;krho[p]; + dec_values[p] = sum; + + if(dec_values[p] > 0) + ++vote[i]; + else + ++vote[j]; + p++; + } + + int vote_max_idx = 0; + for(i=1;i vote[vote_max_idx]) + vote_max_idx = i; + + free(kvalue); + free(start); + free(vote); + return model->label[vote_max_idx]; + } +} + +double PREFIX(predict)(const PREFIX(model) *model, const PREFIX(node) *x, BlasFunctions *blas_functions) +{ + int nr_class = model->nr_class; + double *dec_values; + if(model->param.svm_type == ONE_CLASS || + model->param.svm_type == EPSILON_SVR || + model->param.svm_type == NU_SVR) + dec_values = Malloc(double, 1); + else + dec_values = Malloc(double, nr_class*(nr_class-1)/2); + double pred_result = PREFIX(predict_values)(model, x, dec_values, blas_functions); + free(dec_values); + return pred_result; +} + +double PREFIX(predict_probability)( + const PREFIX(model) *model, const PREFIX(node) *x, double *prob_estimates, BlasFunctions *blas_functions) +{ + if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && + model->probA!=NULL && model->probB!=NULL) + { + int i; + int nr_class = model->nr_class; + double *dec_values = Malloc(double, nr_class*(nr_class-1)/2); + PREFIX(predict_values)(model, x, dec_values, blas_functions); + + double min_prob=1e-7; + double **pairwise_prob=Malloc(double *,nr_class); + for(i=0;iprobA[k],model->probB[k]),min_prob),1-min_prob); + pairwise_prob[j][i]=1-pairwise_prob[i][j]; + k++; + } + NAMESPACE::multiclass_probability(nr_class,pairwise_prob,prob_estimates); + + int prob_max_idx = 0; + for(i=1;i prob_estimates[prob_max_idx]) + prob_max_idx = i; + for(i=0;ilabel[prob_max_idx]; + } + else + return PREFIX(predict)(model, x, blas_functions); +} + + +void PREFIX(free_model_content)(PREFIX(model)* model_ptr) +{ + if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL) +#ifdef _DENSE_REP + for (int i = 0; i < model_ptr->l; i++) + free(model_ptr->SV[i].values); +#else + free((void *)(model_ptr->SV[0])); +#endif + + if(model_ptr->sv_coef) + { + for(int 
i=0;inr_class-1;i++) + free(model_ptr->sv_coef[i]); + } + + free(model_ptr->SV); + model_ptr->SV = NULL; + + free(model_ptr->sv_coef); + model_ptr->sv_coef = NULL; + + free(model_ptr->sv_ind); + model_ptr->sv_ind = NULL; + + free(model_ptr->rho); + model_ptr->rho = NULL; + + free(model_ptr->label); + model_ptr->label= NULL; + + free(model_ptr->probA); + model_ptr->probA = NULL; + + free(model_ptr->probB); + model_ptr->probB= NULL; + + free(model_ptr->nSV); + model_ptr->nSV = NULL; + + free(model_ptr->n_iter); + model_ptr->n_iter = NULL; +} + +void PREFIX(free_and_destroy_model)(PREFIX(model)** model_ptr_ptr) +{ + if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL) + { + PREFIX(free_model_content)(*model_ptr_ptr); + free(*model_ptr_ptr); + *model_ptr_ptr = NULL; + } +} + +void PREFIX(destroy_param)(svm_parameter* param) +{ + free(param->weight_label); + free(param->weight); +} + +const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_parameter *param) +{ + // svm_type + + int svm_type = param->svm_type; + if(svm_type != C_SVC && + svm_type != NU_SVC && + svm_type != ONE_CLASS && + svm_type != EPSILON_SVR && + svm_type != NU_SVR) + return "unknown svm type"; + + // kernel_type, degree + + int kernel_type = param->kernel_type; + if(kernel_type != LINEAR && + kernel_type != POLY && + kernel_type != RBF && + kernel_type != SIGMOID && + kernel_type != PRECOMPUTED) + return "unknown kernel type"; + + if(param->gamma < 0) + return "gamma < 0"; + + if(param->degree < 0) + return "degree of polynomial kernel < 0"; + + // cache_size,eps,C,nu,p,shrinking + + if(param->cache_size <= 0) + return "cache_size <= 0"; + + if(param->eps <= 0) + return "eps <= 0"; + + if(svm_type == C_SVC || + svm_type == EPSILON_SVR || + svm_type == NU_SVR) + if(param->C <= 0) + return "C <= 0"; + + if(svm_type == NU_SVC || + svm_type == ONE_CLASS || + svm_type == NU_SVR) + if(param->nu <= 0 || param->nu > 1) + return "nu <= 0 or nu > 1"; + + if(svm_type == EPSILON_SVR) + if(param->p < 0) + return "p < 0"; + + if(param->shrinking != 0 && + param->shrinking != 1) + return "shrinking != 0 and shrinking != 1"; + + if(param->probability != 0 && + param->probability != 1) + return "probability != 0 and probability != 1"; + + if(param->probability == 1 && + svm_type == ONE_CLASS) + return "one-class SVM probability output not supported yet"; + + + // check whether nu-svc is feasible + + if(svm_type == NU_SVC) + { + int l = prob->l; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + double *count = Malloc(double,max_nr_class); + + int i; + for(i=0;iy[i]; + int j; + for(j=0;jW[i]; + break; + } + if(j == nr_class) + { + if(nr_class == max_nr_class) + { + max_nr_class *= 2; + label = (int *)realloc(label,max_nr_class*sizeof(int)); + count = (double *)realloc(count,max_nr_class*sizeof(double)); + + } + label[nr_class] = this_label; + count[nr_class] = prob->W[i]; + ++nr_class; + } + } + + for(i=0;inu*(n1+n2)/2 > min(n1,n2)) + { + free(label); + free(count); + return "specified nu is infeasible"; + } + } + } + free(label); + free(count); + } + + if(svm_type == C_SVC || + svm_type == EPSILON_SVR || + svm_type == NU_SVR || + svm_type == ONE_CLASS) + { + PREFIX(problem) newprob; + // filter samples with negative and null weights + remove_zero_weight(&newprob, prob); + + // all samples were removed + if(newprob.l == 0) { + free(newprob.x); + free(newprob.y); + free(newprob.W); + return "Invalid input - all samples have zero or negative weights."; + } + else if(prob->l != newprob.l && 
+ svm_type == C_SVC) + { + bool only_one_label = true; + int first_label = newprob.y[0]; + for(int i=1;i + */ +#ifndef _NEWRAND_H +#define _NEWRAND_H + +#ifdef __cplusplus +#include // needed for cython to generate a .cpp file from newrand.h +extern "C" { +#endif + +// Scikit-Learn-specific random number generator replacing `rand()` originally +// used in LibSVM / LibLinear, to ensure the same behaviour on windows-linux, +// with increased speed +// - (1) Init a `mt_rand` object +std::mt19937 mt_rand(std::mt19937::default_seed); + +// - (2) public `set_seed()` function that should be used instead of `srand()` to set a new seed. +void set_seed(unsigned custom_seed) { + mt_rand.seed(custom_seed); +} + +// - (3) New internal `bounded_rand_int` function, used instead of rand() everywhere. +inline uint32_t bounded_rand_int(uint32_t range) { + // "LibSVM / LibLinear Original way" - make a 31bit positive + // random number and use modulo to make it fit in the range + // return abs( (int)mt_rand()) % range; + + // "Better way": tweaked Lemire post-processor + // from http://www.pcg-random.org/posts/bounded-rands.html + uint32_t x = mt_rand(); + uint64_t m = uint64_t(x) * uint64_t(range); + uint32_t l = uint32_t(m); + if (l < range) { + uint32_t t = -range; + if (t >= range) { + t -= range; + if (t >= range) + t %= range; + } + while (l < t) { + x = mt_rand(); + m = uint64_t(x) * uint64_t(range); + l = uint32_t(m); + } + } + return m >> 32; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _NEWRAND_H */ diff --git a/.venv/Lib/site-packages/sklearn/svm/tests/__init__.py b/.venv/Lib/site-packages/sklearn/svm/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/sklearn/svm/tests/test_bounds.py b/.venv/Lib/site-packages/sklearn/svm/tests/test_bounds.py new file mode 100644 index 0000000000000000000000000000000000000000..01c33b1319b42323e9a112d4d54c4ed673473633 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/tests/test_bounds.py @@ -0,0 +1,142 @@ +import numpy as np +import pytest +from scipy import stats + +from sklearn.linear_model import LogisticRegression +from sklearn.svm import LinearSVC +from sklearn.svm._bounds import l1_min_c +from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap +from sklearn.utils.fixes import CSR_CONTAINERS + +dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]] + +Y1 = [0, 1, 1, 1] +Y2 = [2, 1, 0, 0] + + +@pytest.mark.parametrize("X_container", CSR_CONTAINERS + [np.array]) +@pytest.mark.parametrize("loss", ["squared_hinge", "log"]) +@pytest.mark.parametrize("Y_label", ["two-classes", "multi-class"]) +@pytest.mark.parametrize("intercept_label", ["no-intercept", "fit-intercept"]) +def test_l1_min_c(X_container, loss, Y_label, intercept_label): + Ys = {"two-classes": Y1, "multi-class": Y2} + intercepts = { + "no-intercept": {"fit_intercept": False}, + "fit-intercept": {"fit_intercept": True, "intercept_scaling": 10}, + } + + X = X_container(dense_X) + Y = Ys[Y_label] + intercept_params = intercepts[intercept_label] + check_l1_min_c(X, Y, loss, **intercept_params) + + +def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=1.0): + min_c = l1_min_c( + X, + y, + loss=loss, + fit_intercept=fit_intercept, + intercept_scaling=intercept_scaling, + ) + + clf = { + "log": LogisticRegression(penalty="l1", solver="liblinear"), + "squared_hinge": LinearSVC(loss="squared_hinge", penalty="l1", dual=False), + }[loss] + + clf.fit_intercept = fit_intercept 
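+    # The checks below verify that C == min_c drives every coefficient (and the intercept)
+    # to zero, while C slightly above min_c yields at least one non-zero weight.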
+ clf.intercept_scaling = intercept_scaling + + clf.C = min_c + clf.fit(X, y) + assert (np.asarray(clf.coef_) == 0).all() + assert (np.asarray(clf.intercept_) == 0).all() + + clf.C = min_c * 1.01 + clf.fit(X, y) + assert (np.asarray(clf.coef_) != 0).any() or (np.asarray(clf.intercept_) != 0).any() + + +def test_ill_posed_min_c(): + X = [[0, 0], [0, 0]] + y = [0, 1] + with pytest.raises(ValueError): + l1_min_c(X, y) + + +_MAX_UNSIGNED_INT = 4294967295 + + +def test_newrand_default(): + """Test that bounded_rand_int_wrap without seeding respects the range + + Note this test should pass either if executed alone, or in conjunctions + with other tests that call set_seed explicit in any order: it checks + invariants on the RNG instead of specific values. + """ + generated = [bounded_rand_int_wrap(100) for _ in range(10)] + assert all(0 <= x < 100 for x in generated) + assert not all(x == generated[0] for x in generated) + + +@pytest.mark.parametrize("seed, expected", [(0, 54), (_MAX_UNSIGNED_INT, 9)]) +def test_newrand_set_seed(seed, expected): + """Test that `set_seed` produces deterministic results""" + set_seed_wrap(seed) + generated = bounded_rand_int_wrap(100) + assert generated == expected + + +@pytest.mark.parametrize("seed", [-1, _MAX_UNSIGNED_INT + 1]) +def test_newrand_set_seed_overflow(seed): + """Test that `set_seed_wrap` is defined for unsigned 32bits ints""" + with pytest.raises(OverflowError): + set_seed_wrap(seed) + + +@pytest.mark.parametrize("range_, n_pts", [(_MAX_UNSIGNED_INT, 10000), (100, 25)]) +def test_newrand_bounded_rand_int(range_, n_pts): + """Test that `bounded_rand_int` follows a uniform distribution""" + # XXX: this test is very seed sensitive: either it is wrong (too strict?) + # or the wrapped RNG is not uniform enough, at least on some platforms. + set_seed_wrap(42) + n_iter = 100 + ks_pvals = [] + uniform_dist = stats.uniform(loc=0, scale=range_) + # perform multiple samplings to make chance of outlier sampling negligible + for _ in range(n_iter): + # Deterministic random sampling + sample = [bounded_rand_int_wrap(range_) for _ in range(n_pts)] + res = stats.kstest(sample, uniform_dist.cdf) + ks_pvals.append(res.pvalue) + # Null hypothesis = samples come from an uniform distribution. + # Under the null hypothesis, p-values should be uniformly distributed + # and not concentrated on low values + # (this may seem counter-intuitive but is backed by multiple refs) + # So we can do two checks: + + # (1) check uniformity of p-values + uniform_p_vals_dist = stats.uniform(loc=0, scale=1) + res_pvals = stats.kstest(ks_pvals, uniform_p_vals_dist.cdf) + assert res_pvals.pvalue > 0.05, ( + "Null hypothesis rejected: generated random numbers are not uniform." + " Details: the (meta) p-value of the test of uniform distribution" + f" of p-values is {res_pvals.pvalue} which is not > 0.05" + ) + + # (2) (safety belt) check that 90% of p-values are above 0.05 + min_10pct_pval = np.percentile(ks_pvals, q=10) + # lower 10th quantile pvalue <= 0.05 means that the test rejects the + # null hypothesis that the sample came from the uniform distribution + assert min_10pct_pval > 0.05, ( + "Null hypothesis rejected: generated random numbers are not uniform. " + f"Details: lower 10th quantile p-value of {min_10pct_pval} not > 0.05." 
+ ) + + +@pytest.mark.parametrize("range_", [-1, _MAX_UNSIGNED_INT + 1]) +def test_newrand_bounded_rand_int_limits(range_): + """Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints""" + with pytest.raises(OverflowError): + bounded_rand_int_wrap(range_) diff --git a/.venv/Lib/site-packages/sklearn/svm/tests/test_sparse.py b/.venv/Lib/site-packages/sklearn/svm/tests/test_sparse.py new file mode 100644 index 0000000000000000000000000000000000000000..42774f6866bbd337c1695a89a4842f1dcbf756ae --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/tests/test_sparse.py @@ -0,0 +1,493 @@ +import numpy as np +import pytest +from scipy import sparse + +from sklearn import base, datasets, linear_model, svm +from sklearn.datasets import load_digits, make_blobs, make_classification +from sklearn.exceptions import ConvergenceWarning +from sklearn.svm.tests import test_svm +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, + skip_if_32bit, +) +from sklearn.utils.extmath import safe_sparse_dot +from sklearn.utils.fixes import ( + CSR_CONTAINERS, + DOK_CONTAINERS, + LIL_CONTAINERS, +) + +# test sample 1 +X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) +Y = [1, 1, 1, 2, 2, 2] +T = np.array([[-1, -1], [2, 2], [3, 2]]) +true_result = [1, 2, 2] + +# test sample 2 +X2 = np.array( + [ + [0, 0, 0], + [1, 1, 1], + [2, 0, 0], + [0, 0, 2], + [3, 3, 3], + ] +) +Y2 = [1, 2, 2, 2, 3] +T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]]) +true_result2 = [1, 2, 3] + +iris = datasets.load_iris() +rng = np.random.RandomState(0) +perm = rng.permutation(iris.target.size) +iris.data = iris.data[perm] +iris.target = iris.target[perm] + +X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0) + + +def check_svm_model_equal(dense_svm, X_train, y_train, X_test): + # Use the original svm model for dense fit and clone an exactly same + # svm model for sparse fit + sparse_svm = base.clone(dense_svm) + + dense_svm.fit(X_train.toarray(), y_train) + if sparse.issparse(X_test): + X_test_dense = X_test.toarray() + else: + X_test_dense = X_test + sparse_svm.fit(X_train, y_train) + assert sparse.issparse(sparse_svm.support_vectors_) + assert sparse.issparse(sparse_svm.dual_coef_) + assert_allclose(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray()) + assert_allclose(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray()) + if dense_svm.kernel == "linear": + assert sparse.issparse(sparse_svm.coef_) + assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray()) + assert_allclose(dense_svm.support_, sparse_svm.support_) + assert_allclose(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test)) + + assert_array_almost_equal( + dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test) + ) + assert_array_almost_equal( + dense_svm.decision_function(X_test_dense), + sparse_svm.decision_function(X_test_dense), + ) + if isinstance(dense_svm, svm.OneClassSVM): + msg = "cannot use sparse input in 'OneClassSVM' trained on dense data" + else: + assert_array_almost_equal( + dense_svm.predict_proba(X_test_dense), + sparse_svm.predict_proba(X_test), + decimal=4, + ) + msg = "cannot use sparse input in 'SVC' trained on dense data" + if sparse.issparse(X_test): + with pytest.raises(ValueError, match=msg): + dense_svm.predict(X_test) + + +@skip_if_32bit +@pytest.mark.parametrize( + "X_train, y_train, X_test", + [ + [X, Y, T], + [X2, Y2, T2], + [X_blobs[:80], y_blobs[:80], 
X_blobs[80:]], + [iris.data, iris.target, iris.data], + ], +) +@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"]) +@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS) +def test_svc(X_train, y_train, X_test, kernel, sparse_container): + """Check that sparse SVC gives the same result as SVC.""" + X_train = sparse_container(X_train) + + clf = svm.SVC( + gamma=1, + kernel=kernel, + probability=True, + random_state=0, + decision_function_shape="ovo", + ) + check_svm_model_equal(clf, X_train, y_train, X_test) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_unsorted_indices(csr_container): + # test that the result with sorted and unsorted indices in csr is the same + # we use a subset of digits as iris, blobs or make_classification didn't + # show the problem + X, y = load_digits(return_X_y=True) + X_test = csr_container(X[50:100]) + X, y = X[:50], y[:50] + + X_sparse = csr_container(X) + coef_dense = ( + svm.SVC(kernel="linear", probability=True, random_state=0).fit(X, y).coef_ + ) + sparse_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit( + X_sparse, y + ) + coef_sorted = sparse_svc.coef_ + # make sure dense and sparse SVM give the same result + assert_allclose(coef_dense, coef_sorted.toarray()) + + # reverse each row's indices + def scramble_indices(X): + new_data = [] + new_indices = [] + for i in range(1, len(X.indptr)): + row_slice = slice(*X.indptr[i - 1 : i + 1]) + new_data.extend(X.data[row_slice][::-1]) + new_indices.extend(X.indices[row_slice][::-1]) + return csr_container((new_data, new_indices, X.indptr), shape=X.shape) + + X_sparse_unsorted = scramble_indices(X_sparse) + X_test_unsorted = scramble_indices(X_test) + + assert not X_sparse_unsorted.has_sorted_indices + assert not X_test_unsorted.has_sorted_indices + + unsorted_svc = svm.SVC(kernel="linear", probability=True, random_state=0).fit( + X_sparse_unsorted, y + ) + coef_unsorted = unsorted_svc.coef_ + # make sure unsorted indices give same result + assert_allclose(coef_unsorted.toarray(), coef_sorted.toarray()) + assert_allclose( + sparse_svc.predict_proba(X_test_unsorted), sparse_svc.predict_proba(X_test) + ) + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_svc_with_custom_kernel(lil_container): + def kfunc(x, y): + return safe_sparse_dot(x, y.T) + + X_sp = lil_container(X) + clf_lin = svm.SVC(kernel="linear").fit(X_sp, Y) + clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y) + assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp)) + + +@skip_if_32bit +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"]) +def test_svc_iris(csr_container, kernel): + # Test the sparse SVC with the iris dataset + iris_data_sp = csr_container(iris.data) + + sp_clf = svm.SVC(kernel=kernel).fit(iris_data_sp, iris.target) + clf = svm.SVC(kernel=kernel).fit(iris.data, iris.target) + + assert_allclose(clf.support_vectors_, sp_clf.support_vectors_.toarray()) + assert_allclose(clf.dual_coef_, sp_clf.dual_coef_.toarray()) + assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp)) + if kernel == "linear": + assert_allclose(clf.coef_, sp_clf.coef_.toarray()) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_decision_function(csr_container): + # Test decision_function + + # Sanity check, test that decision_function implemented in python + # returns the same as the one in libsvm + + # multi class: + iris_data_sp = 
csr_container(iris.data) + svc = svm.SVC(kernel="linear", C=0.1, decision_function_shape="ovo") + clf = svc.fit(iris_data_sp, iris.target) + + dec = safe_sparse_dot(iris_data_sp, clf.coef_.T) + clf.intercept_ + + assert_allclose(dec, clf.decision_function(iris_data_sp)) + + # binary: + clf.fit(X, Y) + dec = np.dot(X, clf.coef_.T) + clf.intercept_ + prediction = clf.predict(X) + assert_allclose(dec.ravel(), clf.decision_function(X)) + assert_allclose( + prediction, clf.classes_[(clf.decision_function(X) > 0).astype(int).ravel()] + ) + expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0]) + assert_array_almost_equal(clf.decision_function(X), expected, decimal=2) + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_error(lil_container): + # Test that it gives proper exception on deficient input + clf = svm.SVC() + X_sp = lil_container(X) + + Y2 = Y[:-1] # wrong dimensions for labels + with pytest.raises(ValueError): + clf.fit(X_sp, Y2) + + clf.fit(X_sp, Y) + assert_array_equal(clf.predict(T), true_result) + + +@pytest.mark.parametrize( + "lil_container, dok_container", zip(LIL_CONTAINERS, DOK_CONTAINERS) +) +def test_linearsvc(lil_container, dok_container): + # Similar to test_SVC + X_sp = lil_container(X) + X2_sp = dok_container(X2) + + clf = svm.LinearSVC(random_state=0).fit(X, Y) + sp_clf = svm.LinearSVC(random_state=0).fit(X_sp, Y) + + assert sp_clf.fit_intercept + + assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4) + assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4) + + assert_allclose(clf.predict(X), sp_clf.predict(X_sp)) + + clf.fit(X2, Y2) + sp_clf.fit(X2_sp, Y2) + + assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=4) + assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=4) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_linearsvc_iris(csr_container): + # Test the sparse LinearSVC with the iris dataset + iris_data_sp = csr_container(iris.data) + + sp_clf = svm.LinearSVC(random_state=0).fit(iris_data_sp, iris.target) + clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target) + + assert clf.fit_intercept == sp_clf.fit_intercept + + assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1) + assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1) + assert_allclose(clf.predict(iris.data), sp_clf.predict(iris_data_sp)) + + # check decision_function + pred = np.argmax(sp_clf.decision_function(iris_data_sp), axis=1) + assert_allclose(pred, clf.predict(iris.data)) + + # sparsify the coefficients on both models and check that they still + # produce the same results + clf.sparsify() + assert_array_equal(pred, clf.predict(iris_data_sp)) + sp_clf.sparsify() + assert_array_equal(pred, sp_clf.predict(iris_data_sp)) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_weight(csr_container): + # Test class weights + X_, y_ = make_classification( + n_samples=200, n_features=100, weights=[0.833, 0.167], random_state=0 + ) + + X_ = csr_container(X_) + for clf in ( + linear_model.LogisticRegression(), + svm.LinearSVC(random_state=0), + svm.SVC(), + ): + clf.set_params(class_weight={0: 5}) + clf.fit(X_[:180], y_[:180]) + y_pred = clf.predict(X_[180:]) + assert np.sum(y_pred == y_[180:]) >= 11 + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_sample_weights(lil_container): + # Test weights on individual samples + X_sp = lil_container(X) + + clf = svm.SVC() + clf.fit(X_sp, Y) + assert_array_equal(clf.predict([X[2]]), [1.0]) + + 
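# Giving roughly 100x more weight to the class-2 samples should flip the prediction for X[2] from class 1 to class 2. +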
sample_weight = [0.1] * 3 + [10] * 3 + clf.fit(X_sp, Y, sample_weight=sample_weight) + assert_array_equal(clf.predict([X[2]]), [2.0]) + + +def test_sparse_liblinear_intercept_handling(): + # Test that sparse liblinear honours intercept_scaling param + test_svm.test_dense_liblinear_intercept_handling(svm.LinearSVC) + + +@pytest.mark.parametrize( + "X_train, y_train, X_test", + [ + [X, None, T], + [X2, None, T2], + [X_blobs[:80], None, X_blobs[80:]], + [iris.data, None, iris.data], + ], +) +@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"]) +@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS) +@skip_if_32bit +def test_sparse_oneclasssvm(X_train, y_train, X_test, kernel, sparse_container): + # Check that sparse OneClassSVM gives the same result as dense OneClassSVM + X_train = sparse_container(X_train) + + clf = svm.OneClassSVM(gamma=1, kernel=kernel) + check_svm_model_equal(clf, X_train, y_train, X_test) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_realdata(csr_container): + # Test on a subset from the 20newsgroups dataset. + # This catches some bugs if input is not correctly converted into + # sparse format or weights are not correctly initialized. + data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069]) + + # SVC does not support large sparse, so we specify int32 indices + # In this case, `csr_matrix` automatically uses int32 regardless of the dtypes of + # `indices` and `indptr` but `csr_array` may or may not use the same dtype as + # `indices` and `indptr`, which would be int64 if not specified + indices = np.array([6, 5, 35, 31], dtype=np.int32) + indptr = np.array([0] * 8 + [1] * 32 + [2] * 38 + [4] * 3, dtype=np.int32) + + X = csr_container((data, indices, indptr)) + y = np.array( + [ + 1.0, + 0.0, + 2.0, + 2.0, + 1.0, + 1.0, + 1.0, + 2.0, + 2.0, + 0.0, + 1.0, + 2.0, + 2.0, + 0.0, + 2.0, + 0.0, + 3.0, + 0.0, + 3.0, + 0.0, + 1.0, + 1.0, + 3.0, + 2.0, + 3.0, + 2.0, + 0.0, + 3.0, + 1.0, + 0.0, + 2.0, + 1.0, + 2.0, + 0.0, + 1.0, + 0.0, + 2.0, + 3.0, + 1.0, + 3.0, + 0.0, + 1.0, + 0.0, + 0.0, + 2.0, + 0.0, + 1.0, + 2.0, + 2.0, + 2.0, + 3.0, + 2.0, + 0.0, + 3.0, + 2.0, + 1.0, + 2.0, + 3.0, + 2.0, + 2.0, + 0.0, + 1.0, + 0.0, + 1.0, + 2.0, + 3.0, + 0.0, + 0.0, + 2.0, + 2.0, + 1.0, + 3.0, + 1.0, + 1.0, + 0.0, + 1.0, + 2.0, + 1.0, + 1.0, + 3.0, + ] + ) + + clf = svm.SVC(kernel="linear").fit(X.toarray(), y) + sp_clf = svm.SVC(kernel="linear").fit(X.tocoo(), y) + + assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray()) + assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray()) + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_sparse_svc_clone_with_callable_kernel(lil_container): + # Test that the "dense_fit" is called even though we use sparse input + # meaning that everything works fine. 
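+    # Fitting a clone on sparse input must give the same predictions as the dense fit performed below.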
+ a = svm.SVC(C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0) + b = base.clone(a) + + X_sp = lil_container(X) + b.fit(X_sp, Y) + pred = b.predict(X_sp) + b.predict_proba(X_sp) + + dense_svm = svm.SVC( + C=1, kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0 + ) + pred_dense = dense_svm.fit(X, Y).predict(X) + assert_array_equal(pred_dense, pred) + # b.decision_function(X_sp) # XXX : should be supported + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_timeout(lil_container): + sp = svm.SVC( + C=1, kernel=lambda x, y: x @ y.T, probability=True, random_state=0, max_iter=1 + ) + warning_msg = ( + r"Solver terminated early \(max_iter=1\). Consider pre-processing " + r"your data with StandardScaler or MinMaxScaler." + ) + with pytest.warns(ConvergenceWarning, match=warning_msg): + sp.fit(lil_container(X), Y) + + +def test_consistent_proba(): + a = svm.SVC(probability=True, max_iter=1, random_state=0) + with ignore_warnings(category=ConvergenceWarning): + proba_1 = a.fit(X, Y).predict_proba(X) + a = svm.SVC(probability=True, max_iter=1, random_state=0) + with ignore_warnings(category=ConvergenceWarning): + proba_2 = a.fit(X, Y).predict_proba(X) + assert_allclose(proba_1, proba_2) diff --git a/.venv/Lib/site-packages/sklearn/svm/tests/test_svm.py b/.venv/Lib/site-packages/sklearn/svm/tests/test_svm.py new file mode 100644 index 0000000000000000000000000000000000000000..82c37d14f77b2520585433ae786c21f0dd87ba8c --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/svm/tests/test_svm.py @@ -0,0 +1,1440 @@ +""" +Testing for Support Vector Machine module (sklearn.svm) + +TODO: remove hard coded numerical results when possible +""" + +import numpy as np +import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) + +from sklearn import base, datasets, linear_model, metrics, svm +from sklearn.datasets import make_blobs, make_classification, make_regression +from sklearn.exceptions import ( + ConvergenceWarning, + NotFittedError, +) +from sklearn.metrics import f1_score +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.model_selection import train_test_split +from sklearn.multiclass import OneVsRestClassifier + +# mypy error: Module 'sklearn.svm' has no attribute '_libsvm' +from sklearn.svm import ( # type: ignore + SVR, + LinearSVC, + LinearSVR, + NuSVR, + OneClassSVM, + _libsvm, +) +from sklearn.svm._classes import _validate_dual_parameter +from sklearn.utils import check_random_state, shuffle +from sklearn.utils.fixes import _IS_32BIT, CSR_CONTAINERS, LIL_CONTAINERS +from sklearn.utils.validation import _num_samples + +# toy sample +X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] +Y = [1, 1, 1, 2, 2, 2] +T = [[-1, -1], [2, 2], [3, 2]] +true_result = [1, 2, 2] + +# also load the iris dataset +iris = datasets.load_iris() +rng = check_random_state(42) +perm = rng.permutation(iris.target.size) +iris.data = iris.data[perm] +iris.target = iris.target[perm] + + +def test_libsvm_parameters(): + # Test parameters on classes that make use of libsvm. + clf = svm.SVC(kernel="linear").fit(X, Y) + assert_array_equal(clf.dual_coef_, [[-0.25, 0.25]]) + assert_array_equal(clf.support_, [1, 3]) + assert_array_equal(clf.support_vectors_, (X[1], X[3])) + assert_array_equal(clf.intercept_, [0.0]) + assert_array_equal(clf.predict(X), Y) + + +def test_libsvm_iris(): + # Check consistency on dataset iris. 
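+    # Both the estimator API (svm.SVC) and the low-level _libsvm bindings are exercised here.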
+ + # shuffle the dataset so that labels are not ordered + for k in ("linear", "rbf"): + clf = svm.SVC(kernel=k).fit(iris.data, iris.target) + assert np.mean(clf.predict(iris.data) == iris.target) > 0.9 + assert hasattr(clf, "coef_") == (k == "linear") + + assert_array_equal(clf.classes_, np.sort(clf.classes_)) + + # check also the low-level API + # We unpack the values to create a dictionary with some of the return values + # from Libsvm's fit. + ( + libsvm_support, + libsvm_support_vectors, + libsvm_n_class_SV, + libsvm_sv_coef, + libsvm_intercept, + libsvm_probA, + libsvm_probB, + # libsvm_fit_status and libsvm_n_iter won't be used below. + libsvm_fit_status, + libsvm_n_iter, + ) = _libsvm.fit(iris.data, iris.target.astype(np.float64)) + + model_params = { + "support": libsvm_support, + "SV": libsvm_support_vectors, + "nSV": libsvm_n_class_SV, + "sv_coef": libsvm_sv_coef, + "intercept": libsvm_intercept, + "probA": libsvm_probA, + "probB": libsvm_probB, + } + pred = _libsvm.predict(iris.data, **model_params) + assert np.mean(pred == iris.target) > 0.95 + + # We unpack the values to create a dictionary with some of the return values + # from Libsvm's fit. + ( + libsvm_support, + libsvm_support_vectors, + libsvm_n_class_SV, + libsvm_sv_coef, + libsvm_intercept, + libsvm_probA, + libsvm_probB, + # libsvm_fit_status and libsvm_n_iter won't be used below. + libsvm_fit_status, + libsvm_n_iter, + ) = _libsvm.fit(iris.data, iris.target.astype(np.float64), kernel="linear") + + model_params = { + "support": libsvm_support, + "SV": libsvm_support_vectors, + "nSV": libsvm_n_class_SV, + "sv_coef": libsvm_sv_coef, + "intercept": libsvm_intercept, + "probA": libsvm_probA, + "probB": libsvm_probB, + } + pred = _libsvm.predict(iris.data, **model_params, kernel="linear") + assert np.mean(pred == iris.target) > 0.95 + + pred = _libsvm.cross_validation( + iris.data, iris.target.astype(np.float64), 5, kernel="linear", random_seed=0 + ) + assert np.mean(pred == iris.target) > 0.95 + + # If random_seed >= 0, the libsvm rng is seeded (by calling `srand`), hence + # we should get deterministic results (assuming that there is no other + # thread calling this wrapper calling `srand` concurrently). + pred2 = _libsvm.cross_validation( + iris.data, iris.target.astype(np.float64), 5, kernel="linear", random_seed=0 + ) + assert_array_equal(pred, pred2) + + +def test_precomputed(): + # SVC with a precomputed kernel. + # We test it with a toy dataset and with iris. + clf = svm.SVC(kernel="precomputed") + # Gram matrix for train data (square matrix) + # (we use just a linear kernel) + K = np.dot(X, np.array(X).T) + clf.fit(K, Y) + # Gram matrix for test data (rectangular matrix) + KT = np.dot(T, np.array(X).T) + pred = clf.predict(KT) + with pytest.raises(ValueError): + clf.predict(KT.T) + + assert_array_equal(clf.dual_coef_, [[-0.25, 0.25]]) + assert_array_equal(clf.support_, [1, 3]) + assert_array_equal(clf.intercept_, [0]) + assert_array_almost_equal(clf.support_, [1, 3]) + assert_array_equal(pred, true_result) + + # Gram matrix for test data but compute KT[i,j] + # for support vectors j only. + KT = np.zeros_like(KT) + for i in range(len(T)): + for j in clf.support_: + KT[i, j] = np.dot(T[i], X[j]) + + pred = clf.predict(KT) + assert_array_equal(pred, true_result) + + # same as before, but using a callable function instead of the kernel + # matrix. 
kernel is just a linear kernel + + def kfunc(x, y): + return np.dot(x, y.T) + + clf = svm.SVC(kernel=kfunc) + clf.fit(np.array(X), Y) + pred = clf.predict(T) + + assert_array_equal(clf.dual_coef_, [[-0.25, 0.25]]) + assert_array_equal(clf.intercept_, [0]) + assert_array_almost_equal(clf.support_, [1, 3]) + assert_array_equal(pred, true_result) + + # test a precomputed kernel with the iris dataset + # and check parameters against a linear SVC + clf = svm.SVC(kernel="precomputed") + clf2 = svm.SVC(kernel="linear") + K = np.dot(iris.data, iris.data.T) + clf.fit(K, iris.target) + clf2.fit(iris.data, iris.target) + pred = clf.predict(K) + assert_array_almost_equal(clf.support_, clf2.support_) + assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_) + assert_array_almost_equal(clf.intercept_, clf2.intercept_) + assert_almost_equal(np.mean(pred == iris.target), 0.99, decimal=2) + + # Gram matrix for test data but compute KT[i,j] + # for support vectors j only. + K = np.zeros_like(K) + for i in range(len(iris.data)): + for j in clf.support_: + K[i, j] = np.dot(iris.data[i], iris.data[j]) + + pred = clf.predict(K) + assert_almost_equal(np.mean(pred == iris.target), 0.99, decimal=2) + + clf = svm.SVC(kernel=kfunc) + clf.fit(iris.data, iris.target) + assert_almost_equal(np.mean(pred == iris.target), 0.99, decimal=2) + + +def test_svr(): + # Test Support Vector Regression + + diabetes = datasets.load_diabetes() + for clf in ( + svm.NuSVR(kernel="linear", nu=0.4, C=1.0), + svm.NuSVR(kernel="linear", nu=0.4, C=10.0), + svm.SVR(kernel="linear", C=10.0), + svm.LinearSVR(C=10.0), + svm.LinearSVR(C=10.0), + ): + clf.fit(diabetes.data, diabetes.target) + assert clf.score(diabetes.data, diabetes.target) > 0.02 + + # non-regression test; previously, BaseLibSVM would check that + # len(np.unique(y)) < 2, which must only be done for SVC + svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data))) + svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data))) + + +def test_linearsvr(): + # check that SVR(kernel='linear') and LinearSVC() give + # comparable results + diabetes = datasets.load_diabetes() + lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target) + score1 = lsvr.score(diabetes.data, diabetes.target) + + svr = svm.SVR(kernel="linear", C=1e3).fit(diabetes.data, diabetes.target) + score2 = svr.score(diabetes.data, diabetes.target) + + assert_allclose(np.linalg.norm(lsvr.coef_), np.linalg.norm(svr.coef_), 1, 0.0001) + assert_almost_equal(score1, score2, 2) + + +def test_linearsvr_fit_sampleweight(): + # check correct result when sample_weight is 1 + # check that SVR(kernel='linear') and LinearSVC() give + # comparable results + diabetes = datasets.load_diabetes() + n_samples = len(diabetes.target) + unit_weight = np.ones(n_samples) + lsvr = svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000).fit( + diabetes.data, diabetes.target, sample_weight=unit_weight + ) + score1 = lsvr.score(diabetes.data, diabetes.target) + + lsvr_no_weight = svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000).fit( + diabetes.data, diabetes.target + ) + score2 = lsvr_no_weight.score(diabetes.data, diabetes.target) + + assert_allclose( + np.linalg.norm(lsvr.coef_), np.linalg.norm(lsvr_no_weight.coef_), 1, 0.0001 + ) + assert_almost_equal(score1, score2, 2) + + # check that fit(X) = fit([X1, X2, X3], sample_weight = [n1, n2, n3]) where + # X = X1 repeated n1 times, X2 repeated n2 times and so forth + random_state = check_random_state(0) + random_weight = random_state.randint(0, 10, n_samples) + lsvr_unflat = 
svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000).fit( + diabetes.data, diabetes.target, sample_weight=random_weight + ) + score3 = lsvr_unflat.score( + diabetes.data, diabetes.target, sample_weight=random_weight + ) + + X_flat = np.repeat(diabetes.data, random_weight, axis=0) + y_flat = np.repeat(diabetes.target, random_weight, axis=0) + lsvr_flat = svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000).fit(X_flat, y_flat) + score4 = lsvr_flat.score(X_flat, y_flat) + + assert_almost_equal(score3, score4, 2) + + +def test_svr_errors(): + X = [[0.0], [1.0]] + y = [0.0, 0.5] + + # Bad kernel + clf = svm.SVR(kernel=lambda x, y: np.array([[1.0]])) + clf.fit(X, y) + with pytest.raises(ValueError): + clf.predict(X) + + +def test_oneclass(): + # Test OneClassSVM + clf = svm.OneClassSVM() + clf.fit(X) + pred = clf.predict(T) + + assert_array_equal(pred, [1, -1, -1]) + assert pred.dtype == np.dtype("intp") + assert_array_almost_equal(clf.intercept_, [-1.218], decimal=3) + assert_array_almost_equal(clf.dual_coef_, [[0.750, 0.750, 0.750, 0.750]], decimal=3) + with pytest.raises(AttributeError): + (lambda: clf.coef_)() + + +def test_oneclass_decision_function(): + # Test OneClassSVM decision function + clf = svm.OneClassSVM() + rnd = check_random_state(2) + + # Generate train data + X = 0.3 * rnd.randn(100, 2) + X_train = np.r_[X + 2, X - 2] + + # Generate some regular novel observations + X = 0.3 * rnd.randn(20, 2) + X_test = np.r_[X + 2, X - 2] + # Generate some abnormal novel observations + X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2)) + + # fit the model + clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1) + clf.fit(X_train) + + # predict things + y_pred_test = clf.predict(X_test) + assert np.mean(y_pred_test == 1) > 0.9 + y_pred_outliers = clf.predict(X_outliers) + assert np.mean(y_pred_outliers == -1) > 0.9 + dec_func_test = clf.decision_function(X_test) + assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) + dec_func_outliers = clf.decision_function(X_outliers) + assert_array_equal((dec_func_outliers > 0).ravel(), y_pred_outliers == 1) + + +def test_oneclass_score_samples(): + X_train = [[1, 1], [1, 2], [2, 1]] + clf = svm.OneClassSVM(gamma=1).fit(X_train) + assert_array_equal( + clf.score_samples([[2.0, 2.0]]), + clf.decision_function([[2.0, 2.0]]) + clf.offset_, + ) + + +def test_tweak_params(): + # Make sure some tweaking of parameters works. + # We change clf.dual_coef_ at run time and expect .predict() to change + # accordingly. Notice that this is not trivial since it involves a lot + # of C/Python copying in the libsvm bindings. + # The success of this test ensures that the mapping between libsvm and + # the python classifier is complete. + clf = svm.SVC(kernel="linear", C=1.0) + clf.fit(X, Y) + assert_array_equal(clf.dual_coef_, [[-0.25, 0.25]]) + assert_array_equal(clf.predict([[-0.1, -0.1]]), [1]) + clf._dual_coef_ = np.array([[0.0, 1.0]]) + assert_array_equal(clf.predict([[-0.1, -0.1]]), [2]) + + +def test_probability(): + # Predict probabilities using SVC + # This uses cross validation, so we use a slightly bigger testing set. 
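+    # For each classifier: per-sample probabilities must sum to one, their argmax must agree
+    # with predict() for most samples, and predict_log_proba must match log(predict_proba).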
+ + for clf in ( + svm.SVC(probability=True, random_state=0, C=1.0), + svm.NuSVC(probability=True, random_state=0), + ): + clf.fit(iris.data, iris.target) + + prob_predict = clf.predict_proba(iris.data) + assert_array_almost_equal(np.sum(prob_predict, 1), np.ones(iris.data.shape[0])) + assert np.mean(np.argmax(prob_predict, 1) == clf.predict(iris.data)) > 0.9 + + assert_almost_equal( + clf.predict_proba(iris.data), np.exp(clf.predict_log_proba(iris.data)), 8 + ) + + +def test_decision_function(): + # Test decision_function + # Sanity check, test that decision_function implemented in python + # returns the same as the one in libsvm + # multi class: + clf = svm.SVC(kernel="linear", C=0.1, decision_function_shape="ovo").fit( + iris.data, iris.target + ) + + dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_ + + assert_array_almost_equal(dec, clf.decision_function(iris.data)) + + # binary: + clf.fit(X, Y) + dec = np.dot(X, clf.coef_.T) + clf.intercept_ + prediction = clf.predict(X) + assert_array_almost_equal(dec.ravel(), clf.decision_function(X)) + assert_array_almost_equal( + prediction, clf.classes_[(clf.decision_function(X) > 0).astype(int)] + ) + expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0]) + assert_array_almost_equal(clf.decision_function(X), expected, 2) + + # kernel binary: + clf = svm.SVC(kernel="rbf", gamma=1, decision_function_shape="ovo") + clf.fit(X, Y) + + rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma) + dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_ + assert_array_almost_equal(dec.ravel(), clf.decision_function(X)) + + +@pytest.mark.parametrize("SVM", (svm.SVC, svm.NuSVC)) +def test_decision_function_shape(SVM): + # check that decision_function_shape='ovr' or 'ovo' gives + # correct shape and is consistent with predict + + clf = SVM(kernel="linear", decision_function_shape="ovr").fit( + iris.data, iris.target + ) + dec = clf.decision_function(iris.data) + assert dec.shape == (len(iris.data), 3) + assert_array_equal(clf.predict(iris.data), np.argmax(dec, axis=1)) + + # with five classes: + X, y = make_blobs(n_samples=80, centers=5, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + clf = SVM(kernel="linear", decision_function_shape="ovr").fit(X_train, y_train) + dec = clf.decision_function(X_test) + assert dec.shape == (len(X_test), 5) + assert_array_equal(clf.predict(X_test), np.argmax(dec, axis=1)) + + # check the shape of the decision function with decision_function_shape='ovo' + clf = SVM(kernel="linear", decision_function_shape="ovo").fit(X_train, y_train) + dec = clf.decision_function(X_train) + assert dec.shape == (len(X_train), 10) + + +def test_svr_predict(): + # Test SVR's decision_function + # Sanity check, test that predict implemented in python + # returns the same as the one in libsvm + + X = iris.data + y = iris.target + + # linear kernel + reg = svm.SVR(kernel="linear", C=0.1).fit(X, y) + + dec = np.dot(X, reg.coef_.T) + reg.intercept_ + assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel()) + + # rbf kernel + reg = svm.SVR(kernel="rbf", gamma=1).fit(X, y) + + rbfs = rbf_kernel(X, reg.support_vectors_, gamma=reg.gamma) + dec = np.dot(rbfs, reg.dual_coef_.T) + reg.intercept_ + assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel()) + + +def test_weight(): + # Test class weights + clf = svm.SVC(class_weight={1: 0.1}) + # we give a small weight to class 1 + clf.fit(X, Y) + # so all predicted values belong to class 2 + assert_array_almost_equal(clf.predict(X), [2] * 6) + + X_, y_ = make_classification( + 
n_samples=200, n_features=10, weights=[0.833, 0.167], random_state=2 + ) + + for clf in ( + linear_model.LogisticRegression(), + svm.LinearSVC(random_state=0), + svm.SVC(), + ): + clf.set_params(class_weight={0: 0.1, 1: 10}) + clf.fit(X_[:100], y_[:100]) + y_pred = clf.predict(X_[100:]) + assert f1_score(y_[100:], y_pred) > 0.3 + + +@pytest.mark.parametrize("estimator", [svm.SVC(C=1e-2), svm.NuSVC()]) +def test_svm_classifier_sided_sample_weight(estimator): + # fit a linear SVM and check that giving more weight to opposed samples + # in the space will flip the decision toward these samples. + X = [[-2, 0], [-1, -1], [0, -2], [0, 2], [1, 1], [2, 0]] + estimator.set_params(kernel="linear") + + # check that with unit weights, a sample is supposed to be predicted on + # the boundary + sample_weight = [1] * 6 + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.decision_function([[-1.0, 1.0]]) + assert y_pred == pytest.approx(0) + + # give more weights to opposed samples + sample_weight = [10.0, 0.1, 0.1, 0.1, 0.1, 10] + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.decision_function([[-1.0, 1.0]]) + assert y_pred < 0 + + sample_weight = [1.0, 0.1, 10.0, 10.0, 0.1, 0.1] + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.decision_function([[-1.0, 1.0]]) + assert y_pred > 0 + + +@pytest.mark.parametrize("estimator", [svm.SVR(C=1e-2), svm.NuSVR(C=1e-2)]) +def test_svm_regressor_sided_sample_weight(estimator): + # similar test to test_svm_classifier_sided_sample_weight but for + # SVM regressors + X = [[-2, 0], [-1, -1], [0, -2], [0, 2], [1, 1], [2, 0]] + estimator.set_params(kernel="linear") + + # check that with unit weights, a sample is supposed to be predicted on + # the boundary + sample_weight = [1] * 6 + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.predict([[-1.0, 1.0]]) + assert y_pred == pytest.approx(1.5) + + # give more weights to opposed samples + sample_weight = [10.0, 0.1, 0.1, 0.1, 0.1, 10] + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.predict([[-1.0, 1.0]]) + assert y_pred < 1.5 + + sample_weight = [1.0, 0.1, 10.0, 10.0, 0.1, 0.1] + estimator.fit(X, Y, sample_weight=sample_weight) + y_pred = estimator.predict([[-1.0, 1.0]]) + assert y_pred > 1.5 + + +def test_svm_equivalence_sample_weight_C(): + # test that rescaling all samples is the same as changing C + clf = svm.SVC() + clf.fit(X, Y) + dual_coef_no_weight = clf.dual_coef_ + clf.set_params(C=100) + clf.fit(X, Y, sample_weight=np.repeat(0.01, len(X))) + assert_allclose(dual_coef_no_weight, clf.dual_coef_) + + +@pytest.mark.parametrize( + "Estimator, err_msg", + [ + (svm.SVC, "Invalid input - all samples have zero or negative weights."), + (svm.NuSVC, "(negative dimensions are not allowed|nu is infeasible)"), + (svm.SVR, "Invalid input - all samples have zero or negative weights."), + (svm.NuSVR, "Invalid input - all samples have zero or negative weights."), + (svm.OneClassSVM, "Invalid input - all samples have zero or negative weights."), + ], + ids=["SVC", "NuSVC", "SVR", "NuSVR", "OneClassSVM"], +) +@pytest.mark.parametrize( + "sample_weight", + [[0] * len(Y), [-0.3] * len(Y)], + ids=["weights-are-zero", "weights-are-negative"], +) +def test_negative_sample_weights_mask_all_samples(Estimator, err_msg, sample_weight): + est = Estimator(kernel="linear") + with pytest.raises(ValueError, match=err_msg): + est.fit(X, Y, sample_weight=sample_weight) + + +@pytest.mark.parametrize( + "Classifier, err_msg", + [ + ( + 
svm.SVC, + ( + "Invalid input - all samples with positive weights belong to the same" + " class" + ), + ), + (svm.NuSVC, "specified nu is infeasible"), + ], + ids=["SVC", "NuSVC"], +) +@pytest.mark.parametrize( + "sample_weight", + [[0, -0.5, 0, 1, 1, 1], [1, 1, 1, 0, -0.1, -0.3]], + ids=["mask-label-1", "mask-label-2"], +) +def test_negative_weights_svc_leave_just_one_label(Classifier, err_msg, sample_weight): + clf = Classifier(kernel="linear") + with pytest.raises(ValueError, match=err_msg): + clf.fit(X, Y, sample_weight=sample_weight) + + +@pytest.mark.parametrize( + "Classifier, model", + [ + (svm.SVC, {"when-left": [0.3998, 0.4], "when-right": [0.4, 0.3999]}), + (svm.NuSVC, {"when-left": [0.3333, 0.3333], "when-right": [0.3333, 0.3333]}), + ], + ids=["SVC", "NuSVC"], +) +@pytest.mark.parametrize( + "sample_weight, mask_side", + [([1, -0.5, 1, 1, 1, 1], "when-left"), ([1, 1, 1, 0, 1, 1], "when-right")], + ids=["partial-mask-label-1", "partial-mask-label-2"], +) +def test_negative_weights_svc_leave_two_labels( + Classifier, model, sample_weight, mask_side +): + clf = Classifier(kernel="linear") + clf.fit(X, Y, sample_weight=sample_weight) + assert_allclose(clf.coef_, [model[mask_side]], rtol=1e-3) + + +@pytest.mark.parametrize( + "Estimator", [svm.SVC, svm.NuSVC, svm.NuSVR], ids=["SVC", "NuSVC", "NuSVR"] +) +@pytest.mark.parametrize( + "sample_weight", + [[1, -0.5, 1, 1, 1, 1], [1, 1, 1, 0, 1, 1]], + ids=["partial-mask-label-1", "partial-mask-label-2"], +) +def test_negative_weight_equal_coeffs(Estimator, sample_weight): + # model generates equal coefficients + est = Estimator(kernel="linear") + est.fit(X, Y, sample_weight=sample_weight) + coef = np.abs(est.coef_).ravel() + assert coef[0] == pytest.approx(coef[1], rel=1e-3) + + +def test_auto_weight(): + # Test class weights for imbalanced data + from sklearn.linear_model import LogisticRegression + + # We take as dataset the two-dimensional projection of iris so + # that it is not separable and remove half of predictors from + # class 1. + # We add one to the targets as a non-regression test: + # class_weight="balanced" + # used to work only when the labels where a range [0..K). + from sklearn.utils import compute_class_weight + + X, y = iris.data[:, :2], iris.target + 1 + unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2]) + + classes = np.unique(y[unbalanced]) + class_weights = compute_class_weight("balanced", classes=classes, y=y[unbalanced]) + assert np.argmax(class_weights) == 2 + + for clf in ( + svm.SVC(kernel="linear"), + svm.LinearSVC(random_state=0), + LogisticRegression(), + ): + # check that score is better when class='balanced' is set. + y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X) + clf.set_params(class_weight="balanced") + y_pred_balanced = clf.fit( + X[unbalanced], + y[unbalanced], + ).predict(X) + assert metrics.f1_score(y, y_pred, average="macro") <= metrics.f1_score( + y, y_pred_balanced, average="macro" + ) + + +@pytest.mark.parametrize("lil_container", LIL_CONTAINERS) +def test_bad_input(lil_container): + # Test dimensions for labels + Y2 = Y[:-1] # wrong dimensions for labels + with pytest.raises(ValueError): + svm.SVC().fit(X, Y2) + + # Test with arrays that are non-contiguous. 
+ for clf in (svm.SVC(), svm.LinearSVC(random_state=0)): + Xf = np.asfortranarray(X) + assert not Xf.flags["C_CONTIGUOUS"] + yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T) + yf = yf[:, -1] + assert not yf.flags["F_CONTIGUOUS"] + assert not yf.flags["C_CONTIGUOUS"] + clf.fit(Xf, yf) + assert_array_equal(clf.predict(T), true_result) + + # error for precomputed kernelsx + clf = svm.SVC(kernel="precomputed") + with pytest.raises(ValueError): + clf.fit(X, Y) + + # predict with sparse input when trained with dense + clf = svm.SVC().fit(X, Y) + with pytest.raises(ValueError): + clf.predict(lil_container(X)) + + Xt = np.array(X).T + clf.fit(np.dot(X, Xt), Y) + with pytest.raises(ValueError): + clf.predict(X) + + clf = svm.SVC() + clf.fit(X, Y) + with pytest.raises(ValueError): + clf.predict(Xt) + + +def test_svc_nonfinite_params(): + # Check SVC throws ValueError when dealing with non-finite parameter values + rng = np.random.RandomState(0) + n_samples = 10 + fmax = np.finfo(np.float64).max + X = fmax * rng.uniform(size=(n_samples, 2)) + y = rng.randint(0, 2, size=n_samples) + + clf = svm.SVC() + msg = "The dual coefficients or intercepts are not finite" + with pytest.raises(ValueError, match=msg): + clf.fit(X, y) + + +def test_unicode_kernel(): + # Test that a unicode kernel name does not cause a TypeError + clf = svm.SVC(kernel="linear", probability=True) + clf.fit(X, Y) + clf.predict_proba(T) + _libsvm.cross_validation( + iris.data, iris.target.astype(np.float64), 5, kernel="linear", random_seed=0 + ) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_precomputed(csr_container): + clf = svm.SVC(kernel="precomputed") + sparse_gram = csr_container([[1, 0], [0, 1]]) + with pytest.raises(TypeError, match="Sparse precomputed"): + clf.fit(sparse_gram, [0, 1]) + + +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_fit_support_vectors_empty(csr_container): + # Regression test for #14893 + X_train = csr_container([[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]]) + y_train = np.array([0.04, 0.04, 0.10, 0.16]) + model = svm.SVR(kernel="linear") + model.fit(X_train, y_train) + assert not model.support_vectors_.data.size + assert not model.dual_coef_.data.size + + +@pytest.mark.parametrize("loss", ["hinge", "squared_hinge"]) +@pytest.mark.parametrize("penalty", ["l1", "l2"]) +@pytest.mark.parametrize("dual", [True, False]) +def test_linearsvc_parameters(loss, penalty, dual): + # Test possible parameter combinations in LinearSVC + # Generate list of possible parameter combinations + X, y = make_classification(n_samples=5, n_features=5, random_state=0) + + clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual, random_state=0) + if ( + (loss, penalty) == ("hinge", "l1") + or (loss, penalty, dual) == ("hinge", "l2", False) + or (penalty, dual) == ("l1", True) + ): + with pytest.raises( + ValueError, + match="Unsupported set of arguments.*penalty='%s.*loss='%s.*dual=%s" + % (penalty, loss, dual), + ): + clf.fit(X, y) + else: + clf.fit(X, y) + + +def test_linearsvc(): + # Test basic routines using LinearSVC + clf = svm.LinearSVC(random_state=0).fit(X, Y) + + # by default should have intercept + assert clf.fit_intercept + + assert_array_equal(clf.predict(T), true_result) + assert_array_almost_equal(clf.intercept_, [0], decimal=3) + + # the same with l1 penalty + clf = svm.LinearSVC( + penalty="l1", loss="squared_hinge", dual=False, random_state=0 + ).fit(X, Y) + assert_array_equal(clf.predict(T), true_result) + + # l2 penalty with dual formulation 
+ clf = svm.LinearSVC(penalty="l2", dual=True, random_state=0).fit(X, Y) + assert_array_equal(clf.predict(T), true_result) + + # l2 penalty, l1 loss + clf = svm.LinearSVC(penalty="l2", loss="hinge", dual=True, random_state=0) + clf.fit(X, Y) + assert_array_equal(clf.predict(T), true_result) + + # test also decision function + dec = clf.decision_function(T) + res = (dec > 0).astype(int) + 1 + assert_array_equal(res, true_result) + + +def test_linearsvc_crammer_singer(): + # Test LinearSVC with crammer_singer multi-class svm + ovr_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target) + cs_clf = svm.LinearSVC(multi_class="crammer_singer", random_state=0) + cs_clf.fit(iris.data, iris.target) + + # similar prediction for ovr and crammer-singer: + assert (ovr_clf.predict(iris.data) == cs_clf.predict(iris.data)).mean() > 0.9 + + # classifiers shouldn't be the same + assert (ovr_clf.coef_ != cs_clf.coef_).all() + + # test decision function + assert_array_equal( + cs_clf.predict(iris.data), + np.argmax(cs_clf.decision_function(iris.data), axis=1), + ) + dec_func = np.dot(iris.data, cs_clf.coef_.T) + cs_clf.intercept_ + assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data)) + + +def test_linearsvc_fit_sampleweight(): + # check correct result when sample_weight is 1 + n_samples = len(X) + unit_weight = np.ones(n_samples) + clf = svm.LinearSVC(random_state=0).fit(X, Y) + clf_unitweight = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( + X, Y, sample_weight=unit_weight + ) + + # check if same as sample_weight=None + assert_array_equal(clf_unitweight.predict(T), clf.predict(T)) + assert_allclose(clf.coef_, clf_unitweight.coef_, 1, 0.0001) + + # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where + # X = X1 repeated n1 times, X2 repeated n2 times and so forth + + random_state = check_random_state(0) + random_weight = random_state.randint(0, 10, n_samples) + lsvc_unflat = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( + X, Y, sample_weight=random_weight + ) + + pred1 = lsvc_unflat.predict(T) + + X_flat = np.repeat(X, random_weight, axis=0) + y_flat = np.repeat(Y, random_weight, axis=0) + lsvc_flat = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( + X_flat, y_flat + ) + pred2 = lsvc_flat.predict(T) + + assert_array_equal(pred1, pred2) + assert_allclose(lsvc_unflat.coef_, lsvc_flat.coef_, 1, 0.0001) + + +def test_crammer_singer_binary(): + # Test Crammer-Singer formulation in the binary case + X, y = make_classification(n_classes=2, random_state=0) + + for fit_intercept in (True, False): + acc = ( + svm.LinearSVC( + fit_intercept=fit_intercept, + multi_class="crammer_singer", + random_state=0, + ) + .fit(X, y) + .score(X, y) + ) + assert acc > 0.9 + + +def test_linearsvc_iris(): + # Test that LinearSVC gives plausible predictions on the iris dataset + # Also, test symbolic class names (classes_). 
+ target = iris.target_names[iris.target] + clf = svm.LinearSVC(random_state=0).fit(iris.data, target) + assert set(clf.classes_) == set(iris.target_names) + assert np.mean(clf.predict(iris.data) == target) > 0.8 + + dec = clf.decision_function(iris.data) + pred = iris.target_names[np.argmax(dec, 1)] + assert_array_equal(pred, clf.predict(iris.data)) + + +def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): + # Test that dense liblinear honours intercept_scaling param + X = [[2, 1], [3, 1], [1, 3], [2, 3]] + y = [0, 0, 1, 1] + clf = classifier( + fit_intercept=True, + penalty="l1", + loss="squared_hinge", + dual=False, + C=4, + tol=1e-7, + random_state=0, + ) + assert clf.intercept_scaling == 1, clf.intercept_scaling + assert clf.fit_intercept + + # when intercept_scaling is low the intercept value is highly "penalized" + # by regularization + clf.intercept_scaling = 1 + clf.fit(X, y) + assert_almost_equal(clf.intercept_, 0, decimal=5) + + # when intercept_scaling is sufficiently high, the intercept value + # is not affected by regularization + clf.intercept_scaling = 100 + clf.fit(X, y) + intercept1 = clf.intercept_ + assert intercept1 < -1 + + # when intercept_scaling is sufficiently high, the intercept value + # doesn't depend on intercept_scaling value + clf.intercept_scaling = 1000 + clf.fit(X, y) + intercept2 = clf.intercept_ + assert_array_almost_equal(intercept1, intercept2, decimal=2) + + +def test_liblinear_set_coef(): + # multi-class case + clf = svm.LinearSVC().fit(iris.data, iris.target) + values = clf.decision_function(iris.data) + clf.coef_ = clf.coef_.copy() + clf.intercept_ = clf.intercept_.copy() + values2 = clf.decision_function(iris.data) + assert_array_almost_equal(values, values2) + + # binary-class case + X = [[2, 1], [3, 1], [1, 3], [2, 3]] + y = [0, 0, 1, 1] + + clf = svm.LinearSVC().fit(X, y) + values = clf.decision_function(X) + clf.coef_ = clf.coef_.copy() + clf.intercept_ = clf.intercept_.copy() + values2 = clf.decision_function(X) + assert_array_equal(values, values2) + + +def test_immutable_coef_property(): + # Check that primal coef modification are not silently ignored + svms = [ + svm.SVC(kernel="linear").fit(iris.data, iris.target), + svm.NuSVC(kernel="linear").fit(iris.data, iris.target), + svm.SVR(kernel="linear").fit(iris.data, iris.target), + svm.NuSVR(kernel="linear").fit(iris.data, iris.target), + svm.OneClassSVM(kernel="linear").fit(iris.data), + ] + for clf in svms: + with pytest.raises(AttributeError): + clf.__setattr__("coef_", np.arange(3)) + with pytest.raises((RuntimeError, ValueError)): + clf.coef_.__setitem__((0, 0), 0) + + +def test_linearsvc_verbose(): + # stdout: redirect + import os + + stdout = os.dup(1) # save original stdout + os.dup2(os.pipe()[1], 1) # replace it + + # actual call + clf = svm.LinearSVC(verbose=1) + clf.fit(X, Y) + + # stdout: restore + os.dup2(stdout, 1) # restore original stdout + + +def test_svc_clone_with_callable_kernel(): + # create SVM with callable linear kernel, check that results are the same + # as with built-in linear kernel + svm_callable = svm.SVC( + kernel=lambda x, y: np.dot(x, y.T), + probability=True, + random_state=0, + decision_function_shape="ovr", + ) + # clone for checking clonability with lambda functions.. 
+ svm_cloned = base.clone(svm_callable) + svm_cloned.fit(iris.data, iris.target) + + svm_builtin = svm.SVC( + kernel="linear", probability=True, random_state=0, decision_function_shape="ovr" + ) + svm_builtin.fit(iris.data, iris.target) + + assert_array_almost_equal(svm_cloned.dual_coef_, svm_builtin.dual_coef_) + assert_array_almost_equal(svm_cloned.intercept_, svm_builtin.intercept_) + assert_array_equal(svm_cloned.predict(iris.data), svm_builtin.predict(iris.data)) + + assert_array_almost_equal( + svm_cloned.predict_proba(iris.data), + svm_builtin.predict_proba(iris.data), + decimal=4, + ) + assert_array_almost_equal( + svm_cloned.decision_function(iris.data), + svm_builtin.decision_function(iris.data), + ) + + +def test_svc_bad_kernel(): + svc = svm.SVC(kernel=lambda x, y: x) + with pytest.raises(ValueError): + svc.fit(X, Y) + + +def test_libsvm_convergence_warnings(): + a = svm.SVC( + kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0, max_iter=2 + ) + warning_msg = ( + r"Solver terminated early \(max_iter=2\). Consider pre-processing " + r"your data with StandardScaler or MinMaxScaler." + ) + with pytest.warns(ConvergenceWarning, match=warning_msg): + a.fit(np.array(X), Y) + assert np.all(a.n_iter_ == 2) + + +def test_unfitted(): + X = "foo!" # input validation not required when SVM not fitted + + clf = svm.SVC() + with pytest.raises(Exception, match=r".*\bSVC\b.*\bnot\b.*\bfitted\b"): + clf.predict(X) + + clf = svm.NuSVR() + with pytest.raises(Exception, match=r".*\bNuSVR\b.*\bnot\b.*\bfitted\b"): + clf.predict(X) + + +# ignore convergence warnings from max_iter=1 +@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning") +def test_consistent_proba(): + a = svm.SVC(probability=True, max_iter=1, random_state=0) + proba_1 = a.fit(X, Y).predict_proba(X) + a = svm.SVC(probability=True, max_iter=1, random_state=0) + proba_2 = a.fit(X, Y).predict_proba(X) + assert_array_almost_equal(proba_1, proba_2) + + +def test_linear_svm_convergence_warnings(): + # Test that warnings are raised if model does not converge + + lsvc = svm.LinearSVC(random_state=0, max_iter=2) + warning_msg = "Liblinear failed to converge, increase the number of iterations." + with pytest.warns(ConvergenceWarning, match=warning_msg): + lsvc.fit(X, Y) + # Check that we have an n_iter_ attribute with int type as opposed to a + # numpy array or an np.int32 so as to match the docstring. + assert isinstance(lsvc.n_iter_, int) + assert lsvc.n_iter_ == 2 + + lsvr = svm.LinearSVR(random_state=0, max_iter=2) + with pytest.warns(ConvergenceWarning, match=warning_msg): + lsvr.fit(iris.data, iris.target) + assert isinstance(lsvr.n_iter_, int) + assert lsvr.n_iter_ == 2 + + +def test_svr_coef_sign(): + # Test that SVR(kernel="linear") has coef_ with the right sign. + # Non-regression test for #2933. 
+ X = np.random.RandomState(21).randn(10, 3) + y = np.random.RandomState(12).randn(10) + + for svr in [ + svm.SVR(kernel="linear"), + svm.NuSVR(kernel="linear"), + svm.LinearSVR(), + ]: + svr.fit(X, y) + assert_array_almost_equal( + svr.predict(X), np.dot(X, svr.coef_.ravel()) + svr.intercept_ + ) + + +def test_lsvc_intercept_scaling_zero(): + # Test that intercept_scaling is ignored when fit_intercept is False + + lsvc = svm.LinearSVC(fit_intercept=False) + lsvc.fit(X, Y) + assert lsvc.intercept_ == 0.0 + + +def test_hasattr_predict_proba(): + # Method must be (un)available before or after fit, switched by + # `probability` param + + G = svm.SVC(probability=True) + assert hasattr(G, "predict_proba") + G.fit(iris.data, iris.target) + assert hasattr(G, "predict_proba") + + G = svm.SVC(probability=False) + assert not hasattr(G, "predict_proba") + G.fit(iris.data, iris.target) + assert not hasattr(G, "predict_proba") + + # Switching to `probability=True` after fitting should make + # predict_proba available, but calling it must not work: + G.probability = True + assert hasattr(G, "predict_proba") + msg = "predict_proba is not available when fitted with probability=False" + + with pytest.raises(NotFittedError, match=msg): + G.predict_proba(iris.data) + + +def test_decision_function_shape_two_class(): + for n_classes in [2, 3]: + X, y = make_blobs(centers=n_classes, random_state=0) + for estimator in [svm.SVC, svm.NuSVC]: + clf = OneVsRestClassifier(estimator(decision_function_shape="ovr")).fit( + X, y + ) + assert len(clf.predict(X)) == len(y) + + +def test_ovr_decision_function(): + # One point from each quadrant represents one class + X_train = np.array([[1, 1], [-1, 1], [-1, -1], [1, -1]]) + y_train = [0, 1, 2, 3] + + # First point is closer to the decision boundaries than the second point + base_points = np.array([[5, 5], [10, 10]]) + + # For all the quadrants (classes) + X_test = np.vstack( + ( + base_points * [1, 1], # Q1 + base_points * [-1, 1], # Q2 + base_points * [-1, -1], # Q3 + base_points * [1, -1], # Q4 + ) + ) + + y_test = [0] * 2 + [1] * 2 + [2] * 2 + [3] * 2 + + clf = svm.SVC(kernel="linear", decision_function_shape="ovr") + clf.fit(X_train, y_train) + + y_pred = clf.predict(X_test) + + # Test if the prediction is the same as y + assert_array_equal(y_pred, y_test) + + deci_val = clf.decision_function(X_test) + + # Assert that the predicted class has the maximum value + assert_array_equal(np.argmax(deci_val, axis=1), y_pred) + + # Get decision value at test points for the predicted class + pred_class_deci_val = deci_val[range(8), y_pred].reshape((4, 2)) + + # Assert pred_class_deci_val > 0 here + assert np.min(pred_class_deci_val) > 0.0 + + # Test if the first point has lower decision value on every quadrant + # compared to the second point + assert np.all(pred_class_deci_val[:, 0] < pred_class_deci_val[:, 1]) + + +@pytest.mark.parametrize("SVCClass", [svm.SVC, svm.NuSVC]) +def test_svc_invalid_break_ties_param(SVCClass): + X, y = make_blobs(random_state=42) + + svm = SVCClass( + kernel="linear", decision_function_shape="ovo", break_ties=True, random_state=42 + ).fit(X, y) + + with pytest.raises(ValueError, match="break_ties must be False"): + svm.predict(y) + + +@pytest.mark.parametrize("SVCClass", [svm.SVC, svm.NuSVC]) +def test_svc_ovr_tie_breaking(SVCClass): + """Test if predict breaks ties in OVR mode. + Related issue: https://github.com/scikit-learn/scikit-learn/issues/8277 + """ + if SVCClass.__name__ == "NuSVC" and _IS_32BIT: + # XXX: known failure to be investigated. 
Either the code needs to be + # fixed or the test itself might need to be made less sensitive to + # random changes in test data and rounding errors more generally. + # https://github.com/scikit-learn/scikit-learn/issues/29633 + pytest.xfail("Failing test on 32bit OS") + + X, y = make_blobs(random_state=0, n_samples=20, n_features=2) + + xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 100) + ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 100) + xx, yy = np.meshgrid(xs, ys) + + common_params = dict( + kernel="rbf", gamma=1e6, random_state=42, decision_function_shape="ovr" + ) + svm = SVCClass( + break_ties=False, + **common_params, + ).fit(X, y) + pred = svm.predict(np.c_[xx.ravel(), yy.ravel()]) + dv = svm.decision_function(np.c_[xx.ravel(), yy.ravel()]) + assert not np.all(pred == np.argmax(dv, axis=1)) + + svm = SVCClass( + break_ties=True, + **common_params, + ).fit(X, y) + pred = svm.predict(np.c_[xx.ravel(), yy.ravel()]) + dv = svm.decision_function(np.c_[xx.ravel(), yy.ravel()]) + assert np.all(pred == np.argmax(dv, axis=1)) + + +def test_gamma_scale(): + X, y = [[0.0], [1.0]], [0, 1] + + clf = svm.SVC() + clf.fit(X, y) + assert_almost_equal(clf._gamma, 4) + + +@pytest.mark.parametrize( + "SVM, params", + [ + (LinearSVC, {"penalty": "l1", "loss": "squared_hinge", "dual": False}), + (LinearSVC, {"penalty": "l2", "loss": "squared_hinge", "dual": True}), + (LinearSVC, {"penalty": "l2", "loss": "squared_hinge", "dual": False}), + (LinearSVC, {"penalty": "l2", "loss": "hinge", "dual": True}), + (LinearSVR, {"loss": "epsilon_insensitive", "dual": True}), + (LinearSVR, {"loss": "squared_epsilon_insensitive", "dual": True}), + (LinearSVR, {"loss": "squared_epsilon_insensitive", "dual": True}), + ], +) +def test_linearsvm_liblinear_sample_weight(SVM, params): + X = np.array( + [ + [1, 3], + [1, 3], + [1, 3], + [1, 3], + [2, 1], + [2, 1], + [2, 1], + [2, 1], + [3, 3], + [3, 3], + [3, 3], + [3, 3], + [4, 1], + [4, 1], + [4, 1], + [4, 1], + ], + dtype=np.dtype("float"), + ) + y = np.array( + [1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype("int") + ) + + X2 = np.vstack([X, X]) + y2 = np.hstack([y, 3 - y]) + sample_weight = np.ones(shape=len(y) * 2) + sample_weight[len(y) :] = 0 + X2, y2, sample_weight = shuffle(X2, y2, sample_weight, random_state=0) + + base_estimator = SVM(random_state=42) + base_estimator.set_params(**params) + base_estimator.set_params(tol=1e-12, max_iter=1000) + est_no_weight = base.clone(base_estimator).fit(X, y) + est_with_weight = base.clone(base_estimator).fit( + X2, y2, sample_weight=sample_weight + ) + + for method in ("predict", "decision_function"): + if hasattr(base_estimator, method): + X_est_no_weight = getattr(est_no_weight, method)(X) + X_est_with_weight = getattr(est_with_weight, method)(X) + assert_allclose(X_est_no_weight, X_est_with_weight) + + +@pytest.mark.parametrize("Klass", (OneClassSVM, SVR, NuSVR)) +def test_n_support(Klass): + # Make n_support is correct for oneclass and SVR (used to be + # non-initialized) + # this is a non regression test for issue #14774 + X = np.array([[0], [0.44], [0.45], [0.46], [1]]) + y = np.arange(X.shape[0]) + est = Klass() + assert not hasattr(est, "n_support_") + est.fit(X, y) + assert est.n_support_[0] == est.support_vectors_.shape[0] + assert est.n_support_.size == 1 + + +@pytest.mark.parametrize("Estimator", [svm.SVC, svm.SVR]) +def test_custom_kernel_not_array_input(Estimator): + """Test using a custom kernel that is not fed with array-like for floats""" + data = ["A A", "A", "B", "B B", "A B"] + X = 
np.array([[2, 0], [1, 0], [0, 1], [0, 2], [1, 1]]) # count encoding + y = np.array([1, 1, 2, 2, 1]) + + def string_kernel(X1, X2): + assert isinstance(X1[0], str) + n_samples1 = _num_samples(X1) + n_samples2 = _num_samples(X2) + K = np.zeros((n_samples1, n_samples2)) + for ii in range(n_samples1): + for jj in range(ii, n_samples2): + K[ii, jj] = X1[ii].count("A") * X2[jj].count("A") + K[ii, jj] += X1[ii].count("B") * X2[jj].count("B") + K[jj, ii] = K[ii, jj] + return K + + K = string_kernel(data, data) + assert_array_equal(np.dot(X, X.T), K) + + svc1 = Estimator(kernel=string_kernel).fit(data, y) + svc2 = Estimator(kernel="linear").fit(X, y) + svc3 = Estimator(kernel="precomputed").fit(K, y) + + assert svc1.score(data, y) == svc3.score(K, y) + assert svc1.score(data, y) == svc2.score(X, y) + if hasattr(svc1, "decision_function"): # classifier + assert_allclose(svc1.decision_function(data), svc2.decision_function(X)) + assert_allclose(svc1.decision_function(data), svc3.decision_function(K)) + assert_array_equal(svc1.predict(data), svc2.predict(X)) + assert_array_equal(svc1.predict(data), svc3.predict(K)) + else: # regressor + assert_allclose(svc1.predict(data), svc2.predict(X)) + assert_allclose(svc1.predict(data), svc3.predict(K)) + + +def test_svc_raises_error_internal_representation(): + """Check that SVC raises error when internal representation is altered. + + Non-regression test for #18891 and https://nvd.nist.gov/vuln/detail/CVE-2020-28975 + """ + clf = svm.SVC(kernel="linear").fit(X, Y) + clf._n_support[0] = 1000000 + + msg = "The internal representation of SVC was altered" + with pytest.raises(ValueError, match=msg): + clf.predict(X) + + +@pytest.mark.parametrize( + "estimator, expected_n_iter_type", + [ + (svm.SVC, np.ndarray), + (svm.NuSVC, np.ndarray), + (svm.SVR, int), + (svm.NuSVR, int), + (svm.OneClassSVM, int), + ], +) +@pytest.mark.parametrize( + "dataset", + [ + make_classification(n_classes=2, n_informative=2, random_state=0), + make_classification(n_classes=3, n_informative=3, random_state=0), + make_classification(n_classes=4, n_informative=4, random_state=0), + ], +) +def test_n_iter_libsvm(estimator, expected_n_iter_type, dataset): + # Check that the type of n_iter_ is correct for the classes that inherit + # from BaseSVC. + # Note that for SVC, and NuSVC this is an ndarray; while for SVR, NuSVR, and + # OneClassSVM, it is an int. + # For SVC and NuSVC also check the shape of n_iter_. 
+ X, y = dataset + n_iter = estimator(kernel="linear").fit(X, y).n_iter_ + assert type(n_iter) == expected_n_iter_type + if estimator in [svm.SVC, svm.NuSVC]: + n_classes = len(np.unique(y)) + assert n_iter.shape == (n_classes * (n_classes - 1) // 2,) + + +@pytest.mark.parametrize("loss", ["squared_hinge", "squared_epsilon_insensitive"]) +def test_dual_auto(loss): + # OvR, L2, N > M (6,2) + dual = _validate_dual_parameter("auto", loss, "l2", "ovr", np.asarray(X)) + assert dual is False + # OvR, L2, N < M (2,6) + dual = _validate_dual_parameter("auto", loss, "l2", "ovr", np.asarray(X).T) + assert dual is True + + +def test_dual_auto_edge_cases(): + # Hinge, OvR, L2, N > M (6,2) + dual = _validate_dual_parameter("auto", "hinge", "l2", "ovr", np.asarray(X)) + assert dual is True # only supports True + dual = _validate_dual_parameter( + "auto", "epsilon_insensitive", "l2", "ovr", np.asarray(X) + ) + assert dual is True # only supports True + # SqHinge, OvR, L1, N < M (2,6) + dual = _validate_dual_parameter( + "auto", "squared_hinge", "l1", "ovr", np.asarray(X).T + ) + assert dual is False # only supports False + + +@pytest.mark.parametrize( + "Estimator, make_dataset", + [(svm.SVC, make_classification), (svm.SVR, make_regression)], +) +@pytest.mark.parametrize("C_inf", [np.inf, float("inf")]) +def test_svm_with_infinite_C(Estimator, make_dataset, C_inf, global_random_seed): + """Check that we can pass `C=inf` that is equivalent to a very large C value. + + Non-regression test for + https://github.com/scikit-learn/scikit-learn/issues/29772 + """ + X, y = make_dataset(random_state=global_random_seed) + estimator_C_inf = Estimator(C=C_inf).fit(X, y) + estimator_C_large = Estimator(C=1e10).fit(X, y) + + assert_allclose(estimator_C_large.predict(X), estimator_C_inf.predict(X)) diff --git a/.venv/Lib/site-packages/sklearn/tests/metadata_routing_common.py b/.venv/Lib/site-packages/sklearn/tests/metadata_routing_common.py new file mode 100644 index 0000000000000000000000000000000000000000..5daccf49f2bc5c740b585df877ba07cd8b03d7f0 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/tests/metadata_routing_common.py @@ -0,0 +1,544 @@ +import inspect +from collections import defaultdict +from functools import partial + +import numpy as np +from numpy.testing import assert_array_equal + +from sklearn.base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + TransformerMixin, + clone, +) +from sklearn.metrics._scorer import _Scorer, mean_squared_error +from sklearn.model_selection import BaseCrossValidator +from sklearn.model_selection._split import GroupsConsumerMixin +from sklearn.utils._metadata_requests import ( + SIMPLE_METHODS, +) +from sklearn.utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + process_routing, +) +from sklearn.utils.multiclass import _check_partial_fit_first_call + + +def record_metadata(obj, record_default=True, **kwargs): + """Utility function to store passed metadata to a method of obj. + + If record_default is False, kwargs whose values are "default" are skipped. + This is so that checks on keyword arguments whose default was not changed + are skipped. 
+ + """ + stack = inspect.stack() + callee = stack[1].function + caller = stack[2].function + if not hasattr(obj, "_records"): + obj._records = defaultdict(lambda: defaultdict(list)) + if not record_default: + kwargs = { + key: val + for key, val in kwargs.items() + if not isinstance(val, str) or (val != "default") + } + obj._records[callee][caller].append(kwargs) + + +def check_recorded_metadata(obj, method, parent, split_params=tuple(), **kwargs): + """Check whether the expected metadata is passed to the object's method. + + Parameters + ---------- + obj : estimator object + sub-estimator to check routed params for + method : str + sub-estimator's method where metadata is routed to, or otherwise in + the context of metadata routing referred to as 'callee' + parent : str + the parent method which should have called `method`, or otherwise in + the context of metadata routing referred to as 'caller' + split_params : tuple, default=empty + specifies any parameters which are to be checked as being a subset + of the original values + **kwargs : dict + passed metadata + """ + all_records = ( + getattr(obj, "_records", dict()).get(method, dict()).get(parent, list()) + ) + for record in all_records: + # first check that the names of the metadata passed are the same as + # expected. The names are stored as keys in `record`. + assert set(kwargs.keys()) == set( + record.keys() + ), f"Expected {kwargs.keys()} vs {record.keys()}" + for key, value in kwargs.items(): + recorded_value = record[key] + # The following condition is used to check for any specified parameters + # being a subset of the original values + if key in split_params and recorded_value is not None: + assert np.isin(recorded_value, value).all() + else: + if isinstance(recorded_value, np.ndarray): + assert_array_equal(recorded_value, value) + else: + assert ( + recorded_value is value + ), f"Expected {recorded_value} vs {value}. Method: {method}" + + +record_metadata_not_default = partial(record_metadata, record_default=False) + + +def assert_request_is_empty(metadata_request, exclude=None): + """Check if a metadata request dict is empty. + + One can exclude a method or a list of methods from the check using the + ``exclude`` parameter. If metadata_request is a MetadataRouter, then + ``exclude`` can be of the form ``{"object" : [method, ...]}``. + """ + if isinstance(metadata_request, MetadataRouter): + for name, route_mapping in metadata_request: + if exclude is not None and name in exclude: + _exclude = exclude[name] + else: + _exclude = None + assert_request_is_empty(route_mapping.router, exclude=_exclude) + return + + exclude = [] if exclude is None else exclude + for method in SIMPLE_METHODS: + if method in exclude: + continue + mmr = getattr(metadata_request, method) + props = [ + prop + for prop, alias in mmr.requests.items() + if isinstance(alias, str) or alias is not None + ] + assert not props + + +def assert_request_equal(request, dictionary): + for method, requests in dictionary.items(): + mmr = getattr(request, method) + assert mmr.requests == requests + + empty_methods = [method for method in SIMPLE_METHODS if method not in dictionary] + for method in empty_methods: + assert not len(getattr(request, method).requests) + + +class _Registry(list): + # This list is used to get a reference to the sub-estimators, which are not + # necessarily stored on the metaestimator. 
We need to override __deepcopy__ + because the sub-estimators are probably cloned, which would result in a + new copy of the list, but we need copy and deep copy both to return the + same instance. + def __deepcopy__(self, memo): + return self + + def __copy__(self): + return self + + +class ConsumingRegressor(RegressorMixin, BaseEstimator): + """A regressor consuming metadata. + + Parameters + ---------- + registry : list, default=None + If a list, the estimator will append itself to the list in order to have + a reference to the estimator later on. Since that reference is not + required in all tests, registration can be skipped by leaving this value + as None. + """ + + def __init__(self, registry=None): + self.registry = registry + + def partial_fit(self, X, y, sample_weight="default", metadata="default"): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return self + + def fit(self, X, y, sample_weight="default", metadata="default"): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return self + + def predict(self, X, y=None, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return np.zeros(shape=(len(X),)) + + def score(self, X, y, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return 1 + + +class NonConsumingClassifier(ClassifierMixin, BaseEstimator): + """A classifier which accepts no metadata on any method.""" + + def __init__(self, alpha=0.0): + self.alpha = alpha + + def fit(self, X, y): + self.classes_ = np.unique(y) + self.coef_ = np.ones_like(X) + return self + + def partial_fit(self, X, y, classes=None): + return self + + def decision_function(self, X): + return self.predict(X) + + def predict(self, X): + y_pred = np.empty(shape=(len(X),)) + y_pred[: len(X) // 2] = 0 + y_pred[len(X) // 2 :] = 1 + return y_pred + + def predict_proba(self, X): + # dummy probabilities to support predict_proba + y_proba = np.empty(shape=(len(X), 2)) + y_proba[: len(X) // 2, :] = np.asarray([1.0, 0.0]) + y_proba[len(X) // 2 :, :] = np.asarray([0.0, 1.0]) + return y_proba + + def predict_log_proba(self, X): + # dummy probabilities to support predict_log_proba + return self.predict_proba(X) + + +class NonConsumingRegressor(RegressorMixin, BaseEstimator): + """A regressor which accepts no metadata on any method.""" + + def fit(self, X, y): + return self + + def partial_fit(self, X, y): + return self + + def predict(self, X): + return np.ones(len(X)) # pragma: no cover + + +class ConsumingClassifier(ClassifierMixin, BaseEstimator): + """A classifier consuming metadata. + + Parameters + ---------- + registry : list, default=None + If a list, the estimator will append itself to the list in order to have + a reference to the estimator later on. Since that reference is not + required in all tests, registration can be skipped by leaving this value + as None. + + alpha : float, default=0 + This parameter is only used to test the ``*SearchCV`` objects, and + doesn't do anything. 
+ """ + + def __init__(self, registry=None, alpha=0.0): + self.alpha = alpha + self.registry = registry + + def partial_fit( + self, X, y, classes=None, sample_weight="default", metadata="default" + ): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + _check_partial_fit_first_call(self, classes) + return self + + def fit(self, X, y, sample_weight="default", metadata="default"): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + + self.classes_ = np.unique(y) + self.coef_ = np.ones_like(X) + return self + + def predict(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + y_score = np.empty(shape=(len(X),), dtype="int8") + y_score[len(X) // 2 :] = 0 + y_score[: len(X) // 2] = 1 + return y_score + + def predict_proba(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + y_proba = np.empty(shape=(len(X), 2)) + y_proba[: len(X) // 2, :] = np.asarray([1.0, 0.0]) + y_proba[len(X) // 2 :, :] = np.asarray([0.0, 1.0]) + return y_proba + + def predict_log_proba(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return np.zeros(shape=(len(X), 2)) + + def decision_function(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + y_score = np.empty(shape=(len(X),)) + y_score[len(X) // 2 :] = 0 + y_score[: len(X) // 2] = 1 + return y_score + + def score(self, X, y, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return 1 + + +class ConsumingTransformer(TransformerMixin, BaseEstimator): + """A transformer which accepts metadata on fit and transform. + + Parameters + ---------- + registry : list, default=None + If a list, the estimator will append itself to the list in order to have + a reference to the estimator later on. Since that reference is not + required in all tests, registration can be skipped by leaving this value + as None. + """ + + def __init__(self, registry=None): + self.registry = registry + + def fit(self, X, y=None, sample_weight="default", metadata="default"): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + self.fitted_ = True + return self + + def transform(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return X + 1 + + def fit_transform(self, X, y, sample_weight="default", metadata="default"): + # implementing ``fit_transform`` is necessary since + # ``TransformerMixin.fit_transform`` doesn't route any metadata to + # ``transform``, while here we want ``transform`` to receive + # ``sample_weight`` and ``metadata``. 
+ record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return self.fit(X, y, sample_weight=sample_weight, metadata=metadata).transform( + X, sample_weight=sample_weight, metadata=metadata + ) + + def inverse_transform(self, X, sample_weight=None, metadata=None): + record_metadata_not_default( + self, sample_weight=sample_weight, metadata=metadata + ) + return X - 1 + + +class ConsumingNoFitTransformTransformer(BaseEstimator): + """A metadata consuming transformer that doesn't inherit from + TransformerMixin, and thus doesn't implement `fit_transform`. Note that + TransformerMixin's `fit_transform` doesn't route metadata to `transform`.""" + + def __init__(self, registry=None): + self.registry = registry + + def fit(self, X, y=None, sample_weight=None, metadata=None): + if self.registry is not None: + self.registry.append(self) + + record_metadata(self, sample_weight=sample_weight, metadata=metadata) + + return self + + def transform(self, X, sample_weight=None, metadata=None): + record_metadata(self, sample_weight=sample_weight, metadata=metadata) + return X + + +class ConsumingScorer(_Scorer): + def __init__(self, registry=None): + super().__init__( + score_func=mean_squared_error, sign=1, kwargs={}, response_method="predict" + ) + self.registry = registry + + def _score(self, method_caller, clf, X, y, **kwargs): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default(self, **kwargs) + + sample_weight = kwargs.get("sample_weight", None) + return super()._score(method_caller, clf, X, y, sample_weight=sample_weight) + + +class ConsumingSplitter(GroupsConsumerMixin, BaseCrossValidator): + def __init__(self, registry=None): + self.registry = registry + + def split(self, X, y=None, groups="default", metadata="default"): + if self.registry is not None: + self.registry.append(self) + + record_metadata_not_default(self, groups=groups, metadata=metadata) + + split_index = len(X) // 2 + train_indices = list(range(0, split_index)) + test_indices = list(range(split_index, len(X))) + yield test_indices, train_indices + yield train_indices, test_indices + + def get_n_splits(self, X=None, y=None, groups=None, metadata=None): + return 2 + + def _iter_test_indices(self, X=None, y=None, groups=None): + split_index = len(X) // 2 + train_indices = list(range(0, split_index)) + test_indices = list(range(split_index, len(X))) + yield test_indices + yield train_indices + + +class MetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator): + """A meta-regressor which is only a router.""" + + def __init__(self, estimator): + self.estimator = estimator + + def fit(self, X, y, **fit_params): + params = process_routing(self, "fit", **fit_params) + self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit) + + def get_metadata_routing(self): + router = MetadataRouter(owner=self.__class__.__name__).add( + estimator=self.estimator, + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + return router + + +class WeightedMetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator): + """A meta-regressor which is also a consumer.""" + + def __init__(self, estimator, registry=None): + self.estimator = estimator + self.registry = registry + + def fit(self, X, y, sample_weight=None, **fit_params): + if self.registry is not None: + self.registry.append(self) + + record_metadata(self, sample_weight=sample_weight) + params = process_routing(self, "fit", sample_weight=sample_weight, **fit_params) + self.estimator_ = 
clone(self.estimator).fit(X, y, **params.estimator.fit) + return self + + def predict(self, X, **predict_params): + params = process_routing(self, "predict", **predict_params) + return self.estimator_.predict(X, **params.estimator.predict) + + def get_metadata_routing(self): + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add_self_request(self) + .add( + estimator=self.estimator, + method_mapping=MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="predict", callee="predict"), + ) + ) + return router + + +class WeightedMetaClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): + """A meta-estimator which also consumes sample_weight itself in ``fit``.""" + + def __init__(self, estimator, registry=None): + self.estimator = estimator + self.registry = registry + + def fit(self, X, y, sample_weight=None, **kwargs): + if self.registry is not None: + self.registry.append(self) + + record_metadata(self, sample_weight=sample_weight) + params = process_routing(self, "fit", sample_weight=sample_weight, **kwargs) + self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit) + return self + + def get_metadata_routing(self): + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add_self_request(self) + .add( + estimator=self.estimator, + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + ) + return router + + +class MetaTransformer(MetaEstimatorMixin, TransformerMixin, BaseEstimator): + """A simple meta-transformer.""" + + def __init__(self, transformer): + self.transformer = transformer + + def fit(self, X, y=None, **fit_params): + params = process_routing(self, "fit", **fit_params) + self.transformer_ = clone(self.transformer).fit(X, y, **params.transformer.fit) + return self + + def transform(self, X, y=None, **transform_params): + params = process_routing(self, "transform", **transform_params) + return self.transformer_.transform(X, **params.transformer.transform) + + def get_metadata_routing(self): + return MetadataRouter(owner=self.__class__.__name__).add( + transformer=self.transformer, + method_mapping=MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="transform", callee="transform"), + ) diff --git a/.venv/Lib/site-packages/sklearn/tests/test_base.py b/.venv/Lib/site-packages/sklearn/tests/test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..c5b141ec199b5b4af146ab529c1cc84e40b93e09 --- /dev/null +++ b/.venv/Lib/site-packages/sklearn/tests/test_base.py @@ -0,0 +1,994 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +import pickle +import re +import warnings + +import numpy as np +import pytest +import scipy.sparse as sp +from numpy.testing import assert_allclose + +import sklearn +from sklearn import config_context, datasets +from sklearn.base import ( + BaseEstimator, + OutlierMixin, + TransformerMixin, + clone, + is_classifier, + is_clusterer, + is_outlier_detector, + is_regressor, +) +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.ensemble import IsolationForest +from sklearn.exceptions import InconsistentVersionWarning +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC, SVR +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._set_output import _get_output_config +from 
sklearn.utils._testing import ( + _convert_container, + assert_array_equal, +) +from sklearn.utils.validation import _check_n_features, validate_data + + +############################################################################# +# A few test classes +class MyEstimator(BaseEstimator): + def __init__(self, l1=0, empty=None): + self.l1 = l1 + self.empty = empty + + +class K(BaseEstimator): + def __init__(self, c=None, d=None): + self.c = c + self.d = d + + +class T(BaseEstimator): + def __init__(self, a=None, b=None): + self.a = a + self.b = b + + +class NaNTag(BaseEstimator): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = True + return tags + + +class NoNaNTag(BaseEstimator): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = False + return tags + + +class OverrideTag(NaNTag): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = False + return tags + + +class DiamondOverwriteTag(NaNTag, NoNaNTag): + pass + + +class InheritDiamondOverwriteTag(DiamondOverwriteTag): + pass + + +class ModifyInitParams(BaseEstimator): + """Deprecated behavior. + Equal parameters but with a type cast. + Doesn't fulfill a is a + """ + + def __init__(self, a=np.array([0])): + self.a = a.copy() + + +class Buggy(BaseEstimator): + "A buggy estimator that does not set its parameters right." + + def __init__(self, a=None): + self.a = 1 + + +class NoEstimator: + def __init__(self): + pass + + def fit(self, X=None, y=None): + return self + + def predict(self, X=None): + return None + + +class VargEstimator(BaseEstimator): + """scikit-learn estimators shouldn't have vargs.""" + + def __init__(self, *vargs): + pass + + +############################################################################# +# The tests + + +def test_clone(): + # Tests that clone creates a correct deep copy. + # We create an estimator, make a copy of its original state + # (which, in this case, is the current state of the estimator), + # and check that the obtained copy is a correct deep copy. + + from sklearn.feature_selection import SelectFpr, f_classif + + selector = SelectFpr(f_classif, alpha=0.1) + new_selector = clone(selector) + assert selector is not new_selector + assert selector.get_params() == new_selector.get_params() + + selector = SelectFpr(f_classif, alpha=np.zeros((10, 2))) + new_selector = clone(selector) + assert selector is not new_selector + + +def test_clone_2(): + # Tests that clone doesn't copy everything. + # We first create an estimator, give it an own attribute, and + # make a copy of its original state. Then we check that the copy doesn't + # have the specific attribute we manually added to the initial estimator. + + from sklearn.feature_selection import SelectFpr, f_classif + + selector = SelectFpr(f_classif, alpha=0.1) + selector.own_attribute = "test" + new_selector = clone(selector) + assert not hasattr(new_selector, "own_attribute") + + +def test_clone_buggy(): + # Check that clone raises an error on buggy estimators. 
+ buggy = Buggy() + buggy.a = 2 + with pytest.raises(RuntimeError): + clone(buggy) + + no_estimator = NoEstimator() + with pytest.raises(TypeError): + clone(no_estimator) + + varg_est = VargEstimator() + with pytest.raises(RuntimeError): + clone(varg_est) + + est = ModifyInitParams() + with pytest.raises(RuntimeError): + clone(est) + + +def test_clone_empty_array(): + # Regression test for cloning estimators with empty arrays + clf = MyEstimator(empty=np.array([])) + clf2 = clone(clf) + assert_array_equal(clf.empty, clf2.empty) + + clf = MyEstimator(empty=sp.csr_matrix(np.array([[0]]))) + clf2 = clone(clf) + assert_array_equal(clf.empty.data, clf2.empty.data) + + +def test_clone_nan(): + # Regression test for cloning estimators with default parameter as np.nan + clf = MyEstimator(empty=np.nan) + clf2 = clone(clf) + + assert clf.empty is clf2.empty + + +def test_clone_dict(): + # test that clone creates a clone of a dict + orig = {"a": MyEstimator()} + cloned = clone(orig) + assert orig["a"] is not cloned["a"] + + +def test_clone_sparse_matrices(): + sparse_matrix_classes = [ + cls + for name in dir(sp) + if name.endswith("_matrix") and type(cls := getattr(sp, name)) is type + ] + + for cls in sparse_matrix_classes: + sparse_matrix = cls(np.eye(5)) + clf = MyEstimator(empty=sparse_matrix) + clf_cloned = clone(clf) + assert clf.empty.__class__ is clf_cloned.empty.__class__ + assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray()) + + +def test_clone_estimator_types(): + # Check that clone works for parameters that are types rather than + # instances + clf = MyEstimator(empty=MyEstimator) + clf2 = clone(clf) + + assert clf.empty is clf2.empty + + +def test_clone_class_rather_than_instance(): + # Check that clone raises expected error message when + # cloning class rather than instance + msg = "You should provide an instance of scikit-learn estimator" + with pytest.raises(TypeError, match=msg): + clone(MyEstimator) + + +def test_repr(): + # Smoke test the repr of the base estimator. 
+    my_estimator = MyEstimator()
+    repr(my_estimator)
+    test = T(K(), K())
+    assert repr(test) == "T(a=K(), b=K())"
+
+    some_est = T(a=["long_params"] * 1000)
+    assert len(repr(some_est)) == 485
+
+
+def test_str():
+    # Smoke test the str of the base estimator
+    my_estimator = MyEstimator()
+    str(my_estimator)
+
+
+def test_get_params():
+    test = T(K(), K)
+
+    assert "a__d" in test.get_params(deep=True)
+    assert "a__d" not in test.get_params(deep=False)
+
+    test.set_params(a__d=2)
+    assert test.a.d == 2
+
+    with pytest.raises(ValueError):
+        test.set_params(a__a=2)
+
+
+# TODO(1.8): Remove this test when the deprecation is removed
+def test_is_estimator_type_class():
+    with pytest.warns(FutureWarning, match="passing a class to.*is deprecated"):
+        assert is_classifier(SVC)
+
+    with pytest.warns(FutureWarning, match="passing a class to.*is deprecated"):
+        assert is_regressor(SVR)
+
+    with pytest.warns(FutureWarning, match="passing a class to.*is deprecated"):
+        assert is_clusterer(KMeans)
+
+    with pytest.warns(FutureWarning, match="passing a class to.*is deprecated"):
+        assert is_outlier_detector(IsolationForest)
+
+
+@pytest.mark.parametrize(
+    "estimator, expected_result",
+    [
+        (SVC(), True),
+        (GridSearchCV(SVC(), {"C": [0.1, 1]}), True),
+        (Pipeline([("svc", SVC())]), True),
+        (Pipeline([("svc_cv", GridSearchCV(SVC(), {"C": [0.1, 1]}))]), True),
+        (SVR(), False),
+        (GridSearchCV(SVR(), {"C": [0.1, 1]}), False),
+        (Pipeline([("svr", SVR())]), False),
+        (Pipeline([("svr_cv", GridSearchCV(SVR(), {"C": [0.1, 1]}))]), False),
+    ],
+)
+def test_is_classifier(estimator, expected_result):
+    assert is_classifier(estimator) == expected_result
+
+
+@pytest.mark.parametrize(
+    "estimator, expected_result",
+    [
+        (SVR(), True),
+        (GridSearchCV(SVR(), {"C": [0.1, 1]}), True),
+        (Pipeline([("svr", SVR())]), True),
+        (Pipeline([("svr_cv", GridSearchCV(SVR(), {"C": [0.1, 1]}))]), True),
+        (SVC(), False),
+        (GridSearchCV(SVC(), {"C": [0.1, 1]}), False),
+        (Pipeline([("svc", SVC())]), False),
+        (Pipeline([("svc_cv", GridSearchCV(SVC(), {"C": [0.1, 1]}))]), False),
+    ],
+)
+def test_is_regressor(estimator, expected_result):
+    assert is_regressor(estimator) == expected_result
+
+
+@pytest.mark.parametrize(
+    "estimator, expected_result",
+    [
+        (KMeans(), True),
+        (GridSearchCV(KMeans(), {"n_clusters": [3, 8]}), True),
+        (Pipeline([("km", KMeans())]), True),
+        (Pipeline([("km_cv", GridSearchCV(KMeans(), {"n_clusters": [3, 8]}))]), True),
+        (SVC(), False),
+        (GridSearchCV(SVC(), {"C": [0.1, 1]}), False),
+        (Pipeline([("svc", SVC())]), False),
+        (Pipeline([("svc_cv", GridSearchCV(SVC(), {"C": [0.1, 1]}))]), False),
+    ],
+)
+def test_is_clusterer(estimator, expected_result):
+    assert is_clusterer(estimator) == expected_result
+
+
+def test_set_params():
+    # test nested estimator parameter setting
+    clf = Pipeline([("svc", SVC())])
+
+    # non-existing parameter in svc
+    with pytest.raises(ValueError):
+        clf.set_params(svc__stupid_param=True)
+
+    # non-existing parameter of pipeline
+    with pytest.raises(ValueError):
+        clf.set_params(svm__stupid_param=True)
+
+    # we don't currently catch if the things in pipeline are estimators
+    # bad_pipeline = Pipeline([("bad", NoEstimator())])
+    # with pytest.raises(AttributeError):
+    #     bad_pipeline.set_params(bad__stupid_param=True)
+
+
+def test_set_params_passes_all_parameters():
+    # Make sure all parameters are passed together to set_params
+    # of nested estimator. Regression test for #9944
+
+    class TestDecisionTree(DecisionTreeClassifier):
+        def set_params(self, **kwargs):
+            super().set_params(**kwargs)
+            # expected_kwargs is in test scope
+            assert kwargs == expected_kwargs
+            return self
+
+    expected_kwargs = {"max_depth": 5, "min_samples_leaf": 2}
+    for est in [
+        Pipeline([("estimator", TestDecisionTree())]),
+        GridSearchCV(TestDecisionTree(), {}),
+    ]:
+        est.set_params(estimator__max_depth=5, estimator__min_samples_leaf=2)
+
+
+def test_set_params_updates_valid_params():
+    # Check that set_params tries to set SVC().C, not
+    # DecisionTreeClassifier().C
+    gscv = GridSearchCV(DecisionTreeClassifier(), {})
+    gscv.set_params(estimator=SVC(), estimator__C=42.0)
+    assert gscv.estimator.C == 42.0
+
+
+@pytest.mark.parametrize(
+    "tree,dataset",
+    [
+        (
+            DecisionTreeClassifier(max_depth=2, random_state=0),
+            datasets.make_classification(random_state=0),
+        ),
+        (
+            DecisionTreeRegressor(max_depth=2, random_state=0),
+            datasets.make_regression(random_state=0),
+        ),
+    ],
+)
+def test_score_sample_weight(tree, dataset):
+    rng = np.random.RandomState(0)
+    # check that the score with and without sample weights are different
+    X, y = dataset
+
+    tree.fit(X, y)
+    # generate random sample weights
+    sample_weight = rng.randint(1, 10, size=len(y))
+    score_unweighted = tree.score(X, y)
+    score_weighted = tree.score(X, y, sample_weight=sample_weight)
+    msg = "Unweighted and weighted scores are unexpectedly equal"
+    assert score_unweighted != score_weighted, msg
+
+
+def test_clone_pandas_dataframe():
+    class DummyEstimator(TransformerMixin, BaseEstimator):
+        """This is a dummy class for generating numerical features
+
+        This feature extractor extracts numerical features from pandas data
+        frame.
+
+        Parameters
+        ----------
+
+        df: pandas data frame
+            The pandas data frame parameter.
+
+        Notes
+        -----
+        """
+
+        def __init__(self, df=None, scalar_param=1):
+            self.df = df
+            self.scalar_param = scalar_param
+
+        def fit(self, X, y=None):
+            pass
+
+        def transform(self, X):
+            pass
+
+    # build and clone estimator
+    d = np.arange(10)
+    df = MockDataFrame(d)
+    e = DummyEstimator(df, scalar_param=1)
+    cloned_e = clone(e)
+
+    # the test
+    assert (e.df == cloned_e.df).values.all()
+    assert e.scalar_param == cloned_e.scalar_param
+
+
+def test_clone_protocol():
+    """Checks that clone works with `__sklearn_clone__` protocol."""
+
+    class FrozenEstimator(BaseEstimator):
+        def __init__(self, fitted_estimator):
+            self.fitted_estimator = fitted_estimator
+
+        def __getattr__(self, name):
+            return getattr(self.fitted_estimator, name)
+
+        def __sklearn_clone__(self):
+            return self
+
+        def fit(self, *args, **kwargs):
+            return self
+
+        def fit_transform(self, *args, **kwargs):
+            return self.fitted_estimator.transform(*args, **kwargs)
+
+    X = np.array([[-1, -1], [-2, -1], [-3, -2]])
+    pca = PCA().fit(X)
+    components = pca.components_
+
+    frozen_pca = FrozenEstimator(pca)
+    assert_allclose(frozen_pca.components_, components)
+
+    # Calling PCA methods such as `get_feature_names_out` still works
+    assert_array_equal(frozen_pca.get_feature_names_out(), pca.get_feature_names_out())
+
+    # Fitting on new data does not alter `components_`
+    X_new = np.asarray([[-1, 2], [3, 4], [1, 2]])
+    frozen_pca.fit(X_new)
+    assert_allclose(frozen_pca.components_, components)
+
+    # `fit_transform` does not alter state
+    frozen_pca.fit_transform(X_new)
+    assert_allclose(frozen_pca.components_, components)
+
+    # Cloning estimator is a no-op
+    clone_frozen_pca = clone(frozen_pca)
+    assert clone_frozen_pca is frozen_pca
+    assert_allclose(clone_frozen_pca.components_, components)
+
+
+def test_pickle_version_warning_is_not_raised_with_matching_version():
+    iris = datasets.load_iris()
+    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
+    tree_pickle = pickle.dumps(tree)
+    assert b"_sklearn_version" in tree_pickle
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        tree_restored = pickle.loads(tree_pickle)
+
+    # test that we can predict with the restored decision tree classifier
+    score_of_original = tree.score(iris.data, iris.target)
+    score_of_restored = tree_restored.score(iris.data, iris.target)
+    assert score_of_original == score_of_restored
+
+
+class TreeBadVersion(DecisionTreeClassifier):
+    def __getstate__(self):
+        return dict(self.__dict__.items(), _sklearn_version="something")
+
+
+pickle_error_message = (
+    "Trying to unpickle estimator {estimator} from "
+    "version {old_version} when using version "
+    "{current_version}. This might "
+    "lead to breaking code or invalid results. "
+    "Use at your own risk."
+)
+
+
+def test_pickle_version_warning_is_issued_upon_different_version():
+    iris = datasets.load_iris()
+    tree = TreeBadVersion().fit(iris.data, iris.target)
+    tree_pickle_other = pickle.dumps(tree)
+    message = pickle_error_message.format(
+        estimator="TreeBadVersion",
+        old_version="something",
+        current_version=sklearn.__version__,
+    )
+    with pytest.warns(UserWarning, match=message) as warning_record:
+        pickle.loads(tree_pickle_other)
+
+    message = warning_record.list[0].message
+    assert isinstance(message, InconsistentVersionWarning)
+    assert message.estimator_name == "TreeBadVersion"
+    assert message.original_sklearn_version == "something"
+    assert message.current_sklearn_version == sklearn.__version__
+
+
+class TreeNoVersion(DecisionTreeClassifier):
+    def __getstate__(self):
+        return self.__dict__
+
+
+def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
+    iris = datasets.load_iris()
+    # TreeNoVersion has no getstate, like pre-0.18
+    tree = TreeNoVersion().fit(iris.data, iris.target)
+
+    tree_pickle_noversion = pickle.dumps(tree)
+    assert b"_sklearn_version" not in tree_pickle_noversion
+    message = pickle_error_message.format(
+        estimator="TreeNoVersion",
+        old_version="pre-0.18",
+        current_version=sklearn.__version__,
+    )
+    # check we got the warning about using pre-0.18 pickle
+    with pytest.warns(UserWarning, match=message):
+        pickle.loads(tree_pickle_noversion)
+
+
+def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
+    iris = datasets.load_iris()
+    tree = TreeNoVersion().fit(iris.data, iris.target)
+    tree_pickle_noversion = pickle.dumps(tree)
+    try:
+        module_backup = TreeNoVersion.__module__
+        TreeNoVersion.__module__ = "notsklearn"
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+
+            pickle.loads(tree_pickle_noversion)
+    finally:
+        TreeNoVersion.__module__ = module_backup
+
+
+class DontPickleAttributeMixin:
+    def __getstate__(self):
+        data = self.__dict__.copy()
+        data["_attribute_not_pickled"] = None
+        return data
+
+    def __setstate__(self, state):
+        state["_restored"] = True
+        self.__dict__.update(state)
+
+
+class MultiInheritanceEstimator(DontPickleAttributeMixin, BaseEstimator):
+    def __init__(self, attribute_pickled=5):
+        self.attribute_pickled = attribute_pickled
+        self._attribute_not_pickled = None
+
+
+def test_pickling_when_getstate_is_overwritten_by_mixin():
+    estimator = MultiInheritanceEstimator()
+    estimator._attribute_not_pickled = "this attribute should not be pickled"
+
+    serialized = pickle.dumps(estimator)
+    estimator_restored = pickle.loads(serialized)
+    assert estimator_restored.attribute_pickled == 5
+    assert estimator_restored._attribute_not_pickled is None
+    assert estimator_restored._restored
+
+
+def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
+    try:
+        estimator = MultiInheritanceEstimator()
+        text = "this attribute should not be pickled"
+        estimator._attribute_not_pickled = text
+        old_mod = type(estimator).__module__
+        type(estimator).__module__ = "notsklearn"
+
+        serialized = estimator.__getstate__()
+        assert serialized == {"_attribute_not_pickled": None, "attribute_pickled": 5}
+
+        serialized["attribute_pickled"] = 4
+        estimator.__setstate__(serialized)
+        assert estimator.attribute_pickled == 4
+        assert estimator._restored
+    finally:
+        type(estimator).__module__ = old_mod
+
+
+class SingleInheritanceEstimator(BaseEstimator):
+    def __init__(self, attribute_pickled=5):
+        self.attribute_pickled = attribute_pickled
+        self._attribute_not_pickled = None
+
+    def __getstate__(self):
+        state = super().__getstate__()
+        state["_attribute_not_pickled"] = None
+        return state
+
+
+def test_pickling_works_when_getstate_is_overwritten_in_the_child_class():
+    estimator = SingleInheritanceEstimator()
+    estimator._attribute_not_pickled = "this attribute should not be pickled"
+
+    serialized = pickle.dumps(estimator)
+    estimator_restored = pickle.loads(serialized)
+    assert estimator_restored.attribute_pickled == 5
+    assert estimator_restored._attribute_not_pickled is None
+
+
+def test_tag_inheritance():
+    # test that changing tags by inheritance is not allowed
+
+    nan_tag_est = NaNTag()
+    no_nan_tag_est = NoNaNTag()
+    assert nan_tag_est.__sklearn_tags__().input_tags.allow_nan
+    assert not no_nan_tag_est.__sklearn_tags__().input_tags.allow_nan
+
+    redefine_tags_est = OverrideTag()
+    assert not redefine_tags_est.__sklearn_tags__().input_tags.allow_nan
+
+    diamond_tag_est = DiamondOverwriteTag()
+    assert diamond_tag_est.__sklearn_tags__().input_tags.allow_nan
+
+    inherit_diamond_tag_est = InheritDiamondOverwriteTag()
+    assert inherit_diamond_tag_est.__sklearn_tags__().input_tags.allow_nan
+
+
+def test_raises_on_get_params_non_attribute():
+    class MyEstimator(BaseEstimator):
+        def __init__(self, param=5):
+            pass
+
+        def fit(self, X, y=None):
+            return self
+
+    est = MyEstimator()
+    msg = "'MyEstimator' object has no attribute 'param'"
+
+    with pytest.raises(AttributeError, match=msg):
+        est.get_params()
+
+
+def test_repr_mimebundle_():
+    # Checks the display configuration flag controls the json output
+    tree = DecisionTreeClassifier()
+    output = tree._repr_mimebundle_()
+    assert "text/plain" in output
+    assert "text/html" in output
+
+    with config_context(display="text"):
+        output = tree._repr_mimebundle_()
+        assert "text/plain" in output
+        assert "text/html" not in output
+
+
+def test_repr_html_wraps():
+    # Checks the display configuration flag controls the html output
+    tree = DecisionTreeClassifier()
+
+    output = tree._repr_html_()
+    assert "