MilesCranmer committed
Merge pull request #428 from MilesCranmer/refactor-utils
- .github/workflows/CI.yml +26 -0
- mypy.ini +8 -0
- pysr/__init__.py +2 -1
- pysr/denoising.py +35 -0
- pysr/deprecated.py +54 -0
- pysr/export_latex.py +13 -11
- pysr/export_sympy.py +2 -2
- pysr/feature_selection.py +35 -0
- pysr/feynman_problems.py +1 -1
- pysr/sr.py +19 -159
- pysr/test/test.py +3 -8
- pysr/utils.py +55 -0
.github/workflows/CI.yml
CHANGED
@@ -143,3 +143,29 @@ jobs:
       run: |
         pip install coveralls
         coveralls --finish
+
+  types:
+    name: Check types
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}
+    strategy:
+      matrix:
+        python-version: ['3.10']
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: "Set up Python"
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+      - name: "Install PySR and all dependencies"
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install mypy jax jaxlib torch
+          python setup.py install
+      - name: "Run mypy"
+        run: mypy --install-types --non-interactive pysr

mypy.ini
ADDED
@@ -0,0 +1,8 @@
+[mypy]
+warn_return_any = True
+
+[mypy-sklearn.*]
+ignore_missing_imports = True
+
+[mypy-julia.*]
+ignore_missing_imports = True

pysr/__init__.py
CHANGED
@@ -1,9 +1,10 @@
 from . import sklearn_monkeypatch
+from .deprecated import best, best_callable, best_row, best_tex, pysr
 from .export_jax import sympy2jax
 from .export_torch import sympy2torch
 from .feynman_problems import FeynmanProblem, Problem
 from .julia_helpers import install
-from .sr import PySRRegressor
+from .sr import PySRRegressor
 from .version import __version__
 
 __all__ = [

pysr/denoising.py
ADDED
@@ -0,0 +1,35 @@
+"""Functions for denoising data during preprocessing."""
+import numpy as np
+
+
+def denoise(X, y, Xresampled=None, random_state=None):
+    """Denoise the dataset using a Gaussian process."""
+    from sklearn.gaussian_process import GaussianProcessRegressor
+    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
+
+    gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel()
+    gpr = GaussianProcessRegressor(
+        kernel=gp_kernel, n_restarts_optimizer=50, random_state=random_state
+    )
+    gpr.fit(X, y)
+
+    if Xresampled is not None:
+        return Xresampled, gpr.predict(Xresampled)
+
+    return X, gpr.predict(X)
+
+
+def multi_denoise(X, y, Xresampled=None, random_state=None):
+    """Perform `denoise` along each column of `y` independently."""
+    y = np.stack(
+        [
+            denoise(X, y[:, i], Xresampled=Xresampled, random_state=random_state)[1]
+            for i in range(y.shape[1])
+        ],
+        axis=1,
+    )
+
+    if Xresampled is not None:
+        return Xresampled, y
+
+    return X, y

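Usage sketch (not part of the diff): a minimal example of calling the new helpers directly, assuming PySR is installed and importable as `pysr.denoising`; the toy data below is made up for illustration.

import numpy as np

from pysr.denoising import denoise, multi_denoise

rng = np.random.RandomState(0)
X = rng.uniform(size=(50, 2))                    # toy inputs
y = np.cos(X[:, 0]) + 0.1 * rng.normal(size=50)  # noisy single-output target

# Fit a GP to (X, y) and replace y with the GP's smoothed prediction.
X_out, y_denoised = denoise(X, y, random_state=0)

# For multi-output targets, multi_denoise applies denoise to each column of y.
Y = np.stack([y, -y], axis=1)
X_out, Y_denoised = multi_denoise(X, Y, random_state=0)
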
pysr/deprecated.py
CHANGED
@@ -1,4 +1,58 @@
 """Various functions to deprecate features."""
+import warnings
+
+
+def pysr(X, y, weights=None, **kwargs):  # pragma: no cover
+    from .sr import PySRRegressor
+
+    warnings.warn(
+        "Calling `pysr` is deprecated. "
+        "Please use `model = PySRRegressor(**params); "
+        "model.fit(X, y)` going forward.",
+        FutureWarning,
+    )
+    model = PySRRegressor(**kwargs)
+    model.fit(X, y, weights=weights)
+    return model.equations_
+
+
+def best(*args, **kwargs):  # pragma: no cover
+    raise NotImplementedError(
+        "`best` has been deprecated. "
+        "Please use the `PySRRegressor` interface. "
+        "After fitting, you can return `.sympy()` "
+        "to get the sympy representation "
+        "of the best equation."
+    )
+
+
+def best_row(*args, **kwargs):  # pragma: no cover
+    raise NotImplementedError(
+        "`best_row` has been deprecated. "
+        "Please use the `PySRRegressor` interface. "
+        "After fitting, you can run `print(model)` to view the best equation, "
+        "or "
+        "`model.get_best()` to return the best equation's "
+        "row in `model.equations_`."
+    )
+
+
+def best_tex(*args, **kwargs):  # pragma: no cover
+    raise NotImplementedError(
+        "`best_tex` has been deprecated. "
+        "Please use the `PySRRegressor` interface. "
+        "After fitting, you can return `.latex()` to "
+        "get the sympy representation "
+        "of the best equation."
+    )
+
+
+def best_callable(*args, **kwargs):  # pragma: no cover
+    raise NotImplementedError(
+        "`best_callable` has been deprecated. Please use the `PySRRegressor` "
+        "interface. After fitting, you can use "
+        "`.predict(X)` to use the best callable."
+    )
 
 
 def make_deprecated_kwargs_for_pysr_regressor():

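Migration sketch (not part of the diff): the replacement calls that the deprecation messages above point to, with placeholder data and hyperparameters.

import numpy as np

from pysr import PySRRegressor

X = np.random.randn(100, 2)
y = X[:, 0] ** 2 + np.cos(X[:, 1])

# Instead of the old `equations = pysr(X, y, **params)`:
model = PySRRegressor(niterations=5, binary_operators=["+", "*"])
model.fit(X, y)

print(model)               # replaces best_row()
expr = model.sympy()       # replaces best()
tex = model.latex()        # replaces best_tex()
y_pred = model.predict(X)  # replaces best_callable()
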
pysr/export_latex.py
CHANGED
@@ -1,5 +1,5 @@
 """Functions to help export PySR equations to LaTeX."""
-from typing import List
+from typing import List, Optional, Tuple
 
 import pandas as pd
 import sympy
@@ -19,14 +19,16 @@ class PreciseLatexPrinter(LatexPrinter):
         return super()._print_Float(reduced_float)
 
 
-def sympy2latex(expr, prec=3, full_prec=True, **settings):
+def sympy2latex(expr, prec=3, full_prec=True, **settings) -> str:
     """Convert sympy expression to LaTeX with custom precision."""
     settings["full_prec"] = full_prec
     printer = PreciseLatexPrinter(settings=settings, prec=prec)
     return printer.doprint(expr)
 
 
-def generate_table_environment(columns=["equation", "complexity", "loss"]):
+def generate_table_environment(
+    columns: List[str] = ["equation", "complexity", "loss"]
+) -> Tuple[str, str]:
     margins = "c" * len(columns)
     column_map = {
         "complexity": "Complexity",
@@ -58,12 +60,12 @@ def generate_table_environment(columns=["equation", "complexity", "loss"]):
 
 def sympy2latextable(
     equations: pd.DataFrame,
-    indices: List[int] = None,
+    indices: Optional[List[int]] = None,
     precision: int = 3,
-    columns=["equation", "complexity", "loss", "score"],
+    columns: List[str] = ["equation", "complexity", "loss", "score"],
     max_equation_length: int = 50,
     output_variable_name: str = "y",
-):
+) -> str:
     """Generate a booktabs-style LaTeX table for a single set of equations."""
     assert isinstance(equations, pd.DataFrame)
 
@@ -71,7 +73,7 @@ def sympy2latextable(
     latex_table_content = []
 
     if indices is None:
-        indices =
+        indices = list(equations.index)
 
     for i in indices:
         latex_equation = sympy2latex(
@@ -126,11 +128,11 @@ def sympy2latextable(
 
 def sympy2multilatextable(
     equations: List[pd.DataFrame],
-    indices: List[List[int]] = None,
+    indices: Optional[List[List[int]]] = None,
     precision: int = 3,
-    columns=["equation", "complexity", "loss", "score"],
-    output_variable_names: str = None,
-):
+    columns: List[str] = ["equation", "complexity", "loss", "score"],
+    output_variable_names: Optional[List[str]] = None,
+) -> str:
     """Generate multiple latex tables for a list of equation sets."""
     # TODO: Let user specify custom output variable
 

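Usage sketch (not part of the diff): the re-typed helpers in isolation; the commented call assumes a fitted PySRRegressor whose Pareto front lives in `model.equations_`.

import sympy

from pysr.export_latex import sympy2latex, sympy2latextable

# Render a single expression with 3 significant figures.
x0 = sympy.Symbol("x0")
print(sympy2latex(3.14159265 * sympy.cos(x0), prec=3))

# Render a whole equation DataFrame as a booktabs table; with the new default,
# indices=None now means "every row of the DataFrame" (list(equations.index)).
# table = sympy2latextable(model.equations_, precision=3)
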
pysr/export_sympy.py
CHANGED
@@ -51,14 +51,14 @@ sympy_mappings = {
 
 
 def create_sympy_symbols(
-    feature_names_in:
+    feature_names_in: List[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]
 
 
 def pysr2sympy(
     equation: str, *, extra_sympy_mappings: Optional[Dict[str, Callable]] = None
-)
+):
     local_sympy_mappings = {
         **(extra_sympy_mappings if extra_sympy_mappings else {}),
         **sympy_mappings,

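Usage sketch (not part of the diff): how the annotated helpers are typically called; the equation string is made up, and the exact parsing behaviour is assumed from the surrounding code.

from pysr.export_sympy import create_sympy_symbols, pysr2sympy

# Feature names become sympy Symbols.
symbols = create_sympy_symbols(["x0", "x1"])

# A PySR equation string is parsed against the built-in operator mappings;
# extra_sympy_mappings can add user-defined operators.
expr = pysr2sympy("cos(x0) + 1.5 * x1", extra_sympy_mappings=None)
print(expr)
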
pysr/feature_selection.py
ADDED
@@ -0,0 +1,35 @@
+"""Functions for doing feature selection during preprocessing."""
+import numpy as np
+
+
+def run_feature_selection(X, y, select_k_features, random_state=None):
+    """
+    Find most important features.
+
+    Uses a gradient boosting tree regressor as a proxy for finding
+    the k most important features in X, returning indices for those
+    features as output.
+    """
+    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.feature_selection import SelectFromModel
+
+    clf = RandomForestRegressor(
+        n_estimators=100, max_depth=3, random_state=random_state
+    )
+    clf.fit(X, y)
+    selector = SelectFromModel(
+        clf, threshold=-np.inf, max_features=select_k_features, prefit=True
+    )
+    return selector.get_support(indices=True)
+
+
+# Function has not been removed only due to usage in module tests
+def _handle_feature_selection(X, select_k_features, y, variable_names):
+    if select_k_features is not None:
+        selection = run_feature_selection(X, y, select_k_features)
+        print(f"Using features {[variable_names[i] for i in selection]}")
+        X = X[:, selection]
+    else:
+        selection = None
+
+    return X, selection

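Usage sketch (not part of the diff): the moved selector on toy data where only two of five features matter; the printed result is what one would typically expect, not a guaranteed output.

import numpy as np

from pysr.feature_selection import run_feature_selection

rng = np.random.RandomState(0)
X = rng.uniform(size=(300, 5))
y = 2.0 * X[:, 1] + np.cos(X[:, 3])

# Returns the column indices of the k most important features,
# ranked by random forest feature importances.
selected = run_feature_selection(X, y, select_k_features=2, random_state=0)
print(selected)  # e.g. [1 3]
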
pysr/feynman_problems.py
CHANGED
@@ -4,7 +4,7 @@ from pathlib import Path
 
 import numpy as np
 
-from .sr import best, pysr
+from .deprecated import best, pysr
 
 PKG_DIR = Path(__file__).parents[1]
 FEYNMAN_DATASET = PKG_DIR / "datasets" / "FeynmanEquations.csv"

pysr/sr.py
CHANGED
@@ -11,6 +11,7 @@ from datetime import datetime
 from io import StringIO
 from multiprocessing import cpu_count
 from pathlib import Path
+from typing import List, Optional
 
 import numpy as np
 import pandas as pd
@@ -18,12 +19,14 @@ from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.utils import check_array, check_consistent_length, check_random_state
 from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
 
+from .denoising import denoise, multi_denoise
 from .deprecated import make_deprecated_kwargs_for_pysr_regressor
 from .export_jax import sympy2jax
 from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable
 from .export_numpy import sympy2numpy
 from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
 from .export_torch import sympy2torch
+from .feature_selection import run_feature_selection
 from .julia_helpers import (
     _escape_filename,
     _load_backend,
@@ -33,23 +36,18 @@ from .julia_helpers import (
     init_julia,
     is_julia_version_greater_eq,
 )
+from .utils import (
+    _csv_filename_to_pkl_filename,
+    _preprocess_julia_floats,
+    _safe_check_feature_names_in,
+    _subscriptify,
+)
 
 Main = None  # TODO: Rename to more descriptive name like "julia_runtime"
 
 already_ran = False
 
 
-def pysr(X, y, weights=None, **kwargs):  # pragma: no cover
-    warnings.warn(
-        "Calling `pysr` is deprecated. "
-        "Please use `model = PySRRegressor(**params); model.fit(X, y)` going forward.",
-        FutureWarning,
-    )
-    model = PySRRegressor(**kwargs)
-    model.fit(X, y, weights=weights)
-    return model.equations_
-
-
 def _process_constraints(binary_operators, unary_operators, constraints):
     constraints = constraints.copy()
     for op in unary_operators:
@@ -172,37 +170,6 @@ def _check_assertions(
     )
 
 
-def best(*args, **kwargs):  # pragma: no cover
-    raise NotImplementedError(
-        "`best` has been deprecated. Please use the `PySRRegressor` interface. "
-        "After fitting, you can return `.sympy()` to get the sympy representation "
-        "of the best equation."
-    )
-
-
-def best_row(*args, **kwargs):  # pragma: no cover
-    raise NotImplementedError(
-        "`best_row` has been deprecated. Please use the `PySRRegressor` interface. "
-        "After fitting, you can run `print(model)` to view the best equation, or "
-        "`model.get_best()` to return the best equation's row in `model.equations_`."
-    )
-
-
-def best_tex(*args, **kwargs):  # pragma: no cover
-    raise NotImplementedError(
-        "`best_tex` has been deprecated. Please use the `PySRRegressor` interface. "
-        "After fitting, you can return `.latex()` to get the sympy representation "
-        "of the best equation."
-    )
-
-
-def best_callable(*args, **kwargs):  # pragma: no cover
-    raise NotImplementedError(
-        "`best_callable` has been deprecated. Please use the `PySRRegressor` "
-        "interface. After fitting, you can use `.predict(X)` to use the best callable."
-    )
-
-
 # Class validation constants
 VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
 
@@ -945,10 +912,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         model : PySRRegressor
             The model with fitted equations.
         """
-
-
-        else:
-            pkl_filename = equation_file
+
+        pkl_filename = _csv_filename_to_pkl_filename(equation_file)
 
         # Try to load model from <equation_file>.pkl
         print(f"Checking if {pkl_filename} exists...")
@@ -1502,19 +1467,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         # Denoising transformation
         if self.denoise:
             if self.nout_ > 1:
-                y = np.stack(
-                    [
-                    _denoise(
-                        X, y[:, i], Xresampled=Xresampled, random_state=random_state
-                    )[1]
-                    for i in range(self.nout_)
-                ],
-                axis=1,
+                X, y = multi_denoise(
+                    X, y, Xresampled=Xresampled, random_state=random_state
                 )
-            if Xresampled is not None:
-                X = Xresampled
             else:
-                X, y = _denoise(X, y, Xresampled=Xresampled, random_state=random_state)
+                X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)
 
         return X, y, variable_names, X_units, y_units
 
@@ -1783,10 +1740,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         y,
         Xresampled=None,
         weights=None,
-        variable_names=None,
-        X_units=None,
-        y_units=None,
-    ):
+        variable_names: Optional[List[str]] = None,
+        X_units: Optional[List[str]] = None,
+        y_units: Optional[List[str]] = None,
+    ) -> "PySRRegressor":
         """
         Search for equations to fit the dataset and store them in `self.equations_`.
 
@@ -2373,7 +2330,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         return "\n".join(preamble_string + [table_string])
 
 
-def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
+def idx_model_selection(equations: pd.DataFrame, model_selection: str):
     """Select an expression and return its index."""
     if model_selection == "accuracy":
         chosen_idx = equations["loss"].idxmin()
@@ -2388,100 +2345,3 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
             f"{model_selection} is not a valid model selection strategy."
         )
     return chosen_idx
-
-
-def _denoise(X, y, Xresampled=None, random_state=None):
-    """Denoise the dataset using a Gaussian process."""
-    from sklearn.gaussian_process import GaussianProcessRegressor
-    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
-
-    gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel()
-    gpr = GaussianProcessRegressor(
-        kernel=gp_kernel, n_restarts_optimizer=50, random_state=random_state
-    )
-    gpr.fit(X, y)
-    if Xresampled is not None:
-        return Xresampled, gpr.predict(Xresampled)
-
-    return X, gpr.predict(X)
-
-
-# Function has not been removed only due to usage in module tests
-def _handle_feature_selection(X, select_k_features, y, variable_names):
-    if select_k_features is not None:
-        selection = run_feature_selection(X, y, select_k_features)
-        print(f"Using features {[variable_names[i] for i in selection]}")
-        X = X[:, selection]
-
-    else:
-        selection = None
-    return X, selection
-
-
-def run_feature_selection(X, y, select_k_features, random_state=None):
-    """
-    Find most important features.
-
-    Uses a gradient boosting tree regressor as a proxy for finding
-    the k most important features in X, returning indices for those
-    features as output.
-    """
-    from sklearn.ensemble import RandomForestRegressor
-    from sklearn.feature_selection import SelectFromModel
-
-    clf = RandomForestRegressor(
-        n_estimators=100, max_depth=3, random_state=random_state
-    )
-    clf.fit(X, y)
-    selector = SelectFromModel(
-        clf, threshold=-np.inf, max_features=select_k_features, prefit=True
-    )
-    return selector.get_support(indices=True)
-
-
-def _csv_filename_to_pkl_filename(csv_filename) -> str:
-    # Assume that the csv filename is of the form "foo.csv"
-    assert str(csv_filename).endswith(".csv")
-
-    dirname = str(os.path.dirname(csv_filename))
-    basename = str(os.path.basename(csv_filename))
-    base = str(os.path.splitext(basename)[0])
-
-    pkl_basename = base + ".pkl"
-
-    return os.path.join(dirname, pkl_basename)
-
-
-_regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
-_regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
-_regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
-
-_apply_regexp_im = lambda x: _regexp_im.sub(r"\1j", x)
-_apply_regexp_im_sci = lambda x: _regexp_im_sci.sub(r"\1e\2j", x)
-_apply_regexp_sci = lambda x: _regexp_sci.sub(r"\1e\2", x)
-
-
-def _preprocess_julia_floats(s: str) -> str:
-    if isinstance(s, str):
-        s = _apply_regexp_im(s)
-        s = _apply_regexp_im_sci(s)
-        s = _apply_regexp_sci(s)
-    return s
-
-
-def _subscriptify(i: int) -> str:
-    """Converts integer to subscript text form.
-
-    For example, 123 -> "₁₂₃".
-    """
-    return "".join([chr(0x2080 + int(c)) for c in str(i)])
-
-
-def _safe_check_feature_names_in(self, variable_names, generate_names=True):
-    """_check_feature_names_in with compat for old versions."""
-    try:
-        return _check_feature_names_in(
-            self, variable_names, generate_names=generate_names
-        )
-    except TypeError:
-        return _check_feature_names_in(self, variable_names)

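Usage sketch (not part of the diff): `idx_model_selection` on a stand-in for `model.equations_`; only the "accuracy" branch shown in the hunk above is exercised, and PySR is assumed to be installed.

import pandas as pd

from pysr.sr import idx_model_selection

equations = pd.DataFrame(
    {
        "complexity": [1, 3, 5],
        "loss": [1.0, 0.1, 0.09],
        "score": [0.0, 1.15, 0.05],
    }
)

# "accuracy" picks the row with the lowest loss.
print(idx_model_selection(equations, "accuracy"))  # -> 2
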
pysr/test/test.py
CHANGED
@@ -14,14 +14,9 @@ from sklearn.utils.estimator_checks import check_estimator
 
 from .. import PySRRegressor, julia_helpers
 from ..export_latex import sympy2latex
-from ..sr import (
-    _check_assertions,
-    _csv_filename_to_pkl_filename,
-    _handle_feature_selection,
-    _process_constraints,
-    idx_model_selection,
-    run_feature_selection,
-)
+from ..feature_selection import _handle_feature_selection, run_feature_selection
+from ..sr import _check_assertions, _process_constraints, idx_model_selection
+from ..utils import _csv_filename_to_pkl_filename
 
 DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default

pysr/utils.py
ADDED
@@ -0,0 +1,55 @@
+import os
+import re
+
+from sklearn.utils.validation import _check_feature_names_in
+
+
+def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
+    if os.path.splitext(csv_filename)[1] == ".pkl":
+        return csv_filename
+
+    # Assume that the csv filename is of the form "foo.csv"
+    assert str(csv_filename).endswith(".csv")
+
+    dirname = str(os.path.dirname(csv_filename))
+    basename = str(os.path.basename(csv_filename))
+    base = str(os.path.splitext(basename)[0])
+
+    pkl_basename = base + ".pkl"
+
+    return os.path.join(dirname, pkl_basename)
+
+
+_regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
+_regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
+_regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
+
+_apply_regexp_im = lambda x: _regexp_im.sub(r"\1j", x)
+_apply_regexp_im_sci = lambda x: _regexp_im_sci.sub(r"\1e\2j", x)
+_apply_regexp_sci = lambda x: _regexp_sci.sub(r"\1e\2", x)
+
+
+def _preprocess_julia_floats(s: str) -> str:
+    if isinstance(s, str):
+        s = _apply_regexp_im(s)
+        s = _apply_regexp_im_sci(s)
+        s = _apply_regexp_sci(s)
+    return s
+
+
+def _safe_check_feature_names_in(self, variable_names, generate_names=True):
+    """_check_feature_names_in with compat for old versions."""
+    try:
+        return _check_feature_names_in(
+            self, variable_names, generate_names=generate_names
+        )
+    except TypeError:
+        return _check_feature_names_in(self, variable_names)
+
+
+def _subscriptify(i: int) -> str:
+    """Converts integer to subscript text form.
+
+    For example, 123 -> "₁₂₃".
+    """
+    return "".join([chr(0x2080 + int(c)) for c in str(i)])
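
Usage sketch (not part of the diff): the relocated utilities in isolation; the file path and float strings are made up, and the path output assumes POSIX separators.

from pysr.utils import (
    _csv_filename_to_pkl_filename,
    _preprocess_julia_floats,
    _subscriptify,
)

# Julia float/imaginary literals are rewritten into Python syntax by the regexes above.
print(_preprocess_julia_floats("1.5f-3 + 2.0im"))  # -> "1.5e-3 + 2.0j"

# Integers become unicode subscripts, e.g. for pretty-printing variable names.
print(_subscriptify(123))  # -> "₁₂₃"

# Equation-file paths are mapped to the pickle checkpoint next to them.
print(_csv_filename_to_pkl_filename("outputs/hall_of_fame.csv"))  # -> "outputs/hall_of_fame.pkl"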