Spaces:
Running
Running
tttc3
committed on
Commit
·
19ef535
1
Parent(s):
ce60798
Fixed typos and ensured tests pass
Browse files- pysr/sr.py +17 -16
pysr/sr.py
CHANGED
@@ -349,14 +349,14 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
349 |
Relative likelihood for mutation to leave the individual.
|
350 |
|
351 |
weight_mutate_constant : float, default=0.048
|
352 |
-
Relative likelihood for mutation to change the constant slightly
|
353 |
in a random direction.
|
354 |
|
355 |
weight_mutate_operator : float, default=0.47
|
356 |
Relative likelihood for mutation to swap an operator.
|
357 |
|
358 |
weight_randomize : float, default=0.00023
|
359 |
-
Relative likelihood for mutation to completely delete and then
|
360 |
randomly generate the equation
|
361 |
|
362 |
weight_simplify : float, default=0.0020
|
@@ -891,7 +891,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
891 |
Raises
|
892 |
------
|
893 |
ValueError
|
894 |
-
Raised when one of the following
|
895 |
parameter is larger than `population_size`; `maxsize` is
|
896 |
less than 7; invalid `extra_jax_mappings` or
|
897 |
`extra_torch_mappings`; invalid optimizer algorithms.
|
@@ -1005,7 +1005,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1005 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1006 |
Target values. Will be cast to X's dtype if necessary.
|
1007 |
|
1008 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1009 |
(n_resampled, n_features), default=None
|
1010 |
Resampled training data used for denoising.
|
1011 |
|
@@ -1018,7 +1018,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1018 |
Validated training data.
|
1019 |
|
1020 |
y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
|
1021 |
-
|
|
|
|
|
|
|
1022 |
|
1023 |
variable_names_validated : list[str] of length n_features
|
1024 |
Validated list of variable names for each feature in `X`.
|
@@ -1064,7 +1067,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1064 |
else:
|
1065 |
raise NotImplementedError("y shape not supported!")
|
1066 |
|
1067 |
-
return X, y, variable_names
|
1068 |
|
1069 |
def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
|
1070 |
"""
|
@@ -1080,7 +1083,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1080 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1081 |
Target values. Will be cast to X's dtype if necessary.
|
1082 |
|
1083 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1084 |
(n_resampled, n_features), default=None
|
1085 |
Resampled training data used for denoising.
|
1086 |
|
@@ -1119,9 +1122,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1119 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
1120 |
|
1121 |
# Re-perform data validation and feature name updating
|
1122 |
-
X, y = self._validate_data(
|
1123 |
-
X=X, y=y, reset=True, multi_output=True
|
1124 |
-
)
|
1125 |
# Update feature names with selected variable names
|
1126 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1127 |
print(f"Using features {self.feature_names_in_}")
|
@@ -1169,7 +1170,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1169 |
ImportError
|
1170 |
Raised when the julia backend fails to import a package.
|
1171 |
"""
|
1172 |
-
# Need to be global as we don't want to recreate/reinstate julia for
|
1173 |
# every new instance of PySRRegressor
|
1174 |
global already_ran
|
1175 |
global Main
|
@@ -1380,7 +1381,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1380 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1381 |
Target values. Will be cast to X's dtype if necessary.
|
1382 |
|
1383 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1384 |
(n_resampled, n_features), default=None
|
1385 |
Resampled training data used for denoising.
|
1386 |
|
@@ -1413,7 +1414,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1413 |
|
1414 |
# Parameter input validation (for parameters defined in __init__)
|
1415 |
self._validate_params(n_samples=X.shape[0])
|
1416 |
-
X, y, variable_names = self._validate_fit_params(
|
1417 |
X, y, Xresampled, variable_names
|
1418 |
)
|
1419 |
|
@@ -1422,7 +1423,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1422 |
X, y, Xresampled, variable_names
|
1423 |
)
|
1424 |
|
1425 |
-
# Warn about large feature counts (still warn if feature count is large
|
1426 |
# after running feature selection)
|
1427 |
if self.n_features_in_ >= 10:
|
1428 |
warnings.warn(
|
@@ -1516,7 +1517,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1516 |
"""
|
1517 |
Predict y from input X using the equation chosen by `model_selection`.
|
1518 |
|
1519 |
-
You may see what equation is used by printing this object. X should
|
1520 |
have the same columns as the training data.
|
1521 |
|
1522 |
Parameters
|
@@ -1787,7 +1788,7 @@ def _denoise(X, y, Xresampled=None):
|
|
1787 |
return X, gpr.predict(X)
|
1788 |
|
1789 |
|
1790 |
-
# Function
|
1791 |
def _handle_feature_selection(X, select_k_features, y, variable_names):
|
1792 |
if select_k_features is not None:
|
1793 |
selection = run_feature_selection(X, y, select_k_features)
|
|
|
349 |
Relative likelihood for mutation to leave the individual.
|
350 |
|
351 |
weight_mutate_constant : float, default=0.048
|
352 |
+
Relative likelihood for mutation to change the constant slightly
|
353 |
in a random direction.
|
354 |
|
355 |
weight_mutate_operator : float, default=0.47
|
356 |
Relative likelihood for mutation to swap an operator.
|
357 |
|
358 |
weight_randomize : float, default=0.00023
|
359 |
+
Relative likelihood for mutation to completely delete and then
|
360 |
randomly generate the equation
|
361 |
|
362 |
weight_simplify : float, default=0.0020
|
|
|
891 |
Raises
|
892 |
------
|
893 |
ValueError
|
894 |
+
Raised when one of the following occurs: `tournament_selection_n`
|
895 |
parameter is larger than `population_size`; `maxsize` is
|
896 |
less than 7; invalid `extra_jax_mappings` or
|
897 |
`extra_torch_mappings`; invalid optimizer algorithms.
|
|
|
1005 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1006 |
Target values. Will be cast to X's dtype if necessary.
|
1007 |
|
1008 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1009 |
(n_resampled, n_features), default=None
|
1010 |
Resampled training data used for denoising.
|
1011 |
|
|
|
1018 |
Validated training data.
|
1019 |
|
1020 |
y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
|
1021 |
+
Validated target data.
|
1022 |
+
|
1023 |
+
Xresampled : ndarray of shape (n_resampled, n_features)
|
1024 |
+
Validated resampled training data used for denoising.
|
1025 |
|
1026 |
variable_names_validated : list[str] of length n_features
|
1027 |
Validated list of variable names for each feature in `X`.
|
|
|
1067 |
else:
|
1068 |
raise NotImplementedError("y shape not supported!")
|
1069 |
|
1070 |
+
return X, y, Xresampled, variable_names
|
1071 |
|
1072 |
def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
|
1073 |
"""
|
|
|
1083 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1084 |
Target values. Will be cast to X's dtype if necessary.
|
1085 |
|
1086 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1087 |
(n_resampled, n_features), default=None
|
1088 |
Resampled training data used for denoising.
|
1089 |
|
|
|
1122 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
1123 |
|
1124 |
# Re-perform data validation and feature name updating
|
1125 |
+
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
|
|
|
|
1126 |
# Update feature names with selected variable names
|
1127 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
1128 |
print(f"Using features {self.feature_names_in_}")
|
|
|
1170 |
ImportError
|
1171 |
Raised when the julia backend fails to import a package.
|
1172 |
"""
|
1173 |
+
# Need to be global as we don't want to recreate/reinstate julia for
|
1174 |
# every new instance of PySRRegressor
|
1175 |
global already_ran
|
1176 |
global Main
|
|
|
1381 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
1382 |
Target values. Will be cast to X's dtype if necessary.
|
1383 |
|
1384 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
1385 |
(n_resampled, n_features), default=None
|
1386 |
Resampled training data used for denoising.
|
1387 |
|
|
|
1414 |
|
1415 |
# Parameter input validation (for parameters defined in __init__)
|
1416 |
self._validate_params(n_samples=X.shape[0])
|
1417 |
+
X, y, Xresampled, variable_names = self._validate_fit_params(
|
1418 |
X, y, Xresampled, variable_names
|
1419 |
)
|
1420 |
|
|
|
1423 |
X, y, Xresampled, variable_names
|
1424 |
)
|
1425 |
|
1426 |
+
# Warn about large feature counts (still warn if feature count is large
|
1427 |
# after running feature selection)
|
1428 |
if self.n_features_in_ >= 10:
|
1429 |
warnings.warn(
|
|
|
1517 |
"""
|
1518 |
Predict y from input X using the equation chosen by `model_selection`.
|
1519 |
|
1520 |
+
You may see what equation is used by printing this object. X should
|
1521 |
have the same columns as the training data.
|
1522 |
|
1523 |
Parameters
|
|
|
1788 |
return X, gpr.predict(X)
|
1789 |
|
1790 |
|
1791 |
+
# Function has not been removed only due to usage in module tests
|
1792 |
def _handle_feature_selection(X, select_k_features, y, variable_names):
|
1793 |
if select_k_features is not None:
|
1794 |
selection = run_feature_selection(X, y, select_k_features)
|