Spaces:
Running
Running
Fix TypeError when a variable name matches a builtin python function (#558)
Browse files
* fix thrown TypeError when a variable name matches a builtin python function
Example:
A dataset with a column named 'exec' failed with:
ValueError: Error from parse_expr with transformed code: "(Float ('86.76248' )-exec )"
... snip ...
TypeError: unsupported operand type(s) for -: 'Float' and 'builtin_function_or_method'
* Ensure backwards compatibility for `pysr2sympy` and use same method
* Fix potential issue with list ordering
* Combine builtin variable names test with noisy data test
* Fix builtin variable names test
---------
Co-authored-by: MilesCranmer <miles.cranmer@gmail.com>
- pysr/export_sympy.py +14 -2
- pysr/sr.py +1 -0
- pysr/test/test.py +5 -2
pysr/export_sympy.py
CHANGED
@@ -57,6 +57,12 @@ sympy_mappings = {
|
|
57 |
}
|
58 |
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
def create_sympy_symbols(
|
61 |
feature_names_in: List[str],
|
62 |
) -> List[sympy.Symbol]:
|
@@ -64,10 +70,16 @@ def create_sympy_symbols(
|
|
64 |
|
65 |
|
66 |
def pysr2sympy(
|
67 |
-
equation: str,
|
|
|
|
|
|
|
68 |
):
|
|
|
|
|
69 |
local_sympy_mappings = {
|
70 |
-
**(
|
|
|
71 |
**sympy_mappings,
|
72 |
}
|
73 |
|
|
|
57 |
}
|
58 |
|
59 |
|
60 |
+
def create_sympy_symbols_map(
|
61 |
+
feature_names_in: List[str],
|
62 |
+
) -> Dict[str, sympy.Symbol]:
|
63 |
+
return {variable: sympy.Symbol(variable) for variable in feature_names_in}
|
64 |
+
|
65 |
+
|
66 |
def create_sympy_symbols(
|
67 |
feature_names_in: List[str],
|
68 |
) -> List[sympy.Symbol]:
|
|
|
70 |
|
71 |
|
72 |
def pysr2sympy(
|
73 |
+
equation: str,
|
74 |
+
*,
|
75 |
+
feature_names_in: Optional[List[str]] = None,
|
76 |
+
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
|
77 |
):
|
78 |
+
if feature_names_in is None:
|
79 |
+
feature_names_in = []
|
80 |
local_sympy_mappings = {
|
81 |
+
**create_sympy_symbols_map(feature_names_in),
|
82 |
+
**(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
|
83 |
**sympy_mappings,
|
84 |
}
|
85 |
|
pysr/sr.py
CHANGED
@@ -2226,6 +2226,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
2226 |
for _, eqn_row in output.iterrows():
|
2227 |
eqn = pysr2sympy(
|
2228 |
eqn_row["equation"],
|
|
|
2229 |
extra_sympy_mappings=self.extra_sympy_mappings,
|
2230 |
)
|
2231 |
sympy_format.append(eqn)
|
|
|
2226 |
for _, eqn_row in output.iterrows():
|
2227 |
eqn = pysr2sympy(
|
2228 |
eqn_row["equation"],
|
2229 |
+
feature_names_in=self.feature_names_in_,
|
2230 |
extra_sympy_mappings=self.extra_sympy_mappings,
|
2231 |
)
|
2232 |
sympy_format.append(eqn)
|
pysr/test/test.py
CHANGED
@@ -272,7 +272,7 @@ class TestPipeline(unittest.TestCase):
|
|
272 |
regressor = PySRRegressor(warm_start=True, max_evals=10)
|
273 |
regressor.fit(self.X, y)
|
274 |
|
275 |
-
def test_noisy(self):
|
276 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
277 |
model = PySRRegressor(
|
278 |
# Test that passing a single operator works:
|
@@ -289,9 +289,12 @@ class TestPipeline(unittest.TestCase):
|
|
289 |
model.set_params(model_selection="best")
|
290 |
# Also try without a temp equation file:
|
291 |
model.set_params(temp_equation_file=False)
|
292 |
-
|
|
|
293 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
294 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
|
|
|
|
295 |
|
296 |
def test_pandas_resample_with_nested_constraints(self):
|
297 |
X = pd.DataFrame(
|
|
|
272 |
regressor = PySRRegressor(warm_start=True, max_evals=10)
|
273 |
regressor.fit(self.X, y)
|
274 |
|
275 |
+
def test_noisy_builtin_variable_names(self):
|
276 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
277 |
model = PySRRegressor(
|
278 |
# Test that passing a single operator works:
|
|
|
289 |
model.set_params(model_selection="best")
|
290 |
# Also try without a temp equation file:
|
291 |
model.set_params(temp_equation_file=False)
|
292 |
+
# We also test builtin variable names
|
293 |
+
model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
|
294 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
295 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
296 |
+
self.assertIn("exec", model.latex()[0])
|
297 |
+
self.assertIn("hash", model.latex()[1])
|
298 |
|
299 |
def test_pandas_resample_with_nested_constraints(self):
|
300 |
X = pd.DataFrame(
|