Spaces:
Sleeping
Sleeping
tttc3
committed on
Commit
•
c7187a6
1
Parent(s):
73c6ffd
Updated tests for compatibility with refactor
Browse files
- pysr/sr.py +1 -1
- test/test.py +21 -18
- test/test_jax.py +31 -4
- test/test_torch.py +39 -10
pysr/sr.py
CHANGED
@@ -1029,7 +1029,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
1029 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1030 |
"Will use DataFrame column names instead."
|
1031 |
)
|
1032 |
-
|
1033 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
1034 |
X.columns = X.columns.str.replace(" ", "_")
|
1035 |
warnings.warn(
|
|
|
1029 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
1030 |
"Will use DataFrame column names instead."
|
1031 |
)
|
1032 |
+
|
1033 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
1034 |
X.columns = X.columns.str.replace(" ", "_")
|
1035 |
warnings.warn(
|
test/test.py
CHANGED
@@ -3,6 +3,7 @@ import unittest
|
|
3 |
from unittest.mock import patch
|
4 |
import numpy as np
|
5 |
from pysr import PySRRegressor
|
|
|
6 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
7 |
import sympy
|
8 |
from sympy import lambdify
|
@@ -21,7 +22,7 @@ class TestPipeline(unittest.TestCase):
|
|
21 |
inspect.signature(PySRRegressor.__init__).parameters["populations"].default
|
22 |
)
|
23 |
self.default_test_kwargs = dict(
|
24 |
-
model_selection="
|
25 |
niterations=default_niterations * 2,
|
26 |
populations=default_populations * 2,
|
27 |
)
|
@@ -32,15 +33,15 @@ class TestPipeline(unittest.TestCase):
|
|
32 |
y = self.X[:, 0]
|
33 |
model = PySRRegressor(**self.default_test_kwargs)
|
34 |
model.fit(self.X, y)
|
35 |
-
print(model.
|
36 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
37 |
|
38 |
def test_multiprocessing(self):
|
39 |
y = self.X[:, 0]
|
40 |
model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
|
41 |
model.fit(self.X, y)
|
42 |
-
print(model.
|
43 |
-
self.assertLessEqual(model.
|
44 |
|
45 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
46 |
y = self.X[:, [0, 1]] ** 2
|
@@ -57,9 +58,9 @@ class TestPipeline(unittest.TestCase):
|
|
57 |
constraints={"square_op": 10},
|
58 |
)
|
59 |
model.fit(self.X, y)
|
60 |
-
equations = model.
|
61 |
print(equations)
|
62 |
-
self.assertIn("square_op", model.
|
63 |
self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
|
64 |
self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
|
65 |
|
@@ -130,14 +131,14 @@ class TestPipeline(unittest.TestCase):
|
|
130 |
self.assertTrue("None" not in regressor.__repr__())
|
131 |
self.assertTrue(">>>>" in regressor.__repr__())
|
132 |
|
133 |
-
self.assertLessEqual(regressor.
|
134 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
135 |
|
136 |
# Test if repeated fit works:
|
137 |
regressor.set_params(niterations=0)
|
138 |
regressor.fit(X, y)
|
139 |
|
140 |
-
self.assertLessEqual(regressor.
|
141 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
142 |
|
143 |
# Tweak model selection:
|
@@ -188,12 +189,11 @@ class TestPipeline(unittest.TestCase):
|
|
188 |
unary_operators=[],
|
189 |
binary_operators=["+", "*", "/", "-"],
|
190 |
**self.default_test_kwargs,
|
191 |
-
Xresampled=Xresampled,
|
192 |
denoise=True,
|
193 |
select_k_features=2,
|
194 |
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
|
195 |
)
|
196 |
-
model.fit(X, y)
|
197 |
self.assertNotIn("unused_feature", model.latex())
|
198 |
self.assertIn("T", model.latex())
|
199 |
self.assertIn("x", model.latex())
|
@@ -232,10 +232,13 @@ class TestBest(unittest.TestCase):
|
|
232 |
output_jax_format=False,
|
233 |
model_selection="accuracy",
|
234 |
)
|
235 |
-
self.model.n_features = 2
|
236 |
-
self.model.refresh()
|
237 |
-
self.equations = self.model.equations
|
238 |
self.rstate = np.random.RandomState(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
def test_best(self):
|
241 |
self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
|
@@ -250,9 +253,9 @@ class TestBest(unittest.TestCase):
|
|
250 |
self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
|
251 |
|
252 |
def test_best_lambda(self):
|
253 |
-
X = self.
|
254 |
-
y =
|
255 |
-
for f in [self.model.predict, self.
|
256 |
np.testing.assert_almost_equal(f(X), y, decimal=4)
|
257 |
|
258 |
|
@@ -292,12 +295,12 @@ class TestMiscellaneous(unittest.TestCase):
|
|
292 |
|
293 |
This should give a warning, and sets the correct value.
|
294 |
"""
|
295 |
-
with self.assertWarns(
|
296 |
model = PySRRegressor(fractionReplaced=0.2)
|
297 |
# This is a deprecated parameter, so we should get a warning.
|
298 |
|
299 |
# The correct value should be set:
|
300 |
-
self.assertEqual(model.
|
301 |
|
302 |
def test_size_warning(self):
|
303 |
"""Ensure that a warning is given for a large input size."""
|
|
|
3 |
from unittest.mock import patch
|
4 |
import numpy as np
|
5 |
from pysr import PySRRegressor
|
6 |
+
|
7 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
8 |
import sympy
|
9 |
from sympy import lambdify
|
|
|
22 |
inspect.signature(PySRRegressor.__init__).parameters["populations"].default
|
23 |
)
|
24 |
self.default_test_kwargs = dict(
|
25 |
+
model_selection="best",
|
26 |
niterations=default_niterations * 2,
|
27 |
populations=default_populations * 2,
|
28 |
)
|
|
|
33 |
y = self.X[:, 0]
|
34 |
model = PySRRegressor(**self.default_test_kwargs)
|
35 |
model.fit(self.X, y)
|
36 |
+
print(model.equations_)
|
37 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
38 |
|
39 |
def test_multiprocessing(self):
|
40 |
y = self.X[:, 0]
|
41 |
model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
|
42 |
model.fit(self.X, y)
|
43 |
+
print(model.equations_)
|
44 |
+
self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
|
45 |
|
46 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
47 |
y = self.X[:, [0, 1]] ** 2
|
|
|
58 |
constraints={"square_op": 10},
|
59 |
)
|
60 |
model.fit(self.X, y)
|
61 |
+
equations = model.equations_
|
62 |
print(equations)
|
63 |
+
self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
|
64 |
self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
|
65 |
self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
|
66 |
|
|
|
131 |
self.assertTrue("None" not in regressor.__repr__())
|
132 |
self.assertTrue(">>>>" in regressor.__repr__())
|
133 |
|
134 |
+
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
135 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
136 |
|
137 |
# Test if repeated fit works:
|
138 |
regressor.set_params(niterations=0)
|
139 |
regressor.fit(X, y)
|
140 |
|
141 |
+
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
142 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
143 |
|
144 |
# Tweak model selection:
|
|
|
189 |
unary_operators=[],
|
190 |
binary_operators=["+", "*", "/", "-"],
|
191 |
**self.default_test_kwargs,
|
|
|
192 |
denoise=True,
|
193 |
select_k_features=2,
|
194 |
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
|
195 |
)
|
196 |
+
model.fit(X, y, Xresampled=Xresampled)
|
197 |
self.assertNotIn("unused_feature", model.latex())
|
198 |
self.assertIn("T", model.latex())
|
199 |
self.assertIn("x", model.latex())
|
|
|
232 |
output_jax_format=False,
|
233 |
model_selection="accuracy",
|
234 |
)
|
|
|
|
|
|
|
235 |
self.rstate = np.random.RandomState(0)
|
236 |
+
# Placeholder values needed to fit the model from an equation file
|
237 |
+
self.X = self.rstate.randn(10, 2)
|
238 |
+
self.y = np.cos(self.X[:, 0]) ** 2
|
239 |
+
self.model.fit(self.X, self.y, from_equation_file=True)
|
240 |
+
self.model.refresh()
|
241 |
+
self.equations_ = self.model.equations_
|
242 |
|
243 |
def test_best(self):
|
244 |
self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
|
|
|
253 |
self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
|
254 |
|
255 |
def test_best_lambda(self):
|
256 |
+
X = self.X
|
257 |
+
y = self.y
|
258 |
+
for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
|
259 |
np.testing.assert_almost_equal(f(X), y, decimal=4)
|
260 |
|
261 |
|
|
|
295 |
|
296 |
This should give a warning, and sets the correct value.
|
297 |
"""
|
298 |
+
with self.assertWarns(FutureWarning):
|
299 |
model = PySRRegressor(fractionReplaced=0.2)
|
300 |
# This is a deprecated parameter, so we should get a warning.
|
301 |
|
302 |
# The correct value should be set:
|
303 |
+
self.assertEqual(model.fraction_replaced, 0.2)
|
304 |
|
305 |
def test_size_warning(self):
|
306 |
"""Ensure that a warning is given for a large input size."""
|
test/test_jax.py
CHANGED
@@ -4,7 +4,6 @@ from pysr import sympy2jax, PySRRegressor
|
|
4 |
import pandas as pd
|
5 |
from jax import numpy as jnp
|
6 |
from jax import random
|
7 |
-
from jax import grad
|
8 |
import sympy
|
9 |
|
10 |
|
@@ -21,6 +20,36 @@ class TestJAX(unittest.TestCase):
|
|
21 |
f, params = sympy2jax(cosx, [x, y, z])
|
22 |
self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def test_pipeline(self):
|
25 |
X = np.random.randn(100, 10)
|
26 |
equations = pd.DataFrame(
|
@@ -41,9 +70,7 @@ class TestJAX(unittest.TestCase):
|
|
41 |
variable_names="x1 x2 x3".split(" "),
|
42 |
)
|
43 |
|
44 |
-
model.
|
45 |
-
model.n_features = 3
|
46 |
-
model.using_pandas = False
|
47 |
model.refresh()
|
48 |
jformat = model.jax()
|
49 |
|
|
|
4 |
import pandas as pd
|
5 |
from jax import numpy as jnp
|
6 |
from jax import random
|
|
|
7 |
import sympy
|
8 |
|
9 |
|
|
|
20 |
f, params = sympy2jax(cosx, [x, y, z])
|
21 |
self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
|
22 |
|
23 |
+
def test_pipeline_pandas(self):
|
24 |
+
X = pd.DataFrame(np.random.randn(100, 10))
|
25 |
+
equations = pd.DataFrame(
|
26 |
+
{
|
27 |
+
"Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
|
28 |
+
"MSE": [1.0, 0.1, 1e-5],
|
29 |
+
"Complexity": [1, 2, 3],
|
30 |
+
}
|
31 |
+
)
|
32 |
+
|
33 |
+
equations["Complexity MSE Equation".split(" ")].to_csv(
|
34 |
+
"equation_file.csv.bkup", sep="|"
|
35 |
+
)
|
36 |
+
|
37 |
+
model = PySRRegressor(
|
38 |
+
equation_file="equation_file.csv",
|
39 |
+
output_jax_format=True,
|
40 |
+
variable_names="x1 x2 x3".split(" "),
|
41 |
+
)
|
42 |
+
|
43 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
44 |
+
model.refresh()
|
45 |
+
jformat = model.jax()
|
46 |
+
|
47 |
+
np.testing.assert_almost_equal(
|
48 |
+
np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
|
49 |
+
np.square(np.cos(X.values[:, 1])), # Select feature 1
|
50 |
+
decimal=4,
|
51 |
+
)
|
52 |
+
|
53 |
def test_pipeline(self):
|
54 |
X = np.random.randn(100, 10)
|
55 |
equations = pd.DataFrame(
|
|
|
70 |
variable_names="x1 x2 x3".split(" "),
|
71 |
)
|
72 |
|
73 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
|
|
74 |
model.refresh()
|
75 |
jformat = model.jax()
|
76 |
|
test/test_torch.py
CHANGED
@@ -20,6 +20,40 @@ class TestTorch(unittest.TestCase):
|
|
20 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
21 |
)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def test_pipeline(self):
|
24 |
X = np.random.randn(100, 10)
|
25 |
equations = pd.DataFrame(
|
@@ -37,20 +71,18 @@ class TestTorch(unittest.TestCase):
|
|
37 |
model = PySRRegressor(
|
38 |
model_selection="accuracy",
|
39 |
equation_file="equation_file.csv",
|
40 |
-
variable_names="x1 x2 x3".split(" "),
|
41 |
extra_sympy_mappings={},
|
42 |
output_torch_format=True,
|
43 |
)
|
44 |
-
|
45 |
-
model.
|
46 |
-
model.using_pandas = False
|
47 |
model.refresh()
|
48 |
|
49 |
tformat = model.pytorch()
|
50 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
51 |
np.testing.assert_almost_equal(
|
52 |
tformat(torch.tensor(X)).detach().numpy(),
|
53 |
-
np.square(np.cos(X[:, 1])), #
|
54 |
decimal=4,
|
55 |
)
|
56 |
|
@@ -89,14 +121,11 @@ class TestTorch(unittest.TestCase):
|
|
89 |
model = PySRRegressor(
|
90 |
model_selection="accuracy",
|
91 |
equation_file="equation_file_custom_operator.csv",
|
92 |
-
variable_names="x1 x2 x3".split(" "),
|
93 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
94 |
extra_torch_mappings={"mycustomoperator": torch.sin},
|
95 |
output_torch_format=True,
|
96 |
)
|
97 |
-
model.
|
98 |
-
model.n_features = 3
|
99 |
-
model.using_pandas = False
|
100 |
model.refresh()
|
101 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
102 |
# Will automatically use the set global state from get_hof.
|
@@ -105,6 +134,6 @@ class TestTorch(unittest.TestCase):
|
|
105 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
106 |
np.testing.assert_almost_equal(
|
107 |
tformat(torch.tensor(X)).detach().numpy(),
|
108 |
-
np.sin(X[:,
|
109 |
decimal=4,
|
110 |
)
|
|
|
20 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
21 |
)
|
22 |
|
23 |
+
def test_pipeline_pandas(self):
|
24 |
+
X = pd.DataFrame(np.random.randn(100, 10))
|
25 |
+
equations = pd.DataFrame(
|
26 |
+
{
|
27 |
+
"Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
|
28 |
+
"MSE": [1.0, 0.1, 1e-5],
|
29 |
+
"Complexity": [1, 2, 3],
|
30 |
+
}
|
31 |
+
)
|
32 |
+
|
33 |
+
equations["Complexity MSE Equation".split(" ")].to_csv(
|
34 |
+
"equation_file.csv.bkup", sep="|"
|
35 |
+
)
|
36 |
+
|
37 |
+
model = PySRRegressor(
|
38 |
+
model_selection="accuracy",
|
39 |
+
equation_file="equation_file.csv",
|
40 |
+
extra_sympy_mappings={},
|
41 |
+
output_torch_format=True,
|
42 |
+
)
|
43 |
+
# Because a model hasn't been fit via the `fit` method, some
|
44 |
+
# attributes will not/cannot be set. For the purpose of
|
45 |
+
# testing, these attributes will be set manually here.
|
46 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
47 |
+
model.refresh()
|
48 |
+
|
49 |
+
tformat = model.pytorch()
|
50 |
+
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
51 |
+
np.testing.assert_almost_equal(
|
52 |
+
tformat(torch.tensor(X.values)).detach().numpy(),
|
53 |
+
np.square(np.cos(X.values[:, 1])), # Selection 1st feature
|
54 |
+
decimal=4,
|
55 |
+
)
|
56 |
+
|
57 |
def test_pipeline(self):
|
58 |
X = np.random.randn(100, 10)
|
59 |
equations = pd.DataFrame(
|
|
|
71 |
model = PySRRegressor(
|
72 |
model_selection="accuracy",
|
73 |
equation_file="equation_file.csv",
|
|
|
74 |
extra_sympy_mappings={},
|
75 |
output_torch_format=True,
|
76 |
)
|
77 |
+
|
78 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
79 |
model.refresh()
|
80 |
|
81 |
tformat = model.pytorch()
|
82 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
83 |
np.testing.assert_almost_equal(
|
84 |
tformat(torch.tensor(X)).detach().numpy(),
|
85 |
+
np.square(np.cos(X[:, 1])), # 2nd feature
|
86 |
decimal=4,
|
87 |
)
|
88 |
|
|
|
121 |
model = PySRRegressor(
|
122 |
model_selection="accuracy",
|
123 |
equation_file="equation_file_custom_operator.csv",
|
|
|
124 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
125 |
extra_torch_mappings={"mycustomoperator": torch.sin},
|
126 |
output_torch_format=True,
|
127 |
)
|
128 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
|
|
129 |
model.refresh()
|
130 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
131 |
# Will automatically use the set global state from get_hof.
|
|
|
134 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
135 |
np.testing.assert_almost_equal(
|
136 |
tformat(torch.tensor(X)).detach().numpy(),
|
137 |
+
np.sin(X[:, 1]),
|
138 |
decimal=4,
|
139 |
)
|