# PySR / test/test.py
# Last commit by MilesCranmer (f59f827): Add .latex() representation to PySRRegressor
# File size: 8.62 kB
import os
import unittest
from unittest.mock import patch

import numpy as np
import pandas as pd
import sympy
from sympy import lambdify

from pysr import pysr, get_hof, best, best_tex, best_callable, best_row, PySRRegressor
from pysr.sr import run_feature_selection, _handle_feature_selection, _yesno
class TestPipeline(unittest.TestCase):
    """End-to-end tests that call `pysr` / `PySRRegressor` on synthetic data.

    Every test builds a target from known columns of a random feature matrix,
    runs a search with the shared settings from `setUp`, and checks the error
    of the returned equations.
    """

    def setUp(self):
        """Create the shared search settings and a seeded 100x5 feature matrix."""
        self.default_test_kwargs = dict(
            niterations=10,
            populations=4,
            annealing=True,
            useFrequency=False,
        )
        np.random.seed(0)
        self.X = np.random.randn(100, 5)

    def test_linear_relation(self):
        """The target is the first feature column; the last equation must fit it."""
        y = self.X[:, 0]
        equations = pysr(self.X, y, **self.default_test_kwargs)
        print(equations)
        self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-4)

    def test_multiprocessing(self):
        """Same linear target, run with `procs=2` and `multithreading=False`."""
        y = self.X[:, 0]
        equations = pysr(
            self.X, y, **self.default_test_kwargs, procs=2, multithreading=False
        )
        print(equations)
        self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-4)

    def test_multioutput_custom_operator(self):
        """Two squared-feature targets, searched with a user-defined `sq` operator."""
        y = self.X[:, [0, 1]] ** 2
        equations = pysr(
            self.X,
            y,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            **self.default_test_kwargs,
            procs=0,
        )
        print(equations)
        # One result table per output column.
        self.assertLessEqual(equations[0].iloc[-1]["MSE"], 1e-4)
        self.assertLessEqual(equations[1].iloc[-1]["MSE"], 1e-4)

    def test_multioutput_weighted_with_callable_temp_equation(self):
        """Weighted two-output fit, checked through `best_callable()`.

        The search is run with `temp_equation_file=True` and
        `delete_tempfiles=False`.
        """
        y = self.X[:, [0, 1]] ** 2
        # Binarise the weights: every entry becomes either 0.0 or 1.0.
        w = np.random.rand(*y.shape)
        w[w < 0.5] = 0.0
        w[w >= 0.5] = 1.0

        # Entries with weight 0 have their target doubled; entries with
        # weight 1 keep the true x**2 value.
        y = (2 - w) * y
        # Thus, pysr needs to use the weights to find the right equation!

        pysr(
            self.X,
            y,
            weights=w,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            **self.default_test_kwargs,
            procs=0,
            temp_equation_file=True,
            delete_tempfiles=False,
        )

        np.testing.assert_almost_equal(
            best_callable()[0](self.X), self.X[:, 0] ** 2, decimal=4
        )
        np.testing.assert_almost_equal(
            best_callable()[1](self.X), self.X[:, 1] ** 2, decimal=4
        )

    def test_empty_operators_single_input_sklearn(self):
        """`PySRRegressor` with no unary operators on a single-feature input.

        Also checks the text of `repr(regressor)` before and after fitting,
        and after switching `model_selection` via `set_params`.
        """
        X = np.random.randn(100, 1)
        y = X[:, 0] + 3.0
        regressor = PySRRegressor(
            model_selection="accuracy",
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
        )
        # Before fitting, the repr is expected to mention "None".
        self.assertIn("None", repr(regressor))
        regressor.fit(X, y)
        # After fitting, "None" is gone and a ">>>>" marker is present.
        self.assertNotIn("None", repr(regressor))
        self.assertIn(">>>>", repr(regressor))
        self.assertLessEqual(regressor.equations.iloc[-1]["MSE"], 1e-4)
        np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)

        # Tweak model selection:
        regressor.set_params(model_selection="best")
        self.assertEqual(regressor.get_params()["model_selection"], "best")
        self.assertNotIn("None", repr(regressor))
        self.assertIn(">>>>", repr(regressor))

        # "best" model_selection should also give a decent loss:
        np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)

    def test_noisy(self):
        """Two-output fit on noisy targets with `denoise=True`."""
        np.random.seed(1)
        # The same (100, 1) noise column is broadcast onto both outputs.
        y = self.X[:, [0, 1]] ** 2 + np.random.randn(self.X.shape[0], 1) * 0.05
        equations = pysr(
            self.X,
            y,
            # Operators are passed as bare strings instead of lists, to
            # test that passing a single operator works:
            unary_operators="sq(x) = x^2",
            binary_operators="plus",
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            **self.default_test_kwargs,
            procs=0,
            denoise=True,
        )
        self.assertLessEqual(best_row(equations=equations)[0]["MSE"], 1e-2)
        self.assertLessEqual(best_row(equations=equations)[1]["MSE"], 1e-2)

    def test_pandas_resample(self):
        """DataFrame input with `Xresampled`, `denoise` and `select_k_features=2`.

        The resampled and evaluation frames list their columns in a different
        order from the training frame.
        """
        np.random.seed(1)
        X = pd.DataFrame(
            {
                "T": np.random.randn(500),
                "x": np.random.randn(500),
                "unused_feature": np.random.randn(500),
            }
        )

        def true_fn(frame):
            """Ground-truth relation; it does not read `unused_feature`."""
            return np.array(frame["T"] + frame["x"] ** 2 + 1.323837)

        y = true_fn(X)
        noise = np.random.randn(500) * 0.01
        y = y + noise
        # The resampled frame lists the features in a different order:
        Xresampled = pd.DataFrame(
            {
                "unused_feature": np.random.randn(100),
                "x": np.random.randn(100),
                "T": np.random.randn(100),
            }
        )
        equations = pysr(
            X,
            y,
            unary_operators=[],
            binary_operators=["+", "*", "/", "-"],
            **self.default_test_kwargs,
            Xresampled=Xresampled,
            denoise=True,
            select_k_features=2,
        )
        self.assertNotIn("unused_feature", best_tex())
        self.assertIn("T", best_tex())
        self.assertIn("x", best_tex())
        self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-2)
        fn = best_callable()
        # The selected column indices should be those of "T" and "x".
        self.assertListEqual(sorted(fn._selection), [0, 1])
        # Evaluate on a fresh frame with yet another column order.
        X2 = pd.DataFrame(
            {
                "T": np.random.randn(100),
                "unused_feature": np.random.randn(100),
                "x": np.random.randn(100),
            }
        )
        self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-2)
class TestBest(unittest.TestCase):
    """Tests for `best`, `best_tex`, `best_callable` and the regressor accessors.

    A small hand-written equation table is written to disk and loaded through
    `get_hof`, so no search is run.
    """

    def setUp(self):
        """Write the fixture table to disk, load it, and attach it to a model."""
        equation_table = pd.DataFrame(
            {
                "Equation": ["1.0", "cos(x0)", "square(cos(x0))"],
                "MSE": [1.0, 0.1, 1e-5],
                "Complexity": [1, 2, 3],
            }
        )

        backup_path = "equation_file.csv.bkup"
        equation_table["Complexity MSE Equation".split(" ")].to_csv(
            backup_path, sep="|"
        )
        # Registered before `get_hof` is called so the fixture file is removed
        # from the working directory even if the rest of setUp fails.
        self.addCleanup(self._remove_file, backup_path)

        # NOTE(review): the path passed here lacks the ".bkup" suffix written
        # above; presumably `get_hof` appends it itself -- confirm.
        # NOTE(review): `variables_names` is spelled differently from the
        # `variable_names` keyword used with `_handle_feature_selection`;
        # verify against `get_hof`'s signature before renaming.
        self.equations = get_hof(
            "equation_file.csv",
            n_features=2,
            variables_names="x0 x1".split(" "),
            extra_sympy_mappings={},
            output_jax_format=False,
            multioutput=False,
            nout=1,
        )

        self.model = PySRRegressor()
        self.model.equations = self.equations

    @staticmethod
    def _remove_file(path):
        """Delete `path`, tolerating the case where it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def test_best(self):
        """All three entry points return cos(x0)**2 as a sympy expression."""
        expected = sympy.cos(sympy.Symbol("x0")) ** 2
        self.assertEqual(best(self.equations), expected)
        self.assertEqual(best(), expected)
        self.assertEqual(self.model.sympy(), expected)

    def test_best_tex(self):
        """All three entry points return the same LaTeX string."""
        expected = "\\cos^{2}{\\left(x_{0} \\right)}"
        self.assertEqual(best_tex(self.equations), expected)
        self.assertEqual(best_tex(), expected)
        self.assertEqual(self.model.latex(), expected)

    def test_best_lambda(self):
        """`best_callable` evaluates to cos(x0)**2, with or without an argument."""
        X = np.random.randn(10, 2)
        y = np.cos(X[:, 0]) ** 2
        for f in [best_callable(), best_callable(self.equations)]:
            np.testing.assert_almost_equal(f(X), y, decimal=4)
class TestFeatureSelection(unittest.TestCase):
    """Tests for `run_feature_selection` and `_handle_feature_selection`.

    Both tests use a target built from feature columns 2 and 3 only and ask
    for two features to be selected.
    """

    def setUp(self):
        """Seed NumPy's global generator before each test draws its data."""
        np.random.seed(0)

    def test_feature_selection(self):
        """The two selected indices are exactly the columns used by the target."""
        features = np.random.randn(20000, 5)
        target = features[:, 2] ** 2 + features[:, 3] ** 2
        chosen = run_feature_selection(features, target, select_k_features=2)
        self.assertEqual(sorted(chosen), [2, 3])

    def test_feature_selection_handler(self):
        """The handler returns matching indices, names, and feature columns."""
        features = np.random.randn(20000, 5)
        target = features[:, 2] ** 2 + features[:, 3] ** 2
        var_names = [f"x{i}" for i in range(5)]

        selected_X, selection = _handle_feature_selection(
            features,
            select_k_features=2,
            variable_names=var_names,
            y=target,
        )

        # Both informative column indices must be present in the selection.
        self.assertTrue(all(index in selection for index in (2, 3)))

        expected_names = set("x2 x3".split(" "))
        picked_names = {var_names[index] for index in selection}
        self.assertEqual(picked_names, expected_names)

        # Sort within each row so the comparison ignores column order.
        actual_sorted = np.sort(selected_X, axis=1)
        expected_sorted = np.sort(features[:, [2, 3]], axis=1)
        np.testing.assert_array_equal(actual_sorted, expected_sorted)
class TestHelperFunctions(unittest.TestCase):
    """Tests for small helper functions imported from `pysr.sr`."""

    @patch("builtins.input", side_effect=["y", "n"])
    def test_yesno(self, mock_input):
        """`_yesno` returns True for a "y" reply and False for an "n" reply."""
        # The mocked `input` yields "y" first and "n" second, so the expected
        # answers are checked in that same order.
        for expected_answer in (True, False):
            self.assertEqual(_yesno("Test"), expected_answer)