PySR / test /test.py
MilesCranmer's picture
Clean up global variables into single dict
c96b30c
raw
history blame
4.69 kB
import unittest
import numpy as np
from pysr import pysr, get_hof, best, best_tex, best_callable, best_row
from pysr.sr import run_feature_selection, _handle_feature_selection
import sympy
from sympy import lambdify
import pandas as pd
class TestPipeline(unittest.TestCase):
def setUp(self):
self.default_test_kwargs = dict(
niterations=10,
populations=4,
user_input=False,
annealing=True,
useFrequency=False,
)
np.random.seed(0)
self.X = np.random.randn(100, 5)
def test_linear_relation(self):
y = self.X[:, 0]
equations = pysr(self.X, y, **self.default_test_kwargs)
print(equations)
self.assertLessEqual(equations.iloc[-1]['MSE'], 1e-4)
def test_multioutput_custom_operator(self):
y = self.X[:, [0, 1]]**2
equations = pysr(self.X, y,
unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
extra_sympy_mappings={'sq': lambda x: x**2},
**self.default_test_kwargs,
procs=0)
print(equations)
self.assertLessEqual(equations[0].iloc[-1]['MSE'], 1e-4)
self.assertLessEqual(equations[1].iloc[-1]['MSE'], 1e-4)
def test_multioutput_weighted_with_callable(self):
y = self.X[:, [0, 1]]**2
w = np.random.rand(*y.shape)
w[w < 0.5] = 0.0
w[w >= 0.5] = 1.0
# Double equation when weights are 0:
y += (1-w) * y
# Thus, pysr needs to use the weights to find the right equation!
equations = pysr(self.X, y, weights=w,
unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
extra_sympy_mappings={'sq': lambda x: x**2},
**self.default_test_kwargs,
procs=0)
np.testing.assert_almost_equal(
best_callable()[0](self.X),
self.X[:, 0]**2)
np.testing.assert_almost_equal(
best_callable()[1](self.X),
self.X[:, 1]**2)
def test_empty_operators_single_input(self):
X = np.random.randn(100, 1)
y = X[:, 0] + 3.0
equations = pysr(X, y,
unary_operators=[], binary_operators=["plus"],
**self.default_test_kwargs)
self.assertLessEqual(equations.iloc[-1]['MSE'], 1e-4)
class TestBest(unittest.TestCase):
def setUp(self):
equations = pd.DataFrame({
'Equation': ['1.0', 'cos(x0)', 'square(cos(x0))'],
'MSE': [1.0, 0.1, 1e-5],
'Complexity': [1, 2, 3]
})
equations['Complexity MSE Equation'.split(' ')].to_csv(
'equation_file.csv.bkup', sep='|')
self.equations = get_hof(
'equation_file.csv', n_features=2,
variables_names='x0 x1'.split(' '),
extra_sympy_mappings={}, output_jax_format=False,
multioutput=False, nout=1)
def test_best(self):
self.assertEqual(best(self.equations), sympy.cos(sympy.Symbol('x0'))**2)
self.assertEqual(best(), sympy.cos(sympy.Symbol('x0'))**2)
def test_best_tex(self):
self.assertEqual(best_tex(self.equations), '\\cos^{2}{\\left(x_{0} \\right)}')
self.assertEqual(best_tex(), '\\cos^{2}{\\left(x_{0} \\right)}')
def test_best_lambda(self):
X = np.random.randn(10, 2)
y = np.cos(X[:, 0])**2
for f in [best_callable(), best_callable(self.equations)]:
np.testing.assert_almost_equal(f(X), y)
class TestFeatureSelection(unittest.TestCase):
def test_feature_selection(self):
np.random.seed(0)
X = np.random.randn(20001, 5)
y = X[:, 2]**2 + X[:, 3]**2
selected = run_feature_selection(X, y, select_k_features=2)
self.assertEqual(sorted(selected), [2, 3])
def test_feature_selection_handler(self):
np.random.seed(0)
X = np.random.randn(20000, 5)
y = X[:, 2]**2 + X[:, 3]**2
var_names = [f'x{i}' for i in range(5)]
selected_X, selected_var_names, selection = _handle_feature_selection(
X, select_k_features=2,
use_custom_variable_names=True,
variable_names=[f'x{i}' for i in range(5)],
y=y)
self.assertTrue((2 in selection) and (3 in selection))
self.assertEqual(set(selected_var_names), set('x2 x3'.split(' ')))
np.testing.assert_array_equal(
np.sort(selected_X, axis=1),
np.sort(X[:, [2, 3]], axis=1)
)