Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

PySR / test / test.py

MilesCranmer

Clean up global variables into single dict

c96b30c over 3 years ago

raw

history blame

4.69 kB

	import unittest
	import numpy as np
	from pysr import pysr, get_hof, best, best_tex, best_callable, best_row
	from pysr.sr import run_feature_selection, _handle_feature_selection
	import sympy
	from sympy import lambdify
	import pandas as pd

	class TestPipeline(unittest.TestCase):
	    """End-to-end checks that run ``pysr`` on small synthetic datasets."""

	    def setUp(self):
	        """Build the shared search settings and a seeded 100x5 input matrix."""
	        self.default_test_kwargs = dict(
	            niterations=10,
	            populations=4,
	            user_input=False,
	            annealing=True,
	            useFrequency=False,
	        )
	        # Fixed seed so every test starts from the same random inputs.
	        np.random.seed(0)
	        self.X = np.random.randn(100, 5)

	    def test_linear_relation(self):
	        """Target is column 0 of X; the last reported row must reach MSE <= 1e-4."""
	        y = self.X[:, 0]
	        equations = pysr(self.X, y, **self.default_test_kwargs)
	        print(equations)
	        self.assertLessEqual(equations.iloc[-1]['MSE'], 1e-4)

	    def test_multioutput_custom_operator(self):
	        """Two targets (squares of columns 0 and 1) with a user-defined ``sq``."""
	        y = self.X[:, [0, 1]]**2
	        equations = pysr(
	            self.X, y,
	            unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
	            # Tells the sympy export how to interpret the custom operator.
	            extra_sympy_mappings={'sq': lambda x: x**2},
	            **self.default_test_kwargs,
	            procs=0)
	        print(equations)
	        # The result is indexed per output column here.
	        self.assertLessEqual(equations[0].iloc[-1]['MSE'], 1e-4)
	        self.assertLessEqual(equations[1].iloc[-1]['MSE'], 1e-4)

	    def test_multioutput_weighted_with_callable(self):
	        """Weighted two-target fit, checked through ``best_callable()``."""
	        y = self.X[:, [0, 1]]**2
	        # Binarise uniform noise into a 0/1 weight mask of the same shape as y.
	        w = np.random.rand(*y.shape)
	        w[w < 0.5] = 0.0
	        w[w >= 0.5] = 1.0

	        # Double equation when weights are 0:
	        y += (1-w) * y
	        # Thus, pysr needs to use the weights to find the right equation!

	        # The return value is not needed: results are read back through
	        # best_callable() below.
	        pysr(
	            self.X, y, weights=w,
	            unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
	            extra_sympy_mappings={'sq': lambda x: x**2},
	            **self.default_test_kwargs,
	            procs=0)

	        # One callable per output column.
	        callables = best_callable()
	        np.testing.assert_almost_equal(
	            callables[0](self.X),
	            self.X[:, 0]**2)
	        np.testing.assert_almost_equal(
	            callables[1](self.X),
	            self.X[:, 1]**2)

	    def test_empty_operators_single_input(self):
	        """Single-feature input, no unary operators, target ``x0 + 3``."""
	        X = np.random.randn(100, 1)
	        y = X[:, 0] + 3.0
	        equations = pysr(
	            X, y,
	            unary_operators=[], binary_operators=["plus"],
	            **self.default_test_kwargs)

	        self.assertLessEqual(equations.iloc[-1]['MSE'], 1e-4)

	class TestBest(unittest.TestCase):
	    """Checks the ``best*`` helpers against a hand-written equation table."""

	    def setUp(self):
	        """Write a three-row equation table to disk and load it with ``get_hof``."""
	        equations = pd.DataFrame({
	            'Equation': ['1.0', 'cos(x0)', 'square(cos(x0))'],
	            'MSE': [1.0, 0.1, 1e-5],
	            'Complexity': [1, 2, 3]
	        })

	        # The delimiter must be the single character '|': DataFrame.to_csv
	        # only accepts a one-character separator.
	        equations['Complexity MSE Equation'.split(' ')].to_csv(
	            'equation_file.csv.bkup', sep='|')

	        # NOTE(review): the table is written to 'equation_file.csv.bkup' while
	        # get_hof is given 'equation_file.csv' -- presumably get_hof appends
	        # the '.bkup' suffix itself; confirm against pysr.sr.get_hof.
	        # NOTE(review): the keyword is spelled `variables_names` here; confirm
	        # it matches the parameter name get_hof actually expects.
	        self.equations = get_hof(
	            'equation_file.csv', n_features=2,
	            variables_names='x0 x1'.split(' '),
	            extra_sympy_mappings={}, output_jax_format=False,
	            multioutput=False, nout=1)

	    def test_best(self):
	        """``best`` returns cos(x0)**2 with and without an explicit table."""
	        expected = sympy.cos(sympy.Symbol('x0'))**2
	        self.assertEqual(best(self.equations), expected)
	        self.assertEqual(best(), expected)

	    def test_best_tex(self):
	        """``best_tex`` renders the same expression as LaTeX."""
	        expected = '\\cos^{2}{\\left(x_{0} \\right)}'
	        self.assertEqual(best_tex(self.equations), expected)
	        self.assertEqual(best_tex(), expected)

	    def test_best_lambda(self):
	        """``best_callable`` evaluates to cos(x0)**2 on random inputs."""
	        X = np.random.randn(10, 2)
	        y = np.cos(X[:, 0])**2
	        for f in [best_callable(), best_callable(self.equations)]:
	            np.testing.assert_almost_equal(f(X), y)


	class TestFeatureSelection(unittest.TestCase):
	    """Checks that feature selection keeps the two columns that determine y."""

	    def test_feature_selection(self):
	        """``run_feature_selection`` must return columns 2 and 3."""
	        np.random.seed(0)
	        X = np.random.randn(20001, 5)
	        # y depends only on columns 2 and 3.
	        y = X[:, 2]**2 + X[:, 3]**2
	        selected = run_feature_selection(X, y, select_k_features=2)
	        self.assertEqual(sorted(selected), [2, 3])

	    def test_feature_selection_handler(self):
	        """``_handle_feature_selection`` must return matching data and names."""
	        np.random.seed(0)
	        X = np.random.randn(20000, 5)
	        # y depends only on columns 2 and 3.
	        y = X[:, 2]**2 + X[:, 3]**2
	        var_names = [f'x{i}' for i in range(5)]
	        selected_X, selected_var_names, selection = _handle_feature_selection(
	            X, select_k_features=2,
	            use_custom_variable_names=True,
	            variable_names=var_names,
	            y=y)
	        self.assertTrue((2 in selection) and (3 in selection))
	        self.assertEqual(set(selected_var_names), set('x2 x3'.split(' ')))
	        # Sort within each row so the comparison does not depend on the
	        # order in which the two selected columns are returned.
	        np.testing.assert_array_equal(
	            np.sort(selected_X, axis=1),
	            np.sort(X[:, [2, 3]], axis=1)
	        )