Spaces:
Sleeping
Sleeping
File size: 4,749 Bytes
2f38c9c bed9614 6a4fa2c 97e6589 05cf610 6a4fa2c 1adfa85 bed9614 2f38c9c 10ff16a 2f38c9c 6a4fa2c 2f38c9c ddb4d52 6a4fa2c 51a6b05 6a4fa2c 51a6b05 6a4fa2c 2f38c9c 8cfda07 2f38c9c 1adfa85 00a6f27 1adfa85 00a6f27 1adfa85 a626763 51a6b05 97e6589 51a6b05 97e6589 51a6b05 97e6589 c96b30c 97e6589 c96b30c 97e6589 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import unittest
import numpy as np
from pysr import pysr, get_hof, best, best_tex, best_callable, best_row
from pysr.sr import run_feature_selection, _handle_feature_selection
import sympy
from sympy import lambdify
import pandas as pd
class TestPipeline(unittest.TestCase):
    """End-to-end checks that run ``pysr`` searches on small synthetic data."""

    def setUp(self):
        """Seed NumPy, draw a (100, 5) input matrix and set shared kwargs."""
        self.default_test_kwargs = {
            "niterations": 10,
            "populations": 4,
            "user_input": False,
            "annealing": True,
            "useFrequency": False,
        }
        np.random.seed(0)
        self.X = np.random.randn(100, 5)

    def test_linear_relation(self):
        """The target is the first input column; the last row must fit it."""
        target = self.X[:, 0]
        equations = pysr(self.X, target, **self.default_test_kwargs)
        print(equations)
        final_mse = equations.iloc[-1]["MSE"]
        self.assertLessEqual(final_mse, 1e-4)

    def test_multioutput_custom_operator(self):
        """Two squared-column targets, searched with a custom ``sq`` operator."""
        targets = self.X[:, [0, 1]] ** 2
        equations = pysr(
            self.X,
            targets,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            procs=0,
            **self.default_test_kwargs,
        )
        print(equations)
        # One result table per output column; check the last row of each.
        for output_index in (0, 1):
            final_mse = equations[output_index].iloc[-1]["MSE"]
            self.assertLessEqual(final_mse, 1e-4)

    def test_multioutput_weighted_with_callable(self):
        """Weights must be honoured for the squared columns to be recovered."""
        targets = self.X[:, [0, 1]] ** 2

        # Binarise uniform draws into a 0/1 weight mask.
        w = np.random.rand(*targets.shape)
        below_half = w < 0.5
        w[below_half] = 0.0
        w[~below_half] = 1.0

        # Double equation when weights are 0:
        targets = targets + (1 - w) * targets

        # Thus, pysr needs to use the weights to find the right equation!
        hall_of_fame = pysr(
            self.X,
            targets,
            weights=w,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            procs=0,
            **self.default_test_kwargs,
        )

        # best_callable() is looked up afresh for each output, as before.
        for output_index in (0, 1):
            predicted = best_callable()[output_index](self.X)
            expected = self.X[:, output_index] ** 2
            np.testing.assert_almost_equal(predicted, expected, decimal=4)

    def test_empty_operators_single_input(self):
        """A single-feature input with no unary operators is still searchable."""
        X = np.random.randn(100, 1)
        target = X[:, 0] + 3.0
        equations = pysr(
            X,
            target,
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
        )
        final_mse = equations.iloc[-1]["MSE"]
        self.assertLessEqual(final_mse, 1e-4)
class TestBest(unittest.TestCase):
    """Checks ``best``, ``best_tex`` and ``best_callable`` on a fixed table.

    Each helper is called twice: once with the loaded table passed
    explicitly and once with no arguments.
    """

    def setUp(self):
        """Write a three-row equation table to disk and load it via ``get_hof``."""
        equations = pd.DataFrame({
            'Equation': ['1.0', 'cos(x0)', 'square(cos(x0))'],
            'MSE': [1.0, 0.1, 1e-5],
            'Complexity': [1, 2, 3],
        })

        # NOTE(review): the table is written to 'equation_file.csv.bkup' while
        # get_hof is handed 'equation_file.csv' -- presumably get_hof appends
        # the suffix itself; confirm against its implementation.
        equations['Complexity MSE Equation'.split(' ')].to_csv(
            'equation_file.csv.bkup', sep='|')

        # NOTE(review): keyword spelled `variable_names`, matching the
        # spelling used for `_handle_feature_selection` in this file --
        # confirm against get_hof's signature.
        self.equations = get_hof(
            'equation_file.csv',
            n_features=2,
            variable_names='x0 x1'.split(' '),
            extra_sympy_mappings={},
            output_jax_format=False,
            multioutput=False,
            nout=1,
        )

    def test_best(self):
        """``best`` must return cos(x0)**2 as a sympy expression."""
        expected = sympy.cos(sympy.Symbol('x0')) ** 2
        self.assertEqual(best(self.equations), expected)
        self.assertEqual(best(), expected)

    def test_best_tex(self):
        """``best_tex`` must return the LaTeX rendering of cos(x0)**2."""
        expected = '\\cos^{2}{\\left(x_{0} \\right)}'
        self.assertEqual(best_tex(self.equations), expected)
        self.assertEqual(best_tex(), expected)

    def test_best_lambda(self):
        """``best_callable`` must evaluate to cos(x0)**2 on random input."""
        X = np.random.randn(10, 2)
        y = np.cos(X[:, 0]) ** 2
        for f in [best_callable(), best_callable(self.equations)]:
            np.testing.assert_almost_equal(f(X), y, decimal=4)
class TestFeatureSelection(unittest.TestCase):
    """Checks that feature selection picks the two columns that build ``y``.

    Both tests draw a (20000, 5) standard-normal matrix and set
    ``y = X[:, 2]**2 + X[:, 3]**2``, so columns 2 and 3 are the expected pick.
    """

    def setUp(self):
        """Seed NumPy's global RNG so the random inputs are reproducible."""
        np.random.seed(0)

    def test_feature_selection(self):
        """``run_feature_selection`` must return indices 2 and 3."""
        X = np.random.randn(20000, 5)
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        selected = run_feature_selection(X, y, select_k_features=2)
        self.assertEqual(sorted(selected), [2, 3])

    def test_feature_selection_handler(self):
        """``_handle_feature_selection`` must filter data and names together."""
        X = np.random.randn(20000, 5)
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        var_names = [f'x{i}' for i in range(5)]
        selected_X, selected_var_names, selection = _handle_feature_selection(
            X,
            select_k_features=2,
            use_custom_variable_names=True,
            variable_names=var_names,
            y=y,
        )
        # Separate membership checks so a failure names the missing index.
        self.assertIn(2, selection)
        self.assertIn(3, selection)
        self.assertEqual(set(selected_var_names), {'x2', 'x3'})
        # Sort within each row so the comparison ignores column order.
        np.testing.assert_array_equal(
            np.sort(selected_X, axis=1),
            np.sort(X[:, [2, 3]], axis=1),
        )
|