Spaces:
Running
Running
MilesCranmer
committed on
Commit
•
ccf71e9
1
Parent(s):
93cf05b
`load` function to init model from saved equations
Browse files- pysr/__init__.py +1 -0
- pysr/sr.py +74 -0
- test/test.py +27 -1
pysr/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .sr import (
|
|
6 |
best_tex,
|
7 |
best_callable,
|
8 |
best_row,
|
|
|
9 |
)
|
10 |
from .julia_helpers import install
|
11 |
from .feynman_problems import Problem, FeynmanProblem
|
|
|
6 |
best_tex,
|
7 |
best_callable,
|
8 |
best_row,
|
9 |
+
load,
|
10 |
)
|
11 |
from .julia_helpers import install
|
12 |
from .feynman_problems import Problem, FeynmanProblem
|
pysr/sr.py
CHANGED
@@ -2034,3 +2034,77 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
|
|
2034 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
2035 |
)
|
2036 |
return selector.get_support(indices=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2034 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
2035 |
)
|
2036 |
return selector.get_support(indices=True)
|
2037 |
+
|
2038 |
+
|
2039 |
+
def load(
    equation_file,
    *,
    binary_operators,
    unary_operators,
    n_features_in,
    feature_names_in=None,
    selection_mask=None,
    nout=1,
    **pysr_kwargs,
):
    """
    Create a model from equations stored as a csv file

    Parameters
    ----------
    equation_file : str
        Path to a csv file containing equations.

    binary_operators : list[str]
        The same binary operators used when creating the model.
        Required keyword argument (no default).

    unary_operators : list[str]
        The same unary operators used when creating the model.
        Required keyword argument (no default).

    n_features_in : int
        Number of features passed to the model.

    feature_names_in : list[str], default=None
        Names of the features passed to the model. When omitted, the
        names ``x0, x1, ...`` are generated, one per feature.

    selection_mask : list[bool], default=None
        If using select_k_features, you must pass `model.selection_mask_` here.
        When omitted, an all-True boolean mask of length `n_features_in`
        is used.

    nout : int, default=1
        Number of outputs of the model.

    pysr_kwargs : dict
        Any other keyword arguments to initialize the PySRRegressor object.

    Returns
    -------
    model : PySRRegressor
        The model with fitted equations.

    Raises
    ------
    ValueError
        If `feature_names_in` is given and its length differs from
        `n_features_in`.
    """
    # Validate before building the model so bad input fails fast. A real
    # exception is used instead of `assert`, which is removed under `-O`.
    if feature_names_in is not None and len(feature_names_in) != n_features_in:
        raise ValueError(
            f"feature_names_in has {len(feature_names_in)} entries, "
            f"but n_features_in is {n_features_in}."
        )

    # TODO: copy .bkup file if exists.
    model = PySRRegressor(
        equation_file=equation_file,
        binary_operators=binary_operators,
        unary_operators=unary_operators,
        **pysr_kwargs,
    )

    # Set the trailing-underscore attributes directly, since no fit is
    # performed when loading from a file.
    model.equation_file_ = equation_file
    model.nout_ = nout
    model.n_features_in_ = n_features_in

    if feature_names_in is None:
        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
    else:
        model.feature_names_in_ = feature_names_in

    if selection_mask is None:
        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
    else:
        model.selection_mask_ = selection_mask

    # Have the model pick up the equations from `equation_file`.
    model.refresh()

    return model
|
test/test.py
CHANGED
@@ -4,7 +4,7 @@ import inspect
|
|
4 |
import unittest
|
5 |
import numpy as np
|
6 |
from sklearn import model_selection
|
7 |
-
from pysr import PySRRegressor
|
8 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
9 |
from sklearn.utils.estimator_checks import check_estimator
|
10 |
import sympy
|
@@ -280,6 +280,32 @@ class TestPipeline(unittest.TestCase):
|
|
280 |
model.fit(X.values, y.values, Xresampled=Xresampled.values)
|
281 |
self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
|
282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
class TestBest(unittest.TestCase):
|
285 |
def setUp(self):
|
|
|
4 |
import unittest
|
5 |
import numpy as np
|
6 |
from sklearn import model_selection
|
7 |
+
from pysr import PySRRegressor, load
|
8 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
9 |
from sklearn.utils.estimator_checks import check_estimator
|
10 |
import sympy
|
|
|
280 |
model.fit(X.values, y.values, Xresampled=Xresampled.values)
|
281 |
self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
|
282 |
|
283 |
+
def test_load_model(self):
    """Check that a model can be loaded from a saved equation file."""
    # Local imports: only this test needs them.
    import os
    import tempfile

    csv_file_data = """
    Complexity|MSE|Equation
    1|0.19951081|1.9762075
    3|0.12717344|(f0 + 1.4724599)
    4|0.104823045|pow_abs(2.2683423, cos(f3))"""
    # Strip the indents:
    csv_file_data = "\n".join(
        [line.strip() for line in csv_file_data.split("\n")]
    )

    # Write into a temporary directory so the test leaves no files behind
    # in the current working directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        equation_file = os.path.join(tmpdir, "equation_file.csv")
        # The same content is written to the ".bkup" sibling as well;
        # presumably the model reads the backup copy -- TODO confirm.
        for path in (equation_file, equation_file + ".bkup"):
            with open(path, "w") as f:
                f.write(csv_file_data)

        model = load(
            equation_file,
            n_features_in=5,
            feature_names_in=["f0", "f1", "f2", "f3", "f4"],
            binary_operators=["+", "*", "/", "-", "^"],
            unary_operators=["cos"],
        )
        X = self.rstate.rand(100, 5)
        y_truth = 2.2683423 ** np.cos(X[:, 3])
        # Index 2 selects the third equation in the file (the pow_abs one).
        # Predict while the files still exist, in case they are re-read.
        y_test = model.predict(X, 2)

    np.testing.assert_allclose(y_truth, y_test)
|
308 |
+
|
309 |
|
310 |
class TestBest(unittest.TestCase):
|
311 |
def setUp(self):
|