Spaces:
Sleeping
Sleeping
MilesCranmer
committed on
Commit
·
17f8bf1
1
Parent(s):
f77e43f
Allow control over tournament selection parameters
Browse files
- Project.toml +1 -1
- pysr/sr.py +16 -4
- setup.py +1 -1
Project.toml
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
3 |
|
4 |
[compat]
|
5 |
-
SymbolicRegression = "0.6.0"
|
6 |
julia = "1.5"
|
|
|
2 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
3 |
|
4 |
[compat]
|
5 |
+
SymbolicRegression = "0.6.1"
|
6 |
julia = "1.5"
|
pysr/sr.py
CHANGED
@@ -123,7 +123,9 @@ def pysr(X, y, weights=None,
|
|
123 |
optimizer_algorithm="BFGS",
|
124 |
optimizer_nrestarts=3,
|
125 |
optimize_probability=1.0,
|
126 |
-
optimizer_iterations=10
|
|
|
|
|
127 |
):
|
128 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
129 |
Note: most default parameters have been tuned over several example
|
@@ -234,6 +236,10 @@ def pysr(X, y, weights=None,
|
|
234 |
:type output_jax_format: bool
|
235 |
:param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters.
|
236 |
:type output_torch_format: bool
|
|
|
|
|
|
|
|
|
237 |
:returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
|
238 |
:type: pd.DataFrame/list
|
239 |
"""
|
@@ -259,12 +265,12 @@ def pysr(X, y, weights=None,
|
|
259 |
progress = False
|
260 |
|
261 |
assert optimizer_algorithm in ['NelderMead', 'BFGS']
|
|
|
262 |
|
263 |
if isinstance(X, pd.DataFrame):
|
264 |
variable_names = list(X.columns)
|
265 |
X = np.array(X)
|
266 |
|
267 |
-
|
268 |
if len(X.shape) == 1:
|
269 |
X = X[:, None]
|
270 |
|
@@ -344,7 +350,9 @@ def pysr(X, y, weights=None,
|
|
344 |
output_jax_format=output_jax_format,
|
345 |
output_torch_format=output_torch_format,
|
346 |
selection=selection,
|
347 |
-
multioutput=multioutput, nout=nout)
|
|
|
|
|
348 |
|
349 |
kwargs = {**_set_paths(tempdir), **kwargs}
|
350 |
|
@@ -515,7 +523,9 @@ def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary
|
|
515 |
variable_names, warmupMaxsizeBy, weightAddNode,
|
516 |
ncyclesperiteration, fractionReplaced, topn, verbosity, progress, loss,
|
517 |
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
518 |
-
weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
|
|
|
|
|
519 |
try:
|
520 |
term_width = shutil.get_terminal_size().columns
|
521 |
except:
|
@@ -586,6 +596,8 @@ mutationWeights=[
|
|
586 |
warmupMaxsizeBy={warmupMaxsizeBy:f}f0,
|
587 |
useFrequency={"true" if useFrequency else "false"},
|
588 |
npop={npop:d},
|
|
|
|
|
589 |
ncyclesperiteration={ncyclesperiteration:d},
|
590 |
fractionReplaced={fractionReplaced:f}f0,
|
591 |
topn={topn:d},
|
|
|
123 |
optimizer_algorithm="BFGS",
|
124 |
optimizer_nrestarts=3,
|
125 |
optimize_probability=1.0,
|
126 |
+
optimizer_iterations=10,
|
127 |
+
tournament_selection_n=10,
|
128 |
+
tournament_selection_p=1.0
|
129 |
):
|
130 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
131 |
Note: most default parameters have been tuned over several example
|
|
|
236 |
:type output_jax_format: bool
|
237 |
:param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters.
|
238 |
:type output_torch_format: bool
|
239 |
+
:param tournament_selection_n: Number of expressions to consider in each tournament.
|
240 |
+
:type tournament_selection_n: int
|
241 |
+
:param tournament_selection_p: Probability of selecting the best expression in each tournament. The probability will decay as p*(1-p)^n for other expressions, sorted by loss.
|
242 |
+
:type tournament_selection_p: float
|
243 |
:returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
|
244 |
:type: pd.DataFrame/list
|
245 |
"""
|
|
|
265 |
progress = False
|
266 |
|
267 |
assert optimizer_algorithm in ['NelderMead', 'BFGS']
|
268 |
+
assert tournament_selection_n < npop
|
269 |
|
270 |
if isinstance(X, pd.DataFrame):
|
271 |
variable_names = list(X.columns)
|
272 |
X = np.array(X)
|
273 |
|
|
|
274 |
if len(X.shape) == 1:
|
275 |
X = X[:, None]
|
276 |
|
|
|
350 |
output_jax_format=output_jax_format,
|
351 |
output_torch_format=output_torch_format,
|
352 |
selection=selection,
|
353 |
+
multioutput=multioutput, nout=nout,
|
354 |
+
tournament_selection_n=tournament_selection_n,
|
355 |
+
tournament_selection_p=tournament_selection_p)
|
356 |
|
357 |
kwargs = {**_set_paths(tempdir), **kwargs}
|
358 |
|
|
|
523 |
variable_names, warmupMaxsizeBy, weightAddNode,
|
524 |
ncyclesperiteration, fractionReplaced, topn, verbosity, progress, loss,
|
525 |
weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
|
526 |
+
weightMutateOperator, weightRandomize, weightSimplify, weights,
|
527 |
+
tournament_selection_n, tournament_selection_p,
|
528 |
+
**kwargs):
|
529 |
try:
|
530 |
term_width = shutil.get_terminal_size().columns
|
531 |
except:
|
|
|
596 |
warmupMaxsizeBy={warmupMaxsizeBy:f}f0,
|
597 |
useFrequency={"true" if useFrequency else "false"},
|
598 |
npop={npop:d},
|
599 |
+
ns={tournament_selection_n:d},
|
600 |
+
probPickFirst={tournament_selection_p:f}f0,
|
601 |
ncyclesperiteration={ncyclesperiteration:d},
|
602 |
fractionReplaced={fractionReplaced:f}f0,
|
603 |
topn={topn:d},
|
setup.py
CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
|
|
5 |
|
6 |
setuptools.setup(
|
7 |
name="pysr",
|
8 |
-
version="0.6.0",
|
9 |
author="Miles Cranmer",
|
10 |
author_email="miles.cranmer@gmail.com",
|
11 |
description="Simple and efficient symbolic regression",
|
|
|
5 |
|
6 |
setuptools.setup(
|
7 |
name="pysr",
|
8 |
+
version="0.6.1",
|
9 |
author="Miles Cranmer",
|
10 |
author_email="miles.cranmer@gmail.com",
|
11 |
description="Simple and efficient symbolic regression",
|