MilesCranmer committed on
Commit
17f8bf1
·
1 Parent(s): f77e43f

Allow control over tournament selection parameters

Browse files
Files changed (3) hide show
  1. Project.toml +1 -1
  2. pysr/sr.py +16 -4
  3. setup.py +1 -1
Project.toml CHANGED
@@ -2,5 +2,5 @@
2
  SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
3
 
4
  [compat]
5
- SymbolicRegression = "0.6.0"
6
  julia = "1.5"
 
2
  SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
3
 
4
  [compat]
5
+ SymbolicRegression = "0.6.1"
6
  julia = "1.5"
pysr/sr.py CHANGED
@@ -123,7 +123,9 @@ def pysr(X, y, weights=None,
123
  optimizer_algorithm="BFGS",
124
  optimizer_nrestarts=3,
125
  optimize_probability=1.0,
126
- optimizer_iterations=10
 
 
127
  ):
128
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
129
  Note: most default parameters have been tuned over several example
@@ -234,6 +236,10 @@ def pysr(X, y, weights=None,
234
  :type output_jax_format: bool
235
  :param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters.
236
  :type output_torch_format: bool
 
 
 
 
237
  :returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
238
  :type: pd.DataFrame/list
239
  """
@@ -259,12 +265,12 @@ def pysr(X, y, weights=None,
259
  progress = False
260
 
261
  assert optimizer_algorithm in ['NelderMead', 'BFGS']
 
262
 
263
  if isinstance(X, pd.DataFrame):
264
  variable_names = list(X.columns)
265
  X = np.array(X)
266
 
267
-
268
  if len(X.shape) == 1:
269
  X = X[:, None]
270
 
@@ -344,7 +350,9 @@ def pysr(X, y, weights=None,
344
  output_jax_format=output_jax_format,
345
  output_torch_format=output_torch_format,
346
  selection=selection,
347
- multioutput=multioutput, nout=nout)
 
 
348
 
349
  kwargs = {**_set_paths(tempdir), **kwargs}
350
 
@@ -515,7 +523,9 @@ def _make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary
515
  variable_names, warmupMaxsizeBy, weightAddNode,
516
  ncyclesperiteration, fractionReplaced, topn, verbosity, progress, loss,
517
  weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
518
- weightMutateOperator, weightRandomize, weightSimplify, weights, **kwargs):
 
 
519
  try:
520
  term_width = shutil.get_terminal_size().columns
521
  except:
@@ -586,6 +596,8 @@ mutationWeights=[
586
  warmupMaxsizeBy={warmupMaxsizeBy:f}f0,
587
  useFrequency={"true" if useFrequency else "false"},
588
  npop={npop:d},
 
 
589
  ncyclesperiteration={ncyclesperiteration:d},
590
  fractionReplaced={fractionReplaced:f}f0,
591
  topn={topn:d},
 
123
  optimizer_algorithm="BFGS",
124
  optimizer_nrestarts=3,
125
  optimize_probability=1.0,
126
+ optimizer_iterations=10,
127
+ tournament_selection_n=10,
128
+ tournament_selection_p=1.0
129
  ):
130
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
131
  Note: most default parameters have been tuned over several example
 
236
  :type output_jax_format: bool
237
  :param output_torch_format: Whether to create a 'torch_format' column in the output, containing a torch module with trainable parameters.
238
  :type output_torch_format: bool
239
+ :param tournament_selection_n: Number of expressions to consider in each tournament.
240
+ :type tournament_selection_n: int
241
+ :param tournament_selection_p: Probability of selecting the best expression in each tournament. The probability will decay as p*(1-p)^n for other expressions, sorted by loss.
242
+ :type tournament_selection_p: float
243
  :returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
244
  :type: pd.DataFrame/list
245
  """
 
265
  progress = False
266
 
267
  assert optimizer_algorithm in ['NelderMead', 'BFGS']
268
+ assert tournament_selection_n < npop
269
 
270
  if isinstance(X, pd.DataFrame):
271
  variable_names = list(X.columns)
272
  X = np.array(X)
273
 
 
274
  if len(X.shape) == 1:
275
  X = X[:, None]
276
 
 
350
  output_jax_format=output_jax_format,
351
  output_torch_format=output_torch_format,
352
  selection=selection,
353
+ multioutput=multioutput, nout=nout,
354
+ tournament_selection_n=tournament_selection_n,
355
+ tournament_selection_p=tournament_selection_p)
356
 
357
  kwargs = {**_set_paths(tempdir), **kwargs}
358
 
 
523
  variable_names, warmupMaxsizeBy, weightAddNode,
524
  ncyclesperiteration, fractionReplaced, topn, verbosity, progress, loss,
525
  weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
526
+ weightMutateOperator, weightRandomize, weightSimplify, weights,
527
+ tournament_selection_n, tournament_selection_p,
528
+ **kwargs):
529
  try:
530
  term_width = shutil.get_terminal_size().columns
531
  except:
 
596
  warmupMaxsizeBy={warmupMaxsizeBy:f}f0,
597
  useFrequency={"true" if useFrequency else "false"},
598
  npop={npop:d},
599
+ ns={tournament_selection_n:d},
600
+ probPickFirst={tournament_selection_p:f}f0,
601
  ncyclesperiteration={ncyclesperiteration:d},
602
  fractionReplaced={fractionReplaced:f}f0,
603
  topn={topn:d},
setup.py CHANGED
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
 
6
  setuptools.setup(
7
  name="pysr",
8
- version="0.6.0-1",
9
  author="Miles Cranmer",
10
  author_email="miles.cranmer@gmail.com",
11
  description="Simple and efficient symbolic regression",
 
5
 
6
  setuptools.setup(
7
  name="pysr",
8
+ version="0.6.1",
9
  author="Miles Cranmer",
10
  author_email="miles.cranmer@gmail.com",
11
  description="Simple and efficient symbolic regression",