committed on
Remove newlines which break docs building
Browse files- pysr/ +0 -72
@@ -236,52 +236,40 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
236 |
- `"best"` selects the candidate model with the highest score
237 |
among expressions with a loss better than at least 1.5x the
238 |
most accurate model.
239 |
240 |
binary_operators : list[str], default=["+", "-", "*", "/"]
241 |
List of strings giving the binary operators in Julia's Base.
242 |
243 |
unary_operators : list[str], default=None
244 |
Same as :param`binary_operators` but for operators taking a
245 |
single scalar.
246 |
247 |
niterations : int, default=40
248 |
Number of iterations of the algorithm to run. The best
249 |
equations are printed and migrate between populations at the
250 |
end of each iteration.
251 |
252 |
populations : int, default=15
253 |
Number of populations running.
254 |
255 |
population_size : int, default=33
256 |
Number of individuals in each population.
257 |
258 |
max_evals : int, default=None
259 |
Limits the total number of evaluations of expressions to
260 |
this number.
261 |
262 |
maxsize : int, default=20
263 |
Max complexity of an equation.
264 |
265 |
maxdepth : int, default=None
266 |
Max depth of an equation. You can use both :param`maxsize` and
267 |
:param`maxdepth`. :param`maxdepth` is by default not used.
268 |
269 |
warmup_maxsize_by : float, default=0.0
270 |
Whether to slowly increase max size from a small number up to
271 |
the maxsize (if greater than 0). If greater than 0, says the
272 |
fraction of training time at which the current maxsize will
273 |
reach the user-passed maxsize.
274 |
275 |
timeout_in_seconds : float, default=None
276 |
Make the search return early once this many seconds have passed.
277 |
278 |
constraints : dict[str, int | tuple[int,int]], default=None
279 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
280 |
maxsize constraints on the individual arguments of operators.
281 |
E.g., `'pow': (-1, 1)` says that power laws can have any
282 |
complexity left argument, but only 1 complexity in the right
283 |
argument. Use this to force more interpretable solutions.
284 |
285 |
nested_constraints : dict[str, dict], default=None
286 |
Specifies how many times a combination of operators can be
287 |
nested. For example, `{"sin": {"cos": 0}, "cos": {"cos": 2}}`
@@ -298,7 +286,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
298 |
operators, you only need to provide a single number: both
299 |
arguments are treated the same way, and the max of each
300 |
argument is constrained.
301 |
302 |
loss : str, default="L2DistLoss()"
303 |
String of Julia code specifying the loss function. Can either
304 |
be a loss from LossFunctions.jl, or your own loss written as a
@@ -314,7 +301,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
314 |
`L1HingeLoss()`, `SmoothedL1HingeLoss(γ)`,
315 |
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
316 |
`SigmoidLoss()`, `DWDMarginLoss(q)`.
317 |
318 |
complexity_of_operators : dict[str, float], default=None
319 |
If you would like to use a complexity other than 1 for an
320 |
operator, specify the complexity here. For example,
@@ -323,210 +309,156 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
323 |
the `+` operator (which is the default). You may specify real
324 |
numbers for a complexity, and the total complexity of a tree
325 |
will be rounded to the nearest integer after computing.
326 |
327 |
complexity_of_constants : float, default=1
328 |
Complexity of constants.
329 |
330 |
complexity_of_variables : float, default=1
331 |
Complexity of variables.
332 |
333 |
parsimony : float, default=0.0032
334 |
Multiplicative factor for how much to punish complexity.
335 |
336 |
use_frequency : bool, default=True
337 |
Whether to measure the frequency of complexities, and use that
338 |
instead of parsimony to explore equation space. Will naturally
339 |
find equations of all complexities.
340 |
341 |
use_frequency_in_tournament : bool, default=True
342 |
Whether to use the frequency mentioned above in the tournament,
343 |
rather than just the simulated annealing.
344 |
345 |
alpha : float, default=0.1
346 |
Initial temperature for simulated annealing
347 |
(requires :param`annealing` to be `True`).
348 |
349 |
annealing : bool, default=False
350 |
Whether to use annealing.
351 |
352 |
early_stop_condition : { float | str }, default=None
353 |
Stop the search early if this loss is reached. You may also
354 |
pass a string containing a Julia function which
355 |
takes a loss and complexity as input, for example:
356 |
`"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
357 |
358 |
ncyclesperiteration : int, default=550
359 |
Number of total mutations to run, per 10 samples of the
360 |
population, per iteration.
361 |
362 |
fraction_replaced : float, default=0.000364
363 |
How much of population to replace with migrating equations from
364 |
other populations.
365 |
366 |
fraction_replaced_hof : float, default=0.035
367 |
How much of population to replace with migrating equations from
368 |
hall of fame.
369 |
370 |
weight_add_node : float, default=0.79
371 |
Relative likelihood for mutation to add a node.
372 |
373 |
weight_insert_node : float, default=5.1
374 |
Relative likelihood for mutation to insert a node.
375 |
376 |
weight_delete_node : float, default=1.7
377 |
Relative likelihood for mutation to delete a node.
378 |
379 |
weight_do_nothing : float, default=0.21
380 |
Relative likelihood for mutation to leave the individual.
381 |
382 |
weight_mutate_constant : float, default=0.048
383 |
Relative likelihood for mutation to change the constant slightly
384 |
in a random direction.
385 |
386 |
weight_mutate_operator : float, default=0.47
387 |
Relative likelihood for mutation to swap an operator.
388 |
389 |
weight_randomize : float, default=0.00023
390 |
Relative likelihood for mutation to completely delete and then
391 |
randomly generate the equation.
392 |
393 |
weight_simplify : float, default=0.0020
394 |
Relative likelihood for mutation to simplify constant parts by evaluation.
395 |
396 |
crossover_probability : float, default=0.066
397 |
Absolute probability of crossover-type genetic operation, instead of a mutation.
398 |
399 |
skip_mutation_failures : bool, default=True
400 |
Whether to skip mutation and crossover failures, rather than
401 |
simply re-sampling the current member.
402 |
403 |
migration : bool, default=True
404 |
Whether to migrate.
405 |
406 |
hof_migration : bool, default=True
407 |
Whether to have the hall of fame migrate.
408 |
409 |
topn : int, default=12
410 |
How many top individuals migrate from each population.
411 |
412 |
should_optimize_constants : bool, default=True
413 |
Whether to numerically optimize constants (Nelder-Mead/Newton)
414 |
at the end of each iteration.
415 |
416 |
optimizer_algorithm : str, default="BFGS"
417 |
Optimization scheme to use for optimizing constants. Can currently
418 |
be `NelderMead` or `BFGS`.
419 |
420 |
optimizer_nrestarts : int, default=2
421 |
Number of times to restart the constants optimization process with
422 |
different initial conditions.
423 |
424 |
optimize_probability : float, default=0.14
425 |
Probability of optimizing the constants during a single iteration of
426 |
the evolutionary algorithm.
427 |
428 |
optimizer_iterations : int, default=8
429 |
Number of iterations that the constants optimizer can take.
430 |
431 |
perturbation_factor : float, default=0.076
432 |
Constants are perturbed by a max factor of
433 |
(perturbation_factor*T + 1). Either multiplied by this or
434 |
divided by this.
435 |
436 |
tournament_selection_n : int, default=10
437 |
Number of expressions to consider in each tournament.
438 |
439 |
tournament_selection_p : float, default=0.86
440 |
Probability of selecting the best expression in each
441 |
tournament. The probability will decay as p*(1-p)^n for other
442 |
expressions, sorted by loss.
443 |
444 |
procs : int, default=multiprocessing.cpu_count()
445 |
Number of processes (=number of populations running).
446 |
447 |
multithreading : bool, default=True
448 |
Use multithreading instead of distributed backend.
449 |
Using procs=0 will turn off both.
450 |
451 |
cluster_manager : str, default=None
452 |
For distributed computing, this sets the job queue system. Set
453 |
to one of "slurm", "pbs", "lsf", "sge", "qrsh", "scyld", or
454 |
"htc". If set to one of these, PySR will run in distributed
455 |
mode, and use `procs` to figure out how many processes to launch.
456 |
457 |
batching : bool, default=False
458 |
Whether to compare population members on small batches during
459 |
evolution. Still uses full dataset for comparing against hall
460 |
of fame.
461 |
462 |
batch_size : int, default=50
463 |
The amount of data to use if doing batching.
464 |
465 |
fast_cycle : bool, default=False (experimental)
466 |
Batch over population subsamples. This is a slightly different
467 |
algorithm than regularized evolution, but does cycles 15%
468 |
faster. May be algorithmically less efficient.
469 |
470 |
precision : int, default=32
471 |
What precision to use for the data. By default this is 32
472 |
(float32), but you can select 64 or 16 as well.
473 |
474 |
random_state : int, Numpy RandomState instance or None, default=None
475 |
Pass an int for reproducible results across multiple function calls.
476 |
See :term:`Glossary <random_state>`.
477 |
478 |
deterministic : bool, default=False
479 |
Make a PySR search give the same result every run.
480 |
To use this, you must turn off parallelism
481 |
(with :param`procs`=0, :param`multithreading`=False),
482 |
and set :param`random_state` to a fixed seed.
483 |
484 |
warm_start : bool, default=False
485 |
Tells fit to continue from where the last call to fit finished.
486 |
If false, each call to fit will be fresh, overwriting previous results.
487 |
488 |
verbosity : int, default=1e9
489 |
What verbosity level to use. 0 means minimal print statements.
490 |
491 |
update_verbosity : int, default=None
492 |
What verbosity level to use for package updates.
493 |
Will take value of :param`verbosity` if not given.
494 |
495 |
progress : bool, default=True
496 |
Whether to use a progress bar instead of printing to stdout.
497 |
498 |
equation_file : str, default=None
499 |
Where to save the files (.csv extension).
500 |
501 |
temp_equation_file : bool, default=False
502 |
Whether to put the hall of fame file in the temp directory.
503 |
Deletion is then controlled with the :param`delete_tempfiles` parameter.
504 |
505 |
506 |
tempdir : str, default=None
507 |
Directory for the temporary files.
508 |
509 |
delete_tempfiles : bool, default=True
510 |
Whether to delete the temporary files after finishing.
511 |
512 |
julia_project : str, default=None
513 |
A Julia environment location containing a Project.toml
514 |
(and potentially the source code for SymbolicRegression.jl).
515 |
Default gives the Python package directory, where a
516 |
Project.toml file should be present from the install.
517 |
518 |
update : bool, default=True
519 |
Whether to automatically update Julia packages.
520 |
521 |
output_jax_format : bool, default=False
522 |
Whether to create a 'jax_format' column in the output,
523 |
containing jax-callable functions and the default parameters in
524 |
a jax array.
525 |
526 |
output_torch_format : bool, default=False
527 |
Whether to create a 'torch_format' column in the output,
528 |
containing a torch module with trainable parameters.
529 |
530 |
extra_sympy_mappings : dict[str, Callable], default=None
531 |
Provides mappings between custom :param`binary_operators` or
532 |
:param`unary_operators` defined in julia strings, to those same
@@ -534,23 +466,19 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
534 |
E.g., if `unary_operators=["inv(x)=1/x"]`, then for the fitted
535 |
model to be exported to sympy, :param`extra_sympy_mappings`
536 |
would be `{"inv": lambda x: 1/x}`.
537 |
538 |
extra_jax_mappings : dict[Callable, str], default=None
539 |
Similar to :param`extra_sympy_mappings` but for model export
540 |
to jax. The dictionary maps sympy functions to jax functions.
541 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
542 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
543 |
544 |
extra_torch_mappings : dict[Callable, Callable], default=None
545 |
The same as :param`extra_jax_mappings` but for model export
546 |
to pytorch. Note that the dictionary keys should be callable
547 |
pytorch expressions.
548 |
For example: `extra_torch_mappings={sympy.sin: torch.sin}`
549 |
550 |
denoise : bool, default=False
551 |
Whether to use a Gaussian Process to denoise the data before
552 |
inputting to PySR. Can help PySR fit noisy data.
553 |
554 |
select_k_features : int, default=None
555 |
Whether to run feature selection in Python using random forests,
556 |
before passing to the symbolic regression code. None means no
236 |
- `"best"` selects the candidate model with the highest score
237 |
among expressions with a loss better than at least 1.5x the
238 |
most accurate model.
239 |
binary_operators : list[str], default=["+", "-", "*", "/"]
240 |
List of strings giving the binary operators in Julia's Base.
241 |
unary_operators : list[str], default=None
242 |
Same as :param`binary_operators` but for operators taking a
243 |
single scalar.
244 |
niterations : int, default=40
245 |
Number of iterations of the algorithm to run. The best
246 |
equations are printed and migrate between populations at the
247 |
end of each iteration.
248 |
populations : int, default=15
249 |
Number of populations running.
250 |
population_size : int, default=33
251 |
Number of individuals in each population.
252 |
max_evals : int, default=None
253 |
Limits the total number of evaluations of expressions to
254 |
this number.
255 |
maxsize : int, default=20
256 |
Max complexity of an equation.
257 |
maxdepth : int, default=None
258 |
Max depth of an equation. You can use both :param`maxsize` and
259 |
:param`maxdepth`. :param`maxdepth` is by default not used.
260 |
warmup_maxsize_by : float, default=0.0
261 |
Whether to slowly increase max size from a small number up to
262 |
the maxsize (if greater than 0). If greater than 0, says the
263 |
fraction of training time at which the current maxsize will
264 |
reach the user-passed maxsize.
265 |
timeout_in_seconds : float, default=None
266 |
Make the search return early once this many seconds have passed.
267 |
constraints : dict[str, int | tuple[int,int]], default=None
268 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
269 |
maxsize constraints on the individual arguments of operators.
270 |
E.g., `'pow': (-1, 1)` says that power laws can have any
271 |
complexity left argument, but only 1 complexity in the right
272 |
argument. Use this to force more interpretable solutions.
273 |
nested_constraints : dict[str, dict], default=None
274 |
Specifies how many times a combination of operators can be
275 |
nested. For example, `{"sin": {"cos": 0}, "cos": {"cos": 2}}`
286 |
operators, you only need to provide a single number: both
287 |
arguments are treated the same way, and the max of each
288 |
argument is constrained.
289 |
loss : str, default="L2DistLoss()"
290 |
String of Julia code specifying the loss function. Can either
291 |
be a loss from LossFunctions.jl, or your own loss written as a
301 |
`L1HingeLoss()`, `SmoothedL1HingeLoss(γ)`,
302 |
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
303 |
`SigmoidLoss()`, `DWDMarginLoss(q)`.
304 |
complexity_of_operators : dict[str, float], default=None
305 |
If you would like to use a complexity other than 1 for an
306 |
operator, specify the complexity here. For example,
309 |
the `+` operator (which is the default). You may specify real
310 |
numbers for a complexity, and the total complexity of a tree
311 |
will be rounded to the nearest integer after computing.
312 |
complexity_of_constants : float, default=1
313 |
Complexity of constants.
314 |
complexity_of_variables : float, default=1
315 |
Complexity of variables.
316 |
parsimony : float, default=0.0032
317 |
Multiplicative factor for how much to punish complexity.
318 |
use_frequency : bool, default=True
319 |
Whether to measure the frequency of complexities, and use that
320 |
instead of parsimony to explore equation space. Will naturally
321 |
find equations of all complexities.
322 |
use_frequency_in_tournament : bool, default=True
323 |
Whether to use the frequency mentioned above in the tournament,
324 |
rather than just the simulated annealing.
325 |
alpha : float, default=0.1
326 |
Initial temperature for simulated annealing
327 |
(requires :param`annealing` to be `True`).
328 |
annealing : bool, default=False
329 |
Whether to use annealing.
330 |
early_stop_condition : { float | str }, default=None
331 |
Stop the search early if this loss is reached. You may also
332 |
pass a string containing a Julia function which
333 |
takes a loss and complexity as input, for example:
334 |
`"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
335 |
ncyclesperiteration : int, default=550
336 |
Number of total mutations to run, per 10 samples of the
337 |
population, per iteration.
338 |
fraction_replaced : float, default=0.000364
339 |
How much of population to replace with migrating equations from
340 |
other populations.
341 |
fraction_replaced_hof : float, default=0.035
342 |
How much of population to replace with migrating equations from
343 |
hall of fame.
344 |
weight_add_node : float, default=0.79
345 |
Relative likelihood for mutation to add a node.
346 |
weight_insert_node : float, default=5.1
347 |
Relative likelihood for mutation to insert a node.
348 |
weight_delete_node : float, default=1.7
349 |
Relative likelihood for mutation to delete a node.
350 |
weight_do_nothing : float, default=0.21
351 |
Relative likelihood for mutation to leave the individual.
352 |
weight_mutate_constant : float, default=0.048
353 |
Relative likelihood for mutation to change the constant slightly
354 |
in a random direction.
355 |
weight_mutate_operator : float, default=0.47
356 |
Relative likelihood for mutation to swap an operator.
357 |
weight_randomize : float, default=0.00023
358 |
Relative likelihood for mutation to completely delete and then
359 |
randomly generate the equation.
360 |
weight_simplify : float, default=0.0020
361 |
Relative likelihood for mutation to simplify constant parts by evaluation.
362 |
crossover_probability : float, default=0.066
363 |
Absolute probability of crossover-type genetic operation, instead of a mutation.
364 |
skip_mutation_failures : bool, default=True
365 |
Whether to skip mutation and crossover failures, rather than
366 |
simply re-sampling the current member.
367 |
migration : bool, default=True
368 |
Whether to migrate.
369 |
hof_migration : bool, default=True
370 |
Whether to have the hall of fame migrate.
371 |
topn : int, default=12
372 |
How many top individuals migrate from each population.
373 |
should_optimize_constants : bool, default=True
374 |
Whether to numerically optimize constants (Nelder-Mead/Newton)
375 |
at the end of each iteration.
376 |
optimizer_algorithm : str, default="BFGS"
377 |
Optimization scheme to use for optimizing constants. Can currently
378 |
be `NelderMead` or `BFGS`.
379 |
optimizer_nrestarts : int, default=2
380 |
Number of times to restart the constants optimization process with
381 |
different initial conditions.
382 |
optimize_probability : float, default=0.14
383 |
Probability of optimizing the constants during a single iteration of
384 |
the evolutionary algorithm.
385 |
optimizer_iterations : int, default=8
386 |
Number of iterations that the constants optimizer can take.
387 |
perturbation_factor : float, default=0.076
388 |
Constants are perturbed by a max factor of
389 |
(perturbation_factor*T + 1). Either multiplied by this or
390 |
divided by this.
391 |
tournament_selection_n : int, default=10
392 |
Number of expressions to consider in each tournament.
393 |
tournament_selection_p : float, default=0.86
394 |
Probability of selecting the best expression in each
395 |
tournament. The probability will decay as p*(1-p)^n for other
396 |
expressions, sorted by loss.
397 |
procs : int, default=multiprocessing.cpu_count()
398 |
Number of processes (=number of populations running).
399 |
multithreading : bool, default=True
400 |
Use multithreading instead of distributed backend.
401 |
Using procs=0 will turn off both.
402 |
cluster_manager : str, default=None
403 |
For distributed computing, this sets the job queue system. Set
404 |
to one of "slurm", "pbs", "lsf", "sge", "qrsh", "scyld", or
405 |
"htc". If set to one of these, PySR will run in distributed
406 |
mode, and use `procs` to figure out how many processes to launch.
407 |
batching : bool, default=False
408 |
Whether to compare population members on small batches during
409 |
evolution. Still uses full dataset for comparing against hall
410 |
of fame.
411 |
batch_size : int, default=50
412 |
The amount of data to use if doing batching.
413 |
fast_cycle : bool, default=False (experimental)
414 |
Batch over population subsamples. This is a slightly different
415 |
algorithm than regularized evolution, but does cycles 15%
416 |
faster. May be algorithmically less efficient.
417 |
precision : int, default=32
418 |
What precision to use for the data. By default this is 32
419 |
(float32), but you can select 64 or 16 as well.
420 |
random_state : int, Numpy RandomState instance or None, default=None
421 |
Pass an int for reproducible results across multiple function calls.
422 |
See :term:`Glossary <random_state>`.
423 |
deterministic : bool, default=False
424 |
Make a PySR search give the same result every run.
425 |
To use this, you must turn off parallelism
426 |
(with :param`procs`=0, :param`multithreading`=False),
427 |
and set :param`random_state` to a fixed seed.
428 |
warm_start : bool, default=False
429 |
Tells fit to continue from where the last call to fit finished.
430 |
If false, each call to fit will be fresh, overwriting previous results.
431 |
verbosity : int, default=1e9
432 |
What verbosity level to use. 0 means minimal print statements.
433 |
update_verbosity : int, default=None
434 |
What verbosity level to use for package updates.
435 |
Will take value of :param`verbosity` if not given.
436 |
progress : bool, default=True
437 |
Whether to use a progress bar instead of printing to stdout.
438 |
equation_file : str, default=None
439 |
Where to save the files (.csv extension).
440 |
temp_equation_file : bool, default=False
441 |
Whether to put the hall of fame file in the temp directory.
442 |
Deletion is then controlled with the :param`delete_tempfiles` parameter.
443 |
444 |
tempdir : str, default=None
445 |
Directory for the temporary files.
446 |
delete_tempfiles : bool, default=True
447 |
Whether to delete the temporary files after finishing.
448 |
julia_project : str, default=None
449 |
A Julia environment location containing a Project.toml
450 |
(and potentially the source code for SymbolicRegression.jl).
451 |
Default gives the Python package directory, where a
452 |
Project.toml file should be present from the install.
453 |
update : bool, default=True
454 |
Whether to automatically update Julia packages.
455 |
output_jax_format : bool, default=False
456 |
Whether to create a 'jax_format' column in the output,
457 |
containing jax-callable functions and the default parameters in
458 |
a jax array.
459 |
output_torch_format : bool, default=False
460 |
Whether to create a 'torch_format' column in the output,
461 |
containing a torch module with trainable parameters.
462 |
extra_sympy_mappings : dict[str, Callable], default=None
463 |
Provides mappings between custom :param`binary_operators` or
464 |
:param`unary_operators` defined in julia strings, to those same
466 |
E.g., if `unary_operators=["inv(x)=1/x"]`, then for the fitted
467 |
model to be exported to sympy, :param`extra_sympy_mappings`
468 |
would be `{"inv": lambda x: 1/x}`.
469 |
extra_jax_mappings : dict[Callable, str], default=None
470 |
Similar to :param`extra_sympy_mappings` but for model export
471 |
to jax. The dictionary maps sympy functions to jax functions.
472 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
473 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
474 |
extra_torch_mappings : dict[Callable, Callable], default=None
475 |
The same as :param`extra_jax_mappings` but for model export
476 |
to pytorch. Note that the dictionary keys should be callable
477 |
pytorch expressions.
478 |
For example: `extra_torch_mappings={sympy.sin: torch.sin}`
479 |
denoise : bool, default=False
480 |
Whether to use a Gaussian Process to denoise the data before
481 |
inputting to PySR. Can help PySR fit noisy data.
482 |
select_k_features : int, default=None
483 |
Whether to run feature selection in Python using random forests,
484 |
before passing to the symbolic regression code. None means no