MilesCranmer committed
Commit: 502e3ec
Parent: 3ffd1fe

Add `bumper` parameter

README.md CHANGED

```diff
@@ -287,7 +287,7 @@ model = PySRRegressor(
     # ^ Higher precision calculations.
     warm_start=True,
     # ^ Start from where left off.
-    turbo=True,
+    bumper=True,
     # ^ Faster evaluation (experimental)
     julia_project=None,
     # ^ Can set to the path of a folder containing the
```
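In use, the updated README snippet corresponds to something like the following minimal sketch (the operator choice and everything besides `bumper` is illustrative, not part of this commit):

```python
from pysr import PySRRegressor

model = PySRRegressor(
    binary_operators=["+", "*"],  # illustrative operator set
    warm_start=True,  # start from where a previous fit left off
    bumper=True,      # faster evaluation via Bumper.jl (experimental)
)
```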
docs/tuning.md CHANGED

```diff
@@ -20,7 +20,7 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
 8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
 9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
 10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
-11. Set `turbo` to `True`. This may or not work, if there's an error just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
+11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
 12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
 
 Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
```
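Putting the tuning steps above together, a hedged sketch of a tuned configuration; all values are illustrative magnitudes taken from the guide, not prescriptions:

```python
from pysr import PySRRegressor

model = PySRRegressor(
    parsimony=0.0001,        # ~ expected final loss divided by 10 (step 9)
    weight_optimize=0.001,   # so optimization happens more frequently (step 10)
    bumper=True,             # experimental bump allocation (step 11)
    niterations=1_000_000,   # "some very large value"; quit early when satisfied (step 12)
)
```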
pysr/julia_extensions.py CHANGED

```diff
@@ -8,6 +8,8 @@ def load_required_packages(
 ):
     if turbo:
         load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
+    if bumper:
+        load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
     if enable_autodiff:
         load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
     if cluster_manager is not None:
```
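Note that this hunk only shows the function body; for the `if bumper:` branch to work, the signature of `load_required_packages` (just above the hunk) must also gain a `bumper` argument. A hedged sketch of the function after this commit follows; the keyword-only style and the `False` default are assumptions, not taken from the diff, and `load_package` is the module's existing helper, visible in the calls above:

```python
from typing import Optional


def load_required_packages(
    *,
    turbo: bool = False,
    bumper: bool = False,  # new in this commit; default assumed here
    enable_autodiff: bool = False,
    cluster_manager: Optional[str] = None,
):
    # Each branch loads a Julia package by name and UUID via the
    # module's existing `load_package` helper.
    if turbo:
        load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
    if bumper:
        load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
    if enable_autodiff:
        load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
    if cluster_manager is not None:
        ...  # cluster-manager package loading (outside the shown hunk)
```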
pysr/param_groupings.yml CHANGED

```diff
@@ -74,6 +74,7 @@
     - precision
     - fast_cycle
     - turbo
+    - bumper
     - enable_autodiff
 - Determinism:
     - random_state
```
pysr/sr.py CHANGED

```diff
@@ -484,6 +484,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         search evaluation. Certain operators may not be supported.
         Does not support 16-bit precision floats.
         Default is `False`.
+    bumper: bool
+        (Experimental) Whether to use Bumper.jl to speed up the search
+        evaluation. Does not support 16-bit precision floats.
+        Default is `False`.
     precision : int
         What precision to use for the data. By default this is `32`
         (float32), but you can select `64` or `16` as well, giving
@@ -727,6 +731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         batch_size: int = 50,
         fast_cycle: bool = False,
         turbo: bool = False,
+        bumper: bool = False,
         precision: int = 32,
         enable_autodiff: bool = False,
         random_state=None,
@@ -822,6 +827,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.batch_size = batch_size
         self.fast_cycle = fast_cycle
         self.turbo = turbo
+        self.bumper = bumper
         self.precision = precision
         self.enable_autodiff = enable_autodiff
         self.random_state = random_state
@@ -1609,6 +1615,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         load_required_packages(
             turbo=self.turbo,
+            bumper=self.bumper,
             enable_autodiff=self.enable_autodiff,
             cluster_manager=cluster_manager,
         )
@@ -1654,6 +1661,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             maxdepth=maxdepth,
             fast_cycle=self.fast_cycle,
             turbo=self.turbo,
+            bumper=self.bumper,
             enable_autodiff=self.enable_autodiff,
             migration=self.migration,
             hof_migration=self.hof_migration,
```
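Taken together, these hunks thread the new parameter from the constructor, through `self.bumper`, to the Julia backend. A hedged end-to-end sketch of how a user would exercise it (the data and operator choices are illustrative, not from this commit):

```python
import numpy as np
from pysr import PySRRegressor

# Illustrative data: y = 2.5 * x0 + cos(x1)
rng = np.random.RandomState(0)
X = rng.randn(100, 2)
y = 2.5 * X[:, 0] + np.cos(X[:, 1])

model = PySRRegressor(
    binary_operators=["+", "*"],
    unary_operators=["cos"],
    bumper=True,    # experimental Bumper.jl evaluation
    precision=32,   # bumper does not support 16-bit floats, per the docstring
)
model.fit(X, y)
print(model.equations_)
```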
pysr/test/test.py CHANGED

```diff
@@ -58,12 +58,13 @@ class TestPipeline(unittest.TestCase):
         model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
         self.assertIn("c1", model.equations_.iloc[-1]["equation"])
 
-    def test_linear_relation_weighted(self):
+    def test_linear_relation_weighted_bumper(self):
         y = self.X[:, 0]
         weights = np.ones_like(y)
         model = PySRRegressor(
             **self.default_test_kwargs,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
+            bumper=True,
         )
         model.fit(self.X, y, weights=weights)
         print(model.equations_)
```
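With the rename, the updated test can be run on its own with the standard `unittest` CLI, e.g. `python -m unittest pysr.test.test.TestPipeline.test_linear_relation_weighted_bumper` from the repository root (module path inferred from the file path and class shown above).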