MilesCranmer committed on
Commit
57de954
·
unverified ·
2 Parent(s): 9a56c67 9d919df

Merge pull request #87 from MilesCranmer/pyjulia

Browse files
.github/workflows/CI.yml CHANGED
@@ -61,6 +61,7 @@ jobs:
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
 
64
  - name: "Install Coverage tool"
65
  run: pip install coverage coveralls
66
  - name: "Run tests"
 
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
64
+ python -c 'import pysr; pysr.install()'
65
  - name: "Install Coverage tool"
66
  run: pip install coverage coveralls
67
  - name: "Run tests"
.github/workflows/CI_Windows.yml CHANGED
@@ -61,6 +61,7 @@ jobs:
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
 
64
  - name: "Run tests"
65
  run: python -m unittest test.test
66
  shell: bash
 
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
64
+ python -c 'import pysr; pysr.install()'
65
  - name: "Run tests"
66
  run: python -m unittest test.test
67
  shell: bash
.github/workflows/CI_conda.yml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI_conda
2
+ # This tests whether conda, which ships a statically-linked libpython, works
3
+ # with PySR.
4
+
5
+ on:
6
+ push:
7
+ branches:
8
+ - '*'
9
+ paths:
10
+ - 'test/**'
11
+ - 'pysr/**'
12
+ - '.github/workflows/**'
13
+ - 'setup.py'
14
+ - 'Project.toml'
15
+ pull_request:
16
+ branches:
17
+ - '*'
18
+ paths:
19
+ - 'test/**'
20
+ - 'pysr/**'
21
+ - '.github/workflows/**'
22
+ - 'setup.py'
23
+ - 'Project.toml'
24
+
25
+ jobs:
26
+ test:
27
+ runs-on: ${{ matrix.os }}
28
+ strategy:
29
+ matrix:
30
+ julia-version: ['1.7.1']
31
+ python-version: ['3.9']
32
+ os: ['ubuntu-latest']
33
+
34
+ steps:
35
+ - uses: actions/checkout@v1.0.0
36
+ - name: "Set up Julia"
37
+ uses: julia-actions/setup-julia@v1.6.0
38
+ with:
39
+ version: ${{ matrix.julia-version }}
40
+ - name: "Change package server"
41
+ shell: bash
42
+ env:
43
+ JULIA_PKG_SERVER: ""
44
+ run: |
45
+ julia -e 'using Pkg; Pkg.Registry.add("General")'
46
+ - name: "Cache dependencies"
47
+ uses: actions/cache@v1 # Thanks FromFile.jl
48
+ env:
49
+ cache-name: cache-artifacts
50
+ with:
51
+ path: ~/.julia/artifacts
52
+ key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
53
+ restore-keys: |
54
+ ${{ runner.os }}-build-${{ env.cache-name }}-
55
+ ${{ runner.os }}-build-
56
+ ${{ runner.os }}-
57
+ - name: "Set up Conda"
58
+ uses: conda-incubator/setup-miniconda@v2
59
+ with:
60
+ miniforge-variant: Mambaforge
61
+ miniforge-version: latest
62
+ auto-activate-base: true
63
+ python-version: ${{ matrix.python-version }}
64
+ activate-environment: test
65
+ environment-file: environment.yml
66
+ - name: "Install PySR"
67
+ run: |
68
+ python3 -m pip install .
69
+ python3 -c 'import pysr; pysr.install()'
70
+ shell: bash -l {0}
71
+ - name: "Ensure that static libpython warning appears"
72
+ run: python3 test/test_static_libpython_warning.py
73
+ shell: bash -l {0}
74
+ - name: "Run tests"
75
+ run: python3 -m unittest test.test
76
+ shell: bash -l {0}
.github/workflows/CI_mac.yml CHANGED
@@ -61,6 +61,7 @@ jobs:
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
 
64
  - name: "Run tests"
65
  run: python -m unittest test.test
66
  shell: bash
 
61
  python -m pip install --upgrade pip
62
  pip install -r requirements.txt
63
  python setup.py install
64
+ python -c 'import pysr; pysr.install()'
65
  - name: "Run tests"
66
  run: python -m unittest test.test
67
  shell: bash
.gitignore CHANGED
@@ -12,3 +12,6 @@ dist
12
  *.pyproj
13
  *.sln
14
  pysr/.vs/
 
 
 
 
12
  *.pyproj
13
  *.sln
14
  pysr/.vs/
15
+ pysr.egg-info
16
+ Manifest.toml
17
+ workflow
Dockerfile CHANGED
@@ -1,30 +1,44 @@
1
  # This builds a Docker image containing a working copy of PySR
2
  # with all pre-requisites installed.
3
 
4
-
5
  ARG VERSION=latest
 
6
  FROM julia:$VERSION
7
 
8
  RUN apt-get update && apt-get upgrade -y && apt-get install -y \
9
- build-essential python3 python3-dev python3-pip python3-setuptools \
10
- vim git wget curl \
 
 
11
  && apt-get clean \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
  WORKDIR /pysr
15
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
17
  ADD ./requirements.txt /pysr/requirements.txt
18
  RUN pip3 install -r /pysr/requirements.txt
19
 
20
  # Install PySR:
21
- ADD . /pysr/
 
 
 
 
22
  RUN pip3 install .
23
 
24
  # Install Julia pre-requisites:
25
- RUN julia -e 'using Pkg; Pkg.add("SymbolicRegression")'
26
-
27
- # Install IPython and other useful libraries:
28
- RUN pip3 install ipython jupyter matplotlib
29
 
30
- CMD ["bash"]
 
1
  # This builds a Docker image containing a working copy of PySR
2
  # with all pre-requisites installed.
3
 
 
4
  ARG VERSION=latest
5
+
6
  FROM julia:$VERSION
7
 
8
  RUN apt-get update && apt-get upgrade -y && apt-get install -y \
9
+ make build-essential libssl-dev zlib1g-dev \
10
+ libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
11
+ libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \
12
+ vim git \
13
  && apt-get clean \
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  WORKDIR /pysr
17
 
18
+ # Install pyenv to switch Python to a dynamically linked version:
19
+ RUN curl https://pyenv.run | bash
20
+ ENV PATH="/root/.pyenv/bin:$PATH"
21
+
22
+ ENV PYTHON_VERSION="3.9.10"
23
+ RUN PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install ${PYTHON_VERSION}
24
+ ENV PATH="/root/.pyenv/versions/${PYTHON_VERSION}/bin:$PATH"
25
+
26
+ # Install IPython and other useful libraries:
27
+ RUN pip install ipython jupyter matplotlib
28
+
29
  # Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
30
  ADD ./requirements.txt /pysr/requirements.txt
31
  RUN pip3 install -r /pysr/requirements.txt
32
 
33
  # Install PySR:
34
+ # We do a minimal copy so it doesn't need to rerun at every file change:
35
+ ADD ./setup.py /pysr/setup.py
36
+ ADD ./README.md /pysr/README.md
37
+ Add ./Project.toml /pysr/Project.toml
38
+ ADD ./pysr/ /pysr/pysr/
39
  RUN pip3 install .
40
 
41
  # Install Julia pre-requisites:
42
+ RUN python3 -c 'import pysr; pysr.install()'
 
 
 
43
 
44
+ CMD ["bash"]
Project.toml CHANGED
@@ -2,5 +2,5 @@
2
  SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
3
 
4
  [compat]
5
- SymbolicRegression = "0.6.10"
6
  julia = "1.5"
 
2
  SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
3
 
4
  [compat]
5
+ SymbolicRegression = "0.6.18"
6
  julia = "1.5"
README.md CHANGED
@@ -62,11 +62,14 @@ and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
62
 
63
  You can install PySR with:
64
  ```bash
65
- pip install pysr
 
66
  ```
 
 
67
 
68
- The first launch will automatically install the Julia packages
69
- required. Most common issues at this stage are solved
70
  by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27)
71
  to use up-to-date packages.
72
 
@@ -121,6 +124,10 @@ which gives:
121
  x0**2 + 2.000016*cos(x3) - 1.9999845
122
  ```
123
 
 
 
 
 
124
  One can also use `best_tex` to get the LaTeX form,
125
  or `best_callable` to get a function you can call.
126
  This uses a score which balances complexity and error;
 
62
 
63
  You can install PySR with:
64
  ```bash
65
+ pip3 install pysr
66
+ python3 -c 'import pysr; pysr.install()'
67
  ```
68
+ The second line will install and update the required Julia packages, including
69
+ `PyCall.jl`.
70
 
71
+
72
+ Most common issues at this stage are solved
73
  by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27)
74
  to use up-to-date packages.
75
 
 
124
  x0**2 + 2.000016*cos(x3) - 1.9999845
125
  ```
126
 
127
+ The second and subsequent calls to `pysr` will be significantly
128
+ faster in startup time, since the first call to Julia will compile
129
+ and cache functions from the symbolic regression backend.
130
+
131
  One can also use `best_tex` to get the LaTeX form,
132
  or `best_callable` to get a function you can call.
133
  This uses a score which balances complexity and error;
docs/start.md CHANGED
@@ -7,20 +7,14 @@ Install Julia - see [downloads](https://julialang.org/downloads/), and
7
  then instructions for [mac](https://julialang.org/downloads/platform/#macos)
8
  and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
9
  (Don't use the `conda-forge` version; it doesn't seem to work properly.)
10
- Then, at the command line,
11
- install the `Optim` and `SpecialFunctions` packages via:
12
 
 
13
  ```bash
14
- julia -e 'import Pkg; Pkg.add("Optim"); Pkg.add("SpecialFunctions")'
15
- ```
16
-
17
- For python, you need to have Python 3, numpy, sympy, and pandas installed.
18
-
19
- You can install this package from PyPI with:
20
-
21
- ```bash
22
- pip install pysr
23
  ```
 
 
24
 
25
  ## Quickstart
26
 
@@ -48,6 +42,10 @@ which gives:
48
  x0**2 + 2.000016*cos(x3) - 1.9999845
49
  ```
50
 
 
 
 
 
51
  One can also use `best_tex` to get the LaTeX form,
52
  or `best_callable` to get a function you can call.
53
  This uses a score which balances complexity and error;
 
7
  then instructions for [mac](https://julialang.org/downloads/platform/#macos)
8
  and [linux](https://julialang.org/downloads/platform/#linux_and_freebsd).
9
  (Don't use the `conda-forge` version; it doesn't seem to work properly.)
 
 
10
 
11
+ You can install PySR with:
12
  ```bash
13
+ pip3 install pysr
14
+ python3 -c 'import pysr; pysr.install()'
 
 
 
 
 
 
 
15
  ```
16
+ The second line will install and update the required Julia packages, including
17
+ `PyCall.jl`.
18
 
19
  ## Quickstart
20
 
 
42
  x0**2 + 2.000016*cos(x3) - 1.9999845
43
  ```
44
 
45
+ The second and subsequent calls to `pysr` will be significantly
46
+ faster in startup time, since the first call to Julia will compile
47
+ and cache functions from the symbolic regression backend.
48
+
49
  One can also use `best_tex` to get the LaTeX form,
50
  or `best_callable` to get a function you can call.
51
  This uses a score which balances complexity and error;
environment.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: test
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - sympy
7
+ - pandas
8
+ - numpy
9
+ - scikit-learn
10
+ - setuptools
11
+ - pip
12
+ - pip:
13
+ - julia
pysr/__init__.py CHANGED
@@ -1,4 +1,13 @@
1
- from .sr import pysr, get_hof, best, best_tex, best_callable, best_row
 
 
 
 
 
 
 
 
 
2
  from .feynman_problems import Problem, FeynmanProblem
3
  from .export_jax import sympy2jax
4
  from .export_torch import sympy2torch
 
1
+ from .sr import (
2
+ pysr,
3
+ get_hof,
4
+ best,
5
+ best_tex,
6
+ best_callable,
7
+ best_row,
8
+ install,
9
+ silence_julia_warning,
10
+ )
11
  from .feynman_problems import Problem, FeynmanProblem
12
  from .export_jax import sympy2jax
13
  from .export_torch import sympy2torch
pysr/feynman_problems.py CHANGED
@@ -1,6 +1,5 @@
1
  import numpy as np
2
  import csv
3
- import traceback
4
  from .sr import pysr, best
5
  from pathlib import Path
6
  from functools import partial
@@ -80,20 +79,14 @@ def mk_problems(first=100, gen=False, dp=500, data_dir=FEYNMAN_DATASET):
80
  """
81
  ret = []
82
  with open(data_dir) as csvfile:
83
- ind = 0
84
  reader = csv.DictReader(csvfile)
85
  for i, row in enumerate(reader):
86
- if ind > first:
87
  break
88
  if row["Filename"] == "":
89
  continue
90
- try:
91
- p = FeynmanProblem(row, gen=gen, dp=dp)
92
- ret.append(p)
93
- except Exception as e:
94
- traceback.print_exc()
95
- print(f"FAILED ON ROW {i} with {e}")
96
- ind += 1
97
  return ret
98
 
99
 
 
1
  import numpy as np
2
  import csv
 
3
  from .sr import pysr, best
4
  from pathlib import Path
5
  from functools import partial
 
79
  """
80
  ret = []
81
  with open(data_dir) as csvfile:
 
82
  reader = csv.DictReader(csvfile)
83
  for i, row in enumerate(reader):
84
+ if i > first:
85
  break
86
  if row["Filename"] == "":
87
  continue
88
+ p = FeynmanProblem(row, gen=gen, dp=dp)
89
+ ret.append(p)
 
 
 
 
 
90
  return ret
91
 
92
 
pysr/sr.py CHANGED
@@ -1,12 +1,9 @@
1
  import os
2
  import sys
3
- from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
4
- from collections import namedtuple
5
- import pathlib
6
  import numpy as np
7
  import pandas as pd
8
  import sympy
9
- from sympy import sympify, Symbol, lambdify
10
  import subprocess
11
  import tempfile
12
  import shutil
@@ -15,6 +12,26 @@ from datetime import datetime
15
  import warnings
16
  from multiprocessing import cpu_count
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  global_state = dict(
19
  equation_file="hall_of_fame.csv",
20
  n_features=None,
@@ -27,8 +44,11 @@ global_state = dict(
27
  multioutput=False,
28
  nout=1,
29
  selection=None,
 
30
  )
31
 
 
 
32
  sympy_mappings = {
33
  "div": lambda x, y: x / y,
34
  "mult": lambda x, y: x * y,
@@ -99,7 +119,6 @@ def pysr(
99
  weightRandomize=1,
100
  weightSimplify=0.01,
101
  perturbationFactor=1.0,
102
- timeout=None,
103
  extra_sympy_mappings=None,
104
  extra_torch_mappings=None,
105
  extra_jax_mappings=None,
@@ -118,9 +137,7 @@ def pysr(
118
  useFrequency=True,
119
  tempdir=None,
120
  delete_tempfiles=True,
121
- julia_optimization=3,
122
  julia_project=None,
123
- user_input=True,
124
  update=True,
125
  temp_equation_file=False,
126
  output_jax_format=False,
@@ -135,6 +152,7 @@ def pysr(
135
  Xresampled=None,
136
  precision=32,
137
  multithreading=None,
 
138
  ):
139
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
140
  Note: most default parameters have been tuned over several example
@@ -201,8 +219,6 @@ def pysr(
201
  :type weightRandomize: float
202
  :param weightSimplify: Relative likelihood for mutation to simplify constant parts by evaluation
203
  :type weightSimplify: float
204
- :param timeout: Time in seconds to timeout search
205
- :type timeout: float
206
  :param equation_file: Where to save the files (.csv separated by |)
207
  :type equation_file: str
208
  :param verbosity: What verbosity level to use. 0 means minimal print statements.
@@ -229,16 +245,12 @@ def pysr(
229
  :type constraints: dict
230
  :param useFrequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
231
  :type useFrequency: bool
232
- :param julia_optimization: Optimization level (0, 1, 2, 3)
233
- :type julia_optimization: int
234
  :param tempdir: directory for the temporary files
235
  :type tempdir: str/None
236
  :param delete_tempfiles: whether to delete the temporary files after finishing
237
  :type delete_tempfiles: bool
238
  :param julia_project: a Julia environment location containing a Project.toml (and potentially the source code for SymbolicRegression.jl). Default gives the Python package directory, where a Project.toml file should be present from the install.
239
  :type julia_project: str/None
240
- :param user_input: Whether to ask for user input or not for installing (to be used for automated scripts). Will choose to install when asked.
241
- :type user_input: bool
242
  :param update: Whether to automatically update Julia packages.
243
  :type update: bool
244
  :param temp_equation_file: Whether to put the hall of fame file in the temp directory. Deletion is then controlled with the delete_tempfiles argument.
@@ -257,9 +269,13 @@ def pysr(
257
  :type precision: int
258
  :param multithreading: Use multithreading instead of distributed backend. Default is yes. Using procs=0 will turn off both.
259
  :type multithreading: bool
 
 
260
  :returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
261
  :type: pd.DataFrame/list
262
  """
 
 
263
  if binary_operators is None:
264
  binary_operators = "+ * - /".split(" ")
265
  if unary_operators is None:
@@ -275,6 +291,13 @@ def pysr(
275
  # or procs is set to 0 (serial mode).
276
  multithreading = procs != 0
277
 
 
 
 
 
 
 
 
278
  buffer_available = "buffer" in sys.stdout.__dir__()
279
 
280
  if progress is not None:
@@ -324,7 +347,6 @@ def pysr(
324
  weights,
325
  y,
326
  )
327
- _check_for_julia_installation()
328
 
329
  if len(X) > 10000 and not batching:
330
  warnings.warn(
@@ -377,436 +399,206 @@ def pysr(
377
  else:
378
  X, y = _denoise(X, y, Xresampled=Xresampled)
379
 
380
- kwargs = dict(
381
- X=X,
382
- y=y,
383
- weights=weights,
384
- alpha=alpha,
385
- annealing=annealing,
386
- batchSize=batchSize,
387
- batching=batching,
388
- binary_operators=binary_operators,
389
- fast_cycle=fast_cycle,
390
- fractionReplaced=fractionReplaced,
391
- ncyclesperiteration=ncyclesperiteration,
392
- niterations=niterations,
393
- npop=npop,
394
- topn=topn,
395
- verbosity=verbosity,
396
- progress=progress,
397
- update=update,
398
- julia_optimization=julia_optimization,
399
- timeout=timeout,
400
- fractionReplacedHof=fractionReplacedHof,
401
- hofMigration=hofMigration,
402
- maxdepth=maxdepth,
403
- maxsize=maxsize,
404
- migration=migration,
405
- optimizer_algorithm=optimizer_algorithm,
406
- optimizer_nrestarts=optimizer_nrestarts,
407
- optimize_probability=optimize_probability,
408
- optimizer_iterations=optimizer_iterations,
409
- parsimony=parsimony,
410
- perturbationFactor=perturbationFactor,
411
- populations=populations,
412
- procs=procs,
413
- shouldOptimizeConstants=shouldOptimizeConstants,
414
- unary_operators=unary_operators,
415
- useFrequency=useFrequency,
416
- use_custom_variable_names=use_custom_variable_names,
417
- variable_names=variable_names,
418
- warmupMaxsizeBy=warmupMaxsizeBy,
419
- weightAddNode=weightAddNode,
420
- weightDeleteNode=weightDeleteNode,
421
- weightDoNothing=weightDoNothing,
422
- weightInsertNode=weightInsertNode,
423
- weightMutateConstant=weightMutateConstant,
424
- weightMutateOperator=weightMutateOperator,
425
- weightRandomize=weightRandomize,
426
- weightSimplify=weightSimplify,
427
- constraints=constraints,
428
- extra_sympy_mappings=extra_sympy_mappings,
429
- extra_jax_mappings=extra_jax_mappings,
430
- extra_torch_mappings=extra_torch_mappings,
431
- julia_project=julia_project,
432
- loss=loss,
433
- output_jax_format=output_jax_format,
434
- output_torch_format=output_torch_format,
435
- selection=selection,
436
- multioutput=multioutput,
437
- nout=nout,
438
- tournament_selection_n=tournament_selection_n,
439
- tournament_selection_p=tournament_selection_p,
440
- denoise=denoise,
441
- precision=precision,
442
- multithreading=multithreading,
443
- )
444
 
445
- kwargs = {**_set_paths(tempdir), **kwargs}
446
 
447
  if temp_equation_file:
448
- equation_file = kwargs["tmpdir"] / "hall_of_fame.csv"
449
  elif equation_file is None:
450
  date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
451
  equation_file = "hall_of_fame_" + date_time + ".csv"
452
 
453
- kwargs = {**dict(equation_file=equation_file), **kwargs}
454
-
455
- pkg_directory = kwargs["pkg_directory"]
456
- if kwargs["julia_project"] is not None:
457
- manifest_filepath = Path(kwargs["julia_project"]) / "Manifest.toml"
458
- else:
459
- manifest_filepath = pkg_directory / "Manifest.toml"
460
-
461
- kwargs["need_install"] = False
462
-
463
- if not (manifest_filepath).is_file():
464
- kwargs["need_install"] = (not user_input) or _yesno(
465
- "I will install Julia packages using PySR's Project.toml file. OK?"
466
- )
467
- if kwargs["need_install"]:
468
- print("OK. I will install at launch.")
469
- assert update
470
-
471
- kwargs["def_hyperparams"] = _create_inline_operators(**kwargs)
472
-
473
- _handle_constraints(**kwargs)
474
-
475
- kwargs["constraints_str"] = _make_constraints_str(**kwargs)
476
- kwargs["def_hyperparams"] = _make_hyperparams_julia_str(**kwargs)
477
- kwargs["def_datasets"] = _make_datasets_julia_str(**kwargs)
478
-
479
- _create_julia_files(**kwargs)
480
- _final_pysr_process(**kwargs)
481
- _set_globals(**kwargs)
482
-
483
- equations = get_hof(**kwargs)
484
-
485
- if delete_tempfiles:
486
- shutil.rmtree(kwargs["tmpdir"])
487
-
488
- return equations
489
-
490
-
491
- def _set_globals(X, **kwargs):
492
- global global_state
493
-
494
- global_state["n_features"] = X.shape[1]
495
- for key, value in kwargs.items():
496
- if key in global_state:
497
- global_state[key] = value
498
-
499
-
500
- def _final_pysr_process(
501
- julia_optimization, runfile_filename, timeout, multithreading, procs, **kwargs
502
- ):
503
- command = [
504
- "julia",
505
- f"-O{julia_optimization:d}",
506
- ]
507
-
508
- if multithreading:
509
- command.append("--threads")
510
- command.append(f"{procs}")
511
-
512
- command.append(str(runfile_filename))
513
- if timeout is not None:
514
- command = ["timeout", f"{timeout}"] + command
515
- _cmd_runner(command, **kwargs)
516
 
 
 
517
 
518
- def _cmd_runner(command, progress, **kwargs):
519
- if kwargs["verbosity"] > 0:
520
- print("Running on", " ".join(command))
521
- process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=-1)
522
  try:
523
- while True:
524
- line = process.stdout.readline()
525
- if not line:
526
- break
527
- decoded_line = line.decode("utf-8")
528
- if progress:
529
- decoded_line = (
530
- decoded_line.replace("\\033[K", "\033[K")
531
- .replace("\\033[1A", "\033[1A")
532
- .replace("\\033[1B", "\033[1B")
533
- .replace("\\r", "\r")
534
- .encode(sys.stdout.encoding, errors="replace")
535
- )
536
- sys.stdout.buffer.write(decoded_line)
537
- sys.stdout.flush()
538
- else:
539
- print(decoded_line, end="")
540
-
541
- process.stdout.close()
542
- process.wait()
543
- except KeyboardInterrupt:
544
- print("Killing process... will return when done.")
545
- process.kill()
546
-
547
-
548
- def _create_julia_files(
549
- dataset_filename,
550
- def_datasets,
551
- hyperparam_filename,
552
- def_hyperparams,
553
- niterations,
554
- runfile_filename,
555
- julia_project,
556
- procs,
557
- weights,
558
- X,
559
- variable_names,
560
- pkg_directory,
561
- need_install,
562
- update,
563
- multithreading,
564
- **kwargs,
565
- ):
566
- with open(hyperparam_filename, "w") as f:
567
- print(def_hyperparams, file=f)
568
- with open(dataset_filename, "w") as f:
569
- print(def_datasets, file=f)
570
- with open(runfile_filename, "w") as f:
571
- if julia_project is None:
572
- julia_project = pkg_directory
573
- else:
574
- julia_project = Path(julia_project)
575
- print(f"import Pkg", file=f)
576
- print(f'Pkg.activate("{_escape_filename(julia_project)}")', file=f)
577
- if need_install:
578
- print(f"Pkg.instantiate()", file=f)
579
- print("Pkg.update()", file=f)
580
- print("Pkg.precompile()", file=f)
581
- elif update:
582
- print(f"Pkg.update()", file=f)
583
- print(f"using SymbolicRegression", file=f)
584
- print(f'include("{_escape_filename(hyperparam_filename)}")', file=f)
585
- print(f'include("{_escape_filename(dataset_filename)}")', file=f)
586
- if len(variable_names) == 0:
587
- varMap = "[" + ",".join([f'"x{i}"' for i in range(X.shape[1])]) + "]"
588
- else:
589
- varMap = (
590
- "[" + ",".join(['"' + vname + '"' for vname in variable_names]) + "]"
591
- )
592
 
593
- cprocs = 0 if multithreading else procs
594
- if weights is not None:
595
- print(
596
- f"EquationSearch(X, y, weights=weights, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={cprocs}, multithreading={'true' if multithreading else 'false'})",
597
- file=f,
598
- )
599
- else:
600
- print(
601
- f"EquationSearch(X, y, niterations={niterations:d}, varMap={varMap}, options=options, numprocs={cprocs}, multithreading={'true' if multithreading else 'false'})",
602
- file=f,
603
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
- def _make_datasets_julia_str(
607
- X,
608
- X_filename,
609
- weights,
610
- weights_filename,
611
- y,
612
- y_filename,
613
- multioutput,
614
- precision,
615
- **kwargs,
616
- ):
617
- def_datasets = """using DelimitedFiles"""
618
- julia_dtype = {16: "Float16", 32: "Float32", 64: "Float64"}[precision]
619
  np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[precision]
620
 
621
- np.savetxt(X_filename, X.astype(np_dtype), delimiter=",")
622
- if multioutput:
623
- np.savetxt(y_filename, y.astype(np_dtype), delimiter=",")
624
  else:
625
- np.savetxt(y_filename, y.reshape(-1, 1).astype(np_dtype), delimiter=",")
626
-
627
  if weights is not None:
628
- if multioutput:
629
- np.savetxt(weights_filename, weights.astype(np_dtype), delimiter=",")
630
  else:
631
- np.savetxt(
632
- weights_filename,
633
- weights.reshape(-1, 1).astype(np_dtype),
634
- delimiter=",",
635
- )
 
 
 
 
 
 
 
 
 
 
 
636
 
637
- def_datasets += f"""
638
- X = copy(transpose(readdlm("{_escape_filename(X_filename)}", ',', {julia_dtype}, '\\n')))"""
 
 
 
 
 
 
 
 
 
 
 
 
639
 
640
- if multioutput:
641
- def_datasets += f"""
642
- y = copy(transpose(readdlm("{_escape_filename(y_filename)}", ',', {julia_dtype}, '\\n')))"""
643
- else:
644
- def_datasets += f"""
645
- y = readdlm("{_escape_filename(y_filename)}", ',', {julia_dtype}, '\\n')[:, 1]"""
 
 
 
 
 
 
 
646
 
647
- if weights is not None:
648
- if multioutput:
649
- def_datasets += f"""
650
- weights = copy(transpose(readdlm("{_escape_filename(weights_filename)}", ',', {julia_dtype}, '\\n')))"""
651
- else:
652
- def_datasets += f"""
653
- weights = readdlm("{_escape_filename(weights_filename)}", ',', {julia_dtype}, '\\n')[:, 1]"""
654
- return def_datasets
655
 
656
 
657
- def _make_hyperparams_julia_str(
 
658
  X,
659
- alpha,
660
- annealing,
661
- batchSize,
662
- batching,
663
- binary_operators,
664
- constraints_str,
665
- def_hyperparams,
666
  equation_file,
667
- fast_cycle,
668
- fractionReplacedHof,
669
- hofMigration,
670
- maxdepth,
671
- maxsize,
672
- migration,
673
- optimizer_algorithm,
674
- optimizer_nrestarts,
675
- optimize_probability,
676
- optimizer_iterations,
677
- npop,
678
- parsimony,
679
- perturbationFactor,
680
- populations,
681
- shouldOptimizeConstants,
682
- unary_operators,
683
- useFrequency,
684
- warmupMaxsizeBy,
685
- weightAddNode,
686
- ncyclesperiteration,
687
- fractionReplaced,
688
- topn,
689
- verbosity,
690
- progress,
691
- loss,
692
- weightDeleteNode,
693
- weightDoNothing,
694
- weightInsertNode,
695
- weightMutateConstant,
696
- weightMutateOperator,
697
- weightRandomize,
698
- weightSimplify,
699
- tournament_selection_n,
700
- tournament_selection_p,
701
- **kwargs,
702
  ):
703
- try:
704
- term_width = shutil.get_terminal_size().columns
705
- except:
706
- _, term_width = subprocess.check_output(["stty", "size"]).split()
707
 
708
- def tuple_fix(ops):
709
- if len(ops) > 1:
710
- return ", ".join(ops)
711
- if len(ops) == 0:
712
- return ""
713
- return ops[0] + ","
714
-
715
- def_hyperparams += f"""\n
716
- plus=(+)
717
- sub=(-)
718
- mult=(*)
719
- square=SymbolicRegression.square
720
- cube=SymbolicRegression.cube
721
- pow=(^)
722
- div=(/)
723
- log_abs=SymbolicRegression.log_abs
724
- log2_abs=SymbolicRegression.log2_abs
725
- log10_abs=SymbolicRegression.log10_abs
726
- log1p_abs=SymbolicRegression.log1p_abs
727
- acosh_abs=SymbolicRegression.acosh_abs
728
- atanh_clip=SymbolicRegression.atanh_clip
729
- sqrt_abs=SymbolicRegression.sqrt_abs
730
- neg=SymbolicRegression.neg
731
- greater=SymbolicRegression.greater
732
- relu=SymbolicRegression.relu
733
- logical_or=SymbolicRegression.logical_or
734
- logical_and=SymbolicRegression.logical_and
735
- _custom_loss = {loss}
736
-
737
- options = SymbolicRegression.Options(binary_operators={'(' + tuple_fix(binary_operators) + ')'},
738
- unary_operators={'(' + tuple_fix(unary_operators) + ')'},
739
- {constraints_str}
740
- parsimony={parsimony:f}f0,
741
- loss=_custom_loss,
742
- alpha={alpha:f}f0,
743
- maxsize={maxsize:d},
744
- maxdepth={maxdepth:d},
745
- fast_cycle={'true' if fast_cycle else 'false'},
746
- migration={'true' if migration else 'false'},
747
- hofMigration={'true' if hofMigration else 'false'},
748
- fractionReplacedHof={fractionReplacedHof}f0,
749
- shouldOptimizeConstants={'true' if shouldOptimizeConstants else 'false'},
750
- hofFile="{_escape_filename(equation_file)}",
751
- npopulations={populations:d},
752
- optimizer_algorithm="{optimizer_algorithm}",
753
- optimizer_nrestarts={optimizer_nrestarts:d},
754
- optimize_probability={optimize_probability:f}f0,
755
- optimizer_iterations={optimizer_iterations:d},
756
- perturbationFactor={perturbationFactor:f}f0,
757
- annealing={"true" if annealing else "false"},
758
- batching={"true" if batching else "false"},
759
- batchSize={min([batchSize, len(X)]) if batching else len(X):d},
760
- mutationWeights=[
761
- {weightMutateConstant:f},
762
- {weightMutateOperator:f},
763
- {weightAddNode:f},
764
- {weightInsertNode:f},
765
- {weightDeleteNode:f},
766
- {weightSimplify:f},
767
- {weightRandomize:f},
768
- {weightDoNothing:f}
769
- ],
770
- warmupMaxsizeBy={warmupMaxsizeBy:f}f0,
771
- useFrequency={"true" if useFrequency else "false"},
772
- npop={npop:d},
773
- ns={tournament_selection_n:d},
774
- probPickFirst={tournament_selection_p:f}f0,
775
- ncyclesperiteration={ncyclesperiteration:d},
776
- fractionReplaced={fractionReplaced:f}f0,
777
- topn={topn:d},
778
- verbosity=round(Int32, {verbosity:f}),
779
- progress={'true' if progress else 'false'},
780
- terminal_width={term_width:d}
781
- """
782
-
783
- def_hyperparams += "\n)"
784
- return def_hyperparams
785
-
786
-
787
- def _make_constraints_str(binary_operators, constraints, unary_operators, **kwargs):
788
- constraints_str = "una_constraints = ["
789
- first = True
790
- for op in unary_operators:
791
- val = constraints[op]
792
- if not first:
793
- constraints_str += ", "
794
- constraints_str += f"{val:d}"
795
- first = False
796
- constraints_str += """],
797
- bin_constraints = ["""
798
- first = True
799
- for op in binary_operators:
800
- tup = constraints[op]
801
- if not first:
802
- constraints_str += ", "
803
- constraints_str += f"({tup[0]:d}, {tup[1]:d})"
804
- first = False
805
- constraints_str += "],"
806
- return constraints_str
807
 
808
 
809
- def _handle_constraints(binary_operators, constraints, unary_operators, **kwargs):
810
  for op in unary_operators:
811
  if op not in constraints:
812
  constraints[op] = -1
@@ -829,14 +621,13 @@ def _handle_constraints(binary_operators, constraints, unary_operators, **kwargs
829
  )
830
 
831
 
832
- def _create_inline_operators(binary_operators, unary_operators, **kwargs):
833
- def_hyperparams = ""
834
  for op_list in [binary_operators, unary_operators]:
835
  for i, op in enumerate(op_list):
836
  is_user_defined_operator = "(" in op
837
 
838
  if is_user_defined_operator:
839
- def_hyperparams += op + "\n"
840
  # Cut off from the first non-alphanumeric char:
841
  first_non_char = [
842
  j
@@ -845,7 +636,6 @@ def _create_inline_operators(binary_operators, unary_operators, **kwargs):
845
  ][0]
846
  function_name = op[:first_non_char]
847
  op_list[i] = function_name
848
- return def_hyperparams
849
 
850
 
851
  def _handle_feature_selection(
@@ -863,30 +653,6 @@ def _handle_feature_selection(
863
  return X, variable_names, selection
864
 
865
 
866
- def _set_paths(tempdir):
867
- # System-independent paths
868
- pkg_directory = Path(__file__).parents[1]
869
- default_project_file = pkg_directory / "Project.toml"
870
- tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
871
- hyperparam_filename = tmpdir / f"hyperparams.jl"
872
- dataset_filename = tmpdir / f"dataset.jl"
873
- runfile_filename = tmpdir / "runfile.jl"
874
- X_filename = tmpdir / "X.csv"
875
- y_filename = tmpdir / "y.csv"
876
- weights_filename = tmpdir / "weights.csv"
877
- return dict(
878
- pkg_directory=pkg_directory,
879
- default_project_file=default_project_file,
880
- X_filename=X_filename,
881
- dataset_filename=dataset_filename,
882
- hyperparam_filename=hyperparam_filename,
883
- runfile_filename=runfile_filename,
884
- tmpdir=tmpdir,
885
- weights_filename=weights_filename,
886
- y_filename=y_filename,
887
- )
888
-
889
-
890
  def _check_assertions(
891
  X,
892
  binary_operators,
@@ -908,30 +674,13 @@ def _check_assertions(
908
  assert len(variable_names) == X.shape[1]
909
 
910
 
911
- def _check_for_julia_installation():
912
- try:
913
- process = subprocess.Popen(["julia", "-v"], stdout=subprocess.PIPE, bufsize=-1)
914
- while True:
915
- line = process.stdout.readline()
916
- if not line:
917
- break
918
- process.stdout.close()
919
- process.wait()
920
- except FileNotFoundError:
921
-
922
- raise RuntimeError(
923
- f"Your current $PATH is: {os.environ['PATH']}\nPySR could not start julia. Make sure julia is installed and on your $PATH."
924
- )
925
- process.kill()
926
-
927
-
928
  def run_feature_selection(X, y, select_k_features):
929
  """Use a random forest regressor as a proxy for finding
930
  the k most important features in X, returning indices for those
931
  features as output."""
932
 
933
  from sklearn.ensemble import RandomForestRegressor
934
- from sklearn.feature_selection import SelectFromModel, SelectKBest
935
 
936
  clf = RandomForestRegressor(n_estimators=100, max_depth=3, random_state=0)
937
  clf.fit(X, y)
@@ -1068,7 +817,9 @@ def get_hof(
1068
  cur_score = 0.0
1069
  else:
1070
  if curMSE > 0.0:
1071
- cur_score = -np.log(curMSE / lastMSE) / (curComplexity - lastComplexity)
 
 
1072
  else:
1073
  cur_score = np.inf
1074
 
@@ -1197,3 +948,56 @@ class CallableEquation:
1197
  if self._selection is not None:
1198
  return self._lambda(*X[:, self._selection].T)
1199
  return self._lambda(*X.T)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import sys
 
 
 
3
  import numpy as np
4
  import pandas as pd
5
  import sympy
6
+ from sympy import sympify, lambdify
7
  import subprocess
8
  import tempfile
9
  import shutil
 
12
  import warnings
13
  from multiprocessing import cpu_count
14
 
15
+ is_julia_warning_silenced = False
16
+
17
+
18
def install(julia_project=None):
    """Set up the Julia side of PySR.

    Runs PyJulia's own ``julia.install()``, initializes Julia through
    ``init_julia()``, then activates the Julia project and updates,
    instantiates and precompiles its packages with ``Pkg``.

    :param julia_project: Julia project location; ``None`` is resolved by
        ``_get_julia_project`` to its default directory.
    :type julia_project: str/None
    """
    import julia

    julia.install()

    project_dir = _get_julia_project(julia_project)

    # Julia must be initialized before `julia.Pkg` can be imported.
    init_julia()
    from julia import Pkg

    Pkg.activate(f"{_escape_filename(project_dir)}")
    # Same order as always: update, then instantiate, then precompile.
    for step_name in ("update", "instantiate", "precompile"):
        getattr(Pkg, step_name)()
32
+
33
+
34
+ Main = None
35
  global_state = dict(
36
  equation_file="hall_of_fame.csv",
37
  n_features=None,
 
44
  multioutput=False,
45
  nout=1,
46
  selection=None,
47
+ raw_julia_output=None,
48
  )
49
 
50
+ already_ran = False
51
+
52
  sympy_mappings = {
53
  "div": lambda x, y: x / y,
54
  "mult": lambda x, y: x * y,
 
119
  weightRandomize=1,
120
  weightSimplify=0.01,
121
  perturbationFactor=1.0,
 
122
  extra_sympy_mappings=None,
123
  extra_torch_mappings=None,
124
  extra_jax_mappings=None,
 
137
  useFrequency=True,
138
  tempdir=None,
139
  delete_tempfiles=True,
 
140
  julia_project=None,
 
141
  update=True,
142
  temp_equation_file=False,
143
  output_jax_format=False,
 
152
  Xresampled=None,
153
  precision=32,
154
  multithreading=None,
155
+ **kwargs,
156
  ):
157
  """Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
158
  Note: most default parameters have been tuned over several example
 
219
  :type weightRandomize: float
220
  :param weightSimplify: Relative likelihood for mutation to simplify constant parts by evaluation
221
  :type weightSimplify: float
 
 
222
  :param equation_file: Where to save the files (.csv separated by |)
223
  :type equation_file: str
224
  :param verbosity: What verbosity level to use. 0 means minimal print statements.
 
245
  :type constraints: dict
246
  :param useFrequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
247
  :type useFrequency: bool
 
 
248
  :param tempdir: directory for the temporary files
249
  :type tempdir: str/None
250
  :param delete_tempfiles: whether to delete the temporary files after finishing
251
  :type delete_tempfiles: bool
252
  :param julia_project: a Julia environment location containing a Project.toml (and potentially the source code for SymbolicRegression.jl). Default gives the Python package directory, where a Project.toml file should be present from the install.
253
  :type julia_project: str/None
 
 
254
  :param update: Whether to automatically update Julia packages.
255
  :type update: bool
256
  :param temp_equation_file: Whether to put the hall of fame file in the temp directory. Deletion is then controlled with the delete_tempfiles argument.
 
269
  :type precision: int
270
  :param multithreading: Use multithreading instead of distributed backend. Default is yes. Using procs=0 will turn off both.
271
  :type multithreading: bool
272
+ :param **kwargs: Other options passed to SymbolicRegression.Options, for example, if you modify SymbolicRegression.jl to include additional arguments.
273
+ :type **kwargs: dict
274
  :returns: Results dataframe, giving complexity, MSE, and equations (as strings), as well as functional forms. If list, each element corresponds to a dataframe of equations for each output.
275
  :type: pd.DataFrame/list
276
  """
277
+ global already_ran
278
+
279
  if binary_operators is None:
280
  binary_operators = "+ * - /".split(" ")
281
  if unary_operators is None:
 
291
  # or procs is set to 0 (serial mode).
292
  multithreading = procs != 0
293
 
294
+ global Main
295
+ if Main is None:
296
+ if multithreading:
297
+ os.environ["JULIA_NUM_THREADS"] = str(procs)
298
+
299
+ Main = init_julia()
300
+
301
  buffer_available = "buffer" in sys.stdout.__dir__()
302
 
303
  if progress is not None:
 
347
  weights,
348
  y,
349
  )
 
350
 
351
  if len(X) > 10000 and not batching:
352
  warnings.warn(
 
399
  else:
400
  X, y = _denoise(X, y, Xresampled=Xresampled)
401
 
402
+ julia_project = _get_julia_project(julia_project)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
+ tmpdir = Path(tempfile.mkdtemp(dir=tempdir))
405
 
406
  if temp_equation_file:
407
+ equation_file = tmpdir / "hall_of_fame.csv"
408
  elif equation_file is None:
409
  date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
410
  equation_file = "hall_of_fame_" + date_time + ".csv"
411
 
412
+ _create_inline_operators(
413
+ binary_operators=binary_operators, unary_operators=unary_operators
414
+ )
415
+ _handle_constraints(
416
+ binary_operators=binary_operators,
417
+ unary_operators=unary_operators,
418
+ constraints=constraints,
419
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
+ una_constraints = [constraints[op] for op in unary_operators]
422
+ bin_constraints = [constraints[op] for op in binary_operators]
423
 
 
 
 
 
424
  try:
425
+ term_width = shutil.get_terminal_size().columns
426
+ except:
427
+ _, term_width = subprocess.check_output(["stty", "size"]).split()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
 
429
+ if not already_ran:
430
+ from julia import Pkg
431
+
432
+ Pkg.activate(f"{_escape_filename(julia_project)}")
433
+ if update:
434
+ try:
435
+ Pkg.resolve()
436
+ except RuntimeError as e:
437
+ raise ImportError(
438
+ f"""
439
+ Required dependencies are not installed or built. Run the following code in the Python REPL:
440
+
441
+ >>> import pysr
442
+ >>> pysr.install()
443
+
444
+ Tried to activate project {julia_project} but failed."""
445
+ ) from e
446
+ Main.eval("using SymbolicRegression")
447
+
448
+ Main.plus = Main.eval("(+)")
449
+ Main.sub = Main.eval("(-)")
450
+ Main.mult = Main.eval("(*)")
451
+ Main.pow = Main.eval("(^)")
452
+ Main.div = Main.eval("(/)")
453
+
454
+ Main.custom_loss = Main.eval(loss)
455
+
456
+ mutationWeights = [
457
+ float(weightMutateConstant),
458
+ float(weightMutateOperator),
459
+ float(weightAddNode),
460
+ float(weightInsertNode),
461
+ float(weightDeleteNode),
462
+ float(weightSimplify),
463
+ float(weightRandomize),
464
+ float(weightDoNothing),
465
+ ]
466
 
467
+ options = Main.Options(
468
+ binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")),
469
+ unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
470
+ bin_constraints=bin_constraints,
471
+ una_constraints=una_constraints,
472
+ parsimony=float(parsimony),
473
+ loss=Main.custom_loss,
474
+ alpha=float(alpha),
475
+ maxsize=int(maxsize),
476
+ maxdepth=int(maxdepth),
477
+ fast_cycle=fast_cycle,
478
+ migration=migration,
479
+ hofMigration=hofMigration,
480
+ fractionReplacedHof=float(fractionReplacedHof),
481
+ shouldOptimizeConstants=shouldOptimizeConstants,
482
+ hofFile=_escape_filename(equation_file),
483
+ npopulations=int(populations),
484
+ optimizer_algorithm=optimizer_algorithm,
485
+ optimizer_nrestarts=int(optimizer_nrestarts),
486
+ optimize_probability=float(optimize_probability),
487
+ optimizer_iterations=int(optimizer_iterations),
488
+ perturbationFactor=float(perturbationFactor),
489
+ annealing=annealing,
490
+ batching=batching,
491
+ batchSize=int(min([batchSize, len(X)]) if batching else len(X)),
492
+ mutationWeights=mutationWeights,
493
+ warmupMaxsizeBy=float(warmupMaxsizeBy),
494
+ useFrequency=useFrequency,
495
+ npop=int(npop),
496
+ ns=int(tournament_selection_n),
497
+ probPickFirst=float(tournament_selection_p),
498
+ ncyclesperiteration=int(ncyclesperiteration),
499
+ fractionReplaced=float(fractionReplaced),
500
+ topn=int(topn),
501
+ verbosity=int(verbosity),
502
+ progress=progress,
503
+ terminal_width=int(term_width),
504
+ **kwargs,
505
+ )
506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[precision]
508
 
509
+ Main.X = np.array(X, dtype=np_dtype).T
510
+ if len(y.shape) == 1:
511
+ Main.y = np.array(y, dtype=np_dtype)
512
  else:
513
+ Main.y = np.array(y, dtype=np_dtype).T
 
514
  if weights is not None:
515
+ if len(weights.shape) == 1:
516
+ Main.weights = np.array(weights, dtype=np_dtype)
517
  else:
518
+ Main.weights = np.array(weights, dtype=np_dtype).T
519
+ else:
520
+ Main.weights = None
521
+
522
+ cprocs = 0 if multithreading else procs
523
+
524
+ raw_julia_output = Main.EquationSearch(
525
+ Main.X,
526
+ Main.y,
527
+ weights=Main.weights,
528
+ niterations=int(niterations),
529
+ varMap=variable_names,
530
+ options=options,
531
+ numprocs=int(cprocs),
532
+ multithreading=bool(multithreading),
533
+ )
534
 
535
+ _set_globals(
536
+ X=X,
537
+ equation_file=equation_file,
538
+ variable_names=variable_names,
539
+ extra_sympy_mappings=extra_sympy_mappings,
540
+ extra_torch_mappings=extra_torch_mappings,
541
+ extra_jax_mappings=extra_jax_mappings,
542
+ output_jax_format=output_jax_format,
543
+ output_torch_format=output_torch_format,
544
+ multioutput=multioutput,
545
+ nout=nout,
546
+ selection=selection,
547
+ raw_julia_output=raw_julia_output,
548
+ )
549
 
550
+ equations = get_hof(
551
+ equation_file=equation_file,
552
+ n_features=X.shape[1],
553
+ variable_names=variable_names,
554
+ output_jax_format=output_jax_format,
555
+ output_torch_format=output_torch_format,
556
+ selection=selection,
557
+ extra_sympy_mappings=extra_sympy_mappings,
558
+ extra_jax_mappings=extra_jax_mappings,
559
+ extra_torch_mappings=extra_torch_mappings,
560
+ multioutput=multioutput,
561
+ nout=nout,
562
+ )
563
 
564
+ if delete_tempfiles:
565
+ shutil.rmtree(tmpdir)
566
+
567
+ return equations
 
 
 
 
568
 
569
 
570
+ def _set_globals(
571
+ *,
572
  X,
 
 
 
 
 
 
 
573
  equation_file,
574
+ variable_names,
575
+ extra_sympy_mappings,
576
+ extra_torch_mappings,
577
+ extra_jax_mappings,
578
+ output_jax_format,
579
+ output_torch_format,
580
+ multioutput,
581
+ nout,
582
+ selection,
583
+ raw_julia_output,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  ):
585
+ global global_state
 
 
 
586
 
587
+ global_state["n_features"] = X.shape[1]
588
+ global_state["equation_file"] = equation_file
589
+ global_state["variable_names"] = variable_names
590
+ global_state["extra_sympy_mappings"] = extra_sympy_mappings
591
+ global_state["extra_torch_mappings"] = extra_torch_mappings
592
+ global_state["extra_jax_mappings"] = extra_jax_mappings
593
+ global_state["output_jax_format"] = output_jax_format
594
+ global_state["output_torch_format"] = output_torch_format
595
+ global_state["multioutput"] = multioutput
596
+ global_state["nout"] = nout
597
+ global_state["selection"] = selection
598
+ global_state["raw_julia_output"] = raw_julia_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
 
600
 
601
+ def _handle_constraints(binary_operators, unary_operators, constraints):
602
  for op in unary_operators:
603
  if op not in constraints:
604
  constraints[op] = -1
 
621
  )
622
 
623
 
624
+ def _create_inline_operators(binary_operators, unary_operators):
 
625
  for op_list in [binary_operators, unary_operators]:
626
  for i, op in enumerate(op_list):
627
  is_user_defined_operator = "(" in op
628
 
629
  if is_user_defined_operator:
630
+ Main.eval(op)
631
  # Cut off from the first non-alphanumeric char:
632
  first_non_char = [
633
  j
 
636
  ][0]
637
  function_name = op[:first_non_char]
638
  op_list[i] = function_name
 
639
 
640
 
641
  def _handle_feature_selection(
 
653
  return X, variable_names, selection
654
 
655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  def _check_assertions(
657
  X,
658
  binary_operators,
 
674
  assert len(variable_names) == X.shape[1]
675
 
676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  def run_feature_selection(X, y, select_k_features):
678
  """Use a random forest regressor as a proxy for finding
679
  the k most important features in X, returning indices for those
680
  features as output."""
681
 
682
  from sklearn.ensemble import RandomForestRegressor
683
+ from sklearn.feature_selection import SelectFromModel
684
 
685
  clf = RandomForestRegressor(n_estimators=100, max_depth=3, random_state=0)
686
  clf.fit(X, y)
 
817
  cur_score = 0.0
818
  else:
819
  if curMSE > 0.0:
820
+ cur_score = -np.log(curMSE / lastMSE) / (
821
+ curComplexity - lastComplexity
822
+ )
823
  else:
824
  cur_score = np.inf
825
 
 
948
  if self._selection is not None:
949
  return self._lambda(*X[:, self._selection].T)
950
  return self._lambda(*X.T)
951
+
952
+
953
+ def _get_julia_project(julia_project):
954
+ pkg_directory = Path(__file__).parents[1]
955
+ if julia_project is None:
956
+ return pkg_directory
957
+ return Path(julia_project)
958
+
959
+
960
def silence_julia_warning():
    """Suppress the static-libpython warning that ``init_julia()`` can emit.

    Sets the module-level flag ``is_julia_warning_silenced`` to ``True``.
    """
    # Writing through globals() rebinds the same module-level name.
    globals()["is_julia_warning_silenced"] = True
963
+
964
+
965
def init_julia():
    """Initialize julia binary, turning off compiled modules if needed.

    :returns: PyJulia's ``Main`` module.
    :raises ImportError: if ``JuliaInfo`` reports that PyCall is not built.
    """
    from julia.core import JuliaInfo, UnsupportedPythonError

    julia_info = JuliaInfo.load(julia="julia")
    if not julia_info.is_pycall_built():
        raise ImportError(
            """
Required dependencies are not installed or built. Run the following code in the Python REPL:

>>> import pysr
>>> pysr.install()"""
        )

    try:
        from julia import Main
    except UnsupportedPythonError:
        # Fallback path: warn (unless silenced), then start Julia with
        # compiled modules disabled and retry the import.
        if not is_julia_warning_silenced:
            warnings.warn(
                """
Your Python version is statically linked to libpython. For example, this could be the python included with conda, or maybe your system's built-in python.
This will still work, but the precompilation cache for Julia will be turned off, which may result in slower startup times on the initial pysr() call.

To install a Python version that is dynamically linked to libpython, pyenv is recommended (https://github.com/pyenv/pyenv).

To silence this warning, you can run pysr.silence_julia_warning() after importing pysr."""
            )
        from julia.core import Julia

        # Keep a reference for the rest of the call, as the original did.
        runtime = Julia(compiled_modules=False)
        from julia import Main

    return Main
requirements.txt CHANGED
@@ -2,3 +2,4 @@ sympy
2
  pandas
3
  numpy
4
  scikit_learn
 
 
2
  pandas
3
  numpy
4
  scikit_learn
5
+ julia
setup.py CHANGED
@@ -5,14 +5,14 @@ with open("README.md", "r") as fh:
5
 
6
  setuptools.setup(
7
  name="pysr",
8
- version="0.6.14",
9
  author="Miles Cranmer",
10
  author_email="miles.cranmer@gmail.com",
11
  description="Simple and efficient symbolic regression",
12
  long_description=long_description,
13
  long_description_content_type="text/markdown",
14
  url="https://github.com/MilesCranmer/pysr",
15
- install_requires=["numpy", "pandas", "sympy"],
16
  packages=setuptools.find_packages(),
17
  package_data={"pysr": ["../Project.toml", "../datasets/*"]},
18
  include_package_data=False,
 
5
 
6
  setuptools.setup(
7
  name="pysr",
8
+ version="0.7.0a1",
9
  author="Miles Cranmer",
10
  author_email="miles.cranmer@gmail.com",
11
  description="Simple and efficient symbolic regression",
12
  long_description=long_description,
13
  long_description_content_type="text/markdown",
14
  url="https://github.com/MilesCranmer/pysr",
15
+ install_requires=["julia", "numpy", "pandas", "sympy"],
16
  packages=setuptools.find_packages(),
17
  package_data={"pysr": ["../Project.toml", "../datasets/*"]},
18
  include_package_data=False,
test/test.py CHANGED
@@ -13,7 +13,6 @@ class TestPipeline(unittest.TestCase):
13
  self.default_test_kwargs = dict(
14
  niterations=10,
15
  populations=4,
16
- user_input=False,
17
  annealing=True,
18
  useFrequency=False,
19
  )
 
13
  self.default_test_kwargs = dict(
14
  niterations=10,
15
  populations=4,
 
16
  annealing=True,
17
  useFrequency=False,
18
  )
test/test_static_libpython_warning.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Test that running PySR with static libpython raises a warning."""

import warnings
import pysr

# Taken from https://stackoverflow.com/a/14463362/2689923
# record=True collects emitted warnings in a list instead of printing them.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pysr.sr.init_julia()

# Exactly one warning is expected from init_julia().
assert len(caught) == 1

latest = caught[-1]
assert issubclass(latest.category, UserWarning)
assert "static" in str(latest.message)
+ assert "static" in str(warning_catcher[-1].message)