Spaces:
Running
Running
Commit
·
ecc6ae8
1
Parent(s):
d85f644
Can call distributed processing from python
Browse files- julia/loop.jl +0 -117
- julia/sr.jl +106 -0
- pysr/sr.py +16 -8
julia/loop.jl
DELETED
|
@@ -1,117 +0,0 @@
|
|
| 1 |
-
# Stand-alone driver: runs one evolving population per worker process and
# merges their results on the master process after every completed cycle.
using Distributed

# Number of worker processes, and therefore of concurrently evolving populations.
const nprocs = 4
addprocs(nprocs)

# NOTE(review): the dataset / hyperparameter file names below are hard-coded to
# one specific generated run — confirm they exist before launching this script.
@everywhere include(".dataset_28330894764081783777.jl")
@everywhere include(".hyperparams_28330894764081783777.jl")
@everywhere include("sr.jl")

# Run settings. They are defined once, before first use, so that every loop
# below reads the same values.
npop = 300                       # members per population
ncyclesperiteration = 3000       # cycles handed to `run` per iteration
fractionReplaced = 0.1f0         # share of a population overwritten by migrants
verbosity = convert(Int, 1e9)
topn = 10                        # members each population contributes to the migrant pool
niterations = 10                 # iterations per population

# 1. Start a population on every process.
allPops = Future[]
bestSubPops = [Population(1) for j in 1:nprocs]
hallOfFame = HallOfFame()

for i in 1:nprocs
    push!(allPops, @spawnat :any Population(npop, 3))
end

# 2. Start the first cycle on every process.
for i in 1:nprocs
    allPops[i] = @spawnat :any run(fetch(allPops[i]), ncyclesperiteration, verbosity=verbosity)
end
println("Started!")

# Total number of population cycles still to be collected. This is derived
# from `niterations` rather than a literal so the two cannot drift apart.
cycles_complete = nprocs * niterations
while cycles_complete > 0
    for i in 1:nprocs
        # Poll without blocking; unfinished populations are checked again on
        # the next pass of the enclosing while loop.
        isready(allPops[i]) || continue

        cur_pop = fetch(allPops[i])
        bestSubPops[i] = bestSubPop(cur_pop, topn=topn)

        # Pool of the best members of every population (migration source).
        bestPops = Population([member for pop in bestSubPops for member in pop.members])

        # Keep, per tree size, the best-scoring member seen so far.
        for member in cur_pop.members
            complexity = countNodes(member.tree)
            if member.score < hallOfFame.members[complexity].score
                hallOfFame.members[complexity] = deepcopy(member)
                hallOfFame.exists[complexity] = true
            end
        end

        # Dominating pareto curve - must be better than all simpler equations
        dominating = PopMember[]
        open(hofFile, "w") do io
            debug(verbosity, "\n")
            debug(verbosity, "Hall of Fame:")
            debug(verbosity, "-----------------------------------------")
            debug(verbosity, "Complexity \t MSE \t Equation")
            println(io,"Complexity|MSE|Equation")
            for complexity in 1:actualMaxsize
                hallOfFame.exists[complexity] || continue
                member = hallOfFame.members[complexity]
                curMSE = MSE(evalTreeArray(member.tree), y)
                # Compare only against smaller slots that actually hold an
                # equation. `any` also handles the empty range at complexity 1.
                beaten = any(1:(complexity - 1)) do j
                    hallOfFame.exists[j] &&
                        curMSE > MSE(evalTreeArray(hallOfFame.members[j].tree), y)
                end
                if !beaten
                    debug(verbosity, "$complexity \t $(curMSE) \t $(stringTree(member.tree))")
                    println(io, "$complexity|$(curMSE)|$(stringTree(member.tree))")
                    push!(dominating, member)
                end
            end
            debug(verbosity, "")
        end

        # Migration between populations: overwrite random slots with copies
        # drawn from the pooled best members.
        if migration
            for k in rand(1:npop, round(Integer, npop*fractionReplaced))
                donor = rand(bestPops.members)
                cur_pop.members[k] = PopMember(copyNode(donor.tree), donor.score)
            end
        end

        # Migration from the hall of fame's dominating set.
        if hofMigration && !isempty(dominating)
            for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
                # Copy in case one gets used twice
                cur_pop.members[k] = PopMember(copyNode(rand(dominating).tree))
            end
        end

        # Launch the next cycle for this population. After `run`, roughly one
        # member in ten is simplified and (optionally) has its constants tuned.
        allPops[i] = @spawnat :any let
            tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
            for j in 1:tmp_pop.n
                if rand() < 0.1
                    tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
                    tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
                    if shouldOptimizeConstants
                        tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
                    end
                end
            end
            tmp_pop
        end

        global cycles_complete -= 1
    end
    sleep(1e-3)
end

# Shut down every worker. `rmprocs` takes process ids, so passing the worker
# count would remove only the single worker whose id equals that number.
rmprocs(workers())
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
julia/sr.jl
CHANGED
|
@@ -738,3 +738,109 @@ mutable struct HallOfFame
|
|
| 738 |
end
|
| 739 |
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
end
|
| 739 |
|
| 740 |
|
| 741 |
+
"""
    fullRun(niterations::Integer; npop=300, ncyclesperiteration=3000,
            fractionReplaced=0.1f0, verbosity=0, topn=10)

Coordinate the distributed search from the calling process.

One population is created per process slot (`nprocs` of them) with
`@spawnat :any`. Each time a population finishes a cycle, the caller:

1. fetches it and records its `topn` best members for migration,
2. updates the hall of fame (best member per tree size),
3. rewrites `hofFile` with the members that are not beaten by any smaller
   stored equation,
4. optionally migrates members into the population (`migration`,
   `hofMigration`),
5. schedules the next cycle for that population.

The function finishes after `nprocs * niterations` cycles have been collected.

# Arguments
- `niterations`: number of cycles to collect per population.

# Keywords
- `npop`: members per population.
- `ncyclesperiteration`: passed through to `run` for every cycle.
- `fractionReplaced`: share of a population overwritten by migrants from the
  pooled best members.
- `verbosity`: forwarded to `debug` and `run`.
- `topn`: how many members each population contributes to the migrant pool.

Reads names that are not defined in this function: `nprocs`, `hofFile`,
`actualMaxsize`, `y`, `migration`, `hofMigration`, `fractionReplacedHof` and
`shouldOptimizeConstants`. These are presumably the constants from the
generated hyperparameter and dataset files; verify they are included before
this is called.
"""
function fullRun(niterations::Integer;
                 npop::Integer=300,
                 ncyclesperiteration::Integer=3000,
                 fractionReplaced::Float32=0.1f0,
                 verbosity::Integer=0,
                 topn::Integer=10
                )
    # 1. Start a population on every process.
    allPops = Future[]
    bestSubPops = [Population(1) for j in 1:nprocs]
    hallOfFame = HallOfFame()

    for i in 1:nprocs
        # Uses the caller's `npop`; it must not be reassigned here, because the
        # migration code below indexes populations with `1:npop`.
        push!(allPops, @spawnat :any Population(npop, 3))
    end

    # 2. Start the first cycle on every process.
    for i in 1:nprocs
        initial = allPops[i]
        allPops[i] = @spawnat :any run(fetch(initial), ncyclesperiteration, verbosity=verbosity)
    end
    println("Started!")

    # Total number of population cycles still to be collected.
    cycles_complete = nprocs * niterations
    while cycles_complete > 0
        for i in 1:nprocs
            # Poll without blocking; unfinished populations are checked again
            # on the next pass of the enclosing while loop.
            isready(allPops[i]) || continue

            cur_pop = fetch(allPops[i])
            bestSubPops[i] = bestSubPop(cur_pop, topn=topn)

            # Pool of the best members of every population (migration source).
            bestPops = Population([member for pop in bestSubPops for member in pop.members])

            # Keep, per tree size, the best-scoring member seen so far.
            for member in cur_pop.members
                complexity = countNodes(member.tree)
                if member.score < hallOfFame.members[complexity].score
                    hallOfFame.members[complexity] = deepcopy(member)
                    hallOfFame.exists[complexity] = true
                end
            end

            # Dominating pareto curve - must be better than all simpler equations
            dominating = PopMember[]
            open(hofFile, "w") do io
                debug(verbosity, "\n")
                debug(verbosity, "Hall of Fame:")
                debug(verbosity, "-----------------------------------------")
                debug(verbosity, "Complexity \t MSE \t Equation")
                println(io,"Complexity|MSE|Equation")
                for complexity in 1:actualMaxsize
                    hallOfFame.exists[complexity] || continue
                    member = hallOfFame.members[complexity]
                    curMSE = MSE(evalTreeArray(member.tree), y)
                    # Compare only against smaller slots that actually hold an
                    # equation. `any` also handles the empty range that occurs
                    # at complexity 1.
                    beaten = any(1:(complexity - 1)) do j
                        hallOfFame.exists[j] &&
                            curMSE > MSE(evalTreeArray(hallOfFame.members[j].tree), y)
                    end
                    if !beaten
                        debug(verbosity, "$complexity \t $(curMSE) \t $(stringTree(member.tree))")
                        println(io, "$complexity|$(curMSE)|$(stringTree(member.tree))")
                        push!(dominating, member)
                    end
                end
                debug(verbosity, "")
            end

            # Migration between populations: overwrite random slots with
            # copies drawn from the pooled best members.
            if migration
                for k in rand(1:npop, round(Integer, npop*fractionReplaced))
                    donor = rand(bestPops.members)
                    cur_pop.members[k] = PopMember(copyNode(donor.tree), donor.score)
                end
            end

            # Migration from the hall of fame's dominating set.
            if hofMigration && !isempty(dominating)
                for k in rand(1:npop, round(Integer, npop*fractionReplacedHof))
                    # Copy in case one gets used twice
                    cur_pop.members[k] = PopMember(copyNode(rand(dominating).tree))
                end
            end

            # Launch the next cycle for this population. After `run`, roughly
            # one member in ten is simplified and (optionally) has its
            # constants tuned.
            allPops[i] = @spawnat :any let
                tmp_pop = run(cur_pop, ncyclesperiteration, verbosity=verbosity)
                for j in 1:tmp_pop.n
                    if rand() < 0.1
                        tmp_pop.members[j].tree = simplifyTree(tmp_pop.members[j].tree)
                        tmp_pop.members[j].tree = combineOperators(tmp_pop.members[j].tree)
                        if shouldOptimizeConstants
                            tmp_pop.members[j] = optimizeConstants(tmp_pop.members[j])
                        end
                    end
                end
                tmp_pop
            end

            cycles_complete -= 1
        end
        sleep(1e-3)
    end
    return nothing
end
|
| 846 |
+
|
pysr/sr.py
CHANGED
|
@@ -5,7 +5,8 @@ import pathlib
|
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
|
| 8 |
-
def pysr(X=None, y=None, weights=None,
|
|
|
|
| 9 |
niterations=100,
|
| 10 |
ncyclesperiteration=300,
|
| 11 |
binary_operators=["plus", "mult"],
|
|
@@ -35,6 +36,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
| 35 |
test='simple1',
|
| 36 |
verbosity=1e9,
|
| 37 |
maxsize=20,
|
|
|
|
| 38 |
):
|
| 39 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
| 40 |
Note: most default parameters have been tuned over several example
|
|
@@ -43,9 +45,7 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
| 43 |
|
| 44 |
:param X: np.ndarray, 2D array. Rows are examples, columns are features.
|
| 45 |
:param y: np.ndarray, 1D array. Rows are examples.
|
| 46 |
-
:param
|
| 47 |
-
You can have more threads than cores - it actually makes it more
|
| 48 |
-
efficient.
|
| 49 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
| 50 |
equations are printed, and migrate between populations, at the
|
| 51 |
end of each.
|
|
@@ -91,6 +91,8 @@ def pysr(X=None, y=None, weights=None, threads=4,
|
|
| 91 |
(as strings).
|
| 92 |
|
| 93 |
"""
|
|
|
|
|
|
|
| 94 |
|
| 95 |
# Check for potential errors before they happen
|
| 96 |
assert len(binary_operators) > 0
|
|
@@ -155,7 +157,7 @@ const hofMigration = {'true' if hofMigration else 'false'}
|
|
| 155 |
const fractionReplacedHof = {fractionReplacedHof}f0
|
| 156 |
const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
|
| 157 |
const hofFile = "{equation_file}"
|
| 158 |
-
const
|
| 159 |
const nrestarts = {nrestarts:d}
|
| 160 |
const perturbationFactor = {perturbationFactor:f}f0
|
| 161 |
const annealing = {"true" if annealing else "false"}
|
|
@@ -192,12 +194,18 @@ const weights = convert(Array{Float32, 1}, """f"{weight_str})"
|
|
| 192 |
with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
|
| 193 |
print(def_datasets, file=f)
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
command = [
|
| 197 |
'julia -O3',
|
| 198 |
-
'
|
| 199 |
-
'
|
| 200 |
-
f'\'include("/tmp/.hyperparams_{rand_string}.jl"); include("/tmp/.dataset_{rand_string}.jl"); include("{pkg_directory}/sr.jl"); fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})\'',
|
| 201 |
]
|
| 202 |
if timeout is not None:
|
| 203 |
command = [f'timeout {timeout}'] + command
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
|
| 8 |
+
def pysr(X=None, y=None, weights=None,
|
| 9 |
+
procs=4,
|
| 10 |
niterations=100,
|
| 11 |
ncyclesperiteration=300,
|
| 12 |
binary_operators=["plus", "mult"],
|
|
|
|
| 36 |
test='simple1',
|
| 37 |
verbosity=1e9,
|
| 38 |
maxsize=20,
|
| 39 |
+
threads=None, #deprecated
|
| 40 |
):
|
| 41 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
| 42 |
Note: most default parameters have been tuned over several example
|
|
|
|
| 45 |
|
| 46 |
:param X: np.ndarray, 2D array. Rows are examples, columns are features.
|
| 47 |
:param y: np.ndarray, 1D array. Rows are examples.
|
| 48 |
+
:param procs: int, Number of processes (=number of populations running).
|
|
|
|
|
|
|
| 49 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
| 50 |
equations are printed, and migrate between populations, at the
|
| 51 |
end of each.
|
|
|
|
| 91 |
(as strings).
|
| 92 |
|
| 93 |
"""
|
| 94 |
+
if threads is not None:
|
| 95 |
+
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
| 96 |
|
| 97 |
# Check for potential errors before they happen
|
| 98 |
assert len(binary_operators) > 0
|
|
|
|
| 157 |
const fractionReplacedHof = {fractionReplacedHof}f0
|
| 158 |
const shouldOptimizeConstants = {'true' if shouldOptimizeConstants else 'false'}
|
| 159 |
const hofFile = "{equation_file}"
|
| 160 |
+
const nprocs = {procs:d}
|
| 161 |
const nrestarts = {nrestarts:d}
|
| 162 |
const perturbationFactor = {perturbationFactor:f}f0
|
| 163 |
const annealing = {"true" if annealing else "false"}
|
|
|
|
| 194 |
with open(f'/tmp/.dataset_{rand_string}.jl', 'w') as f:
|
| 195 |
print(def_datasets, file=f)
|
| 196 |
|
| 197 |
+
with open(f'/tmp/.runfile_{rand_string}.jl', 'w') as f:
|
| 198 |
+
print(f'@everywhere include("/tmp/.hyperparams_{rand_string}.jl")', file=f)
|
| 199 |
+
print(f'@everywhere include("/tmp/.dataset_{rand_string}.jl")', file=f)
|
| 200 |
+
print(f'include("{pkg_directory}/sr.jl")', file=f)
|
| 201 |
+
print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
|
| 202 |
+
print(f'rmprocs(nprocs)', file=f)
|
| 203 |
+
|
| 204 |
|
| 205 |
command = [
|
| 206 |
'julia -O3',
|
| 207 |
+
f'-p {procs}',
|
| 208 |
+
f'/tmp/.runfile_{rand_string}.jl',
|
|
|
|
| 209 |
]
|
| 210 |
if timeout is not None:
|
| 211 |
command = [f'timeout {timeout}'] + command
|