AutonLabTruth committed on
Commit
0aafc34
1 Parent(s): 181a454

Refactored till file creation

Browse files
Files changed (1) hide show
  1. pysr/sr.py +114 -90
pysr/sr.py CHANGED
@@ -195,7 +195,6 @@ def pysr(X=None, y=None, weights=None,
195
  X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
196
  weights_filename, y_filename = set_paths(tempdir)
197
 
198
-
199
  if isinstance(X, pd.DataFrame):
200
  variable_names = list(X.columns)
201
  X = np.array(X)
@@ -231,27 +230,99 @@ def pysr(X=None, y=None, weights=None,
231
  #arbitrary complexity by default
232
  handle_constraints(binary_operators, constraints, unary_operators)
233
 
234
- constraints_str = "const una_constraints = ["
235
- first = True
236
- for op in unary_operators:
237
- val = constraints[op]
238
- if not first:
239
- constraints_str += ", "
240
- constraints_str += f"{val:d}"
241
- first = False
242
 
243
- constraints_str += """]
244
- const bin_constraints = ["""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
- first = True
247
- for op in binary_operators:
248
- tup = constraints[op]
249
- if not first:
250
- constraints_str += ", "
251
- constraints_str += f"({tup[0]:d}, {tup[1]:d})"
252
- first = False
253
- constraints_str += "]"
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
256
  {constraints_str}
257
  const binops = {'[' + ', '.join(binary_operators) + ']'}
@@ -290,7 +361,6 @@ const warmupMaxsize = {warmupMaxsize:d}
290
  const limitPowComplexity = {"true" if limitPowComplexity else "false"}
291
  const useFrequency = {"true" if useFrequency else "false"}
292
  """
293
-
294
  op_runner = ""
295
  if len(binary_operators) > 0:
296
  op_runner += """
@@ -301,14 +371,13 @@ const useFrequency = {"true" if useFrequency else "false"}
301
  end"""
302
  for i in range(1, len(binary_operators)):
303
  op_runner += f"""
304
- elseif i === {i+1}
305
  @inbounds @simd for j=1:clen
306
  x[j] = {binary_operators[i]}(x[j], y[j])
307
  end"""
308
  op_runner += """
309
  end
310
  end"""
311
-
312
  if len(unary_operators) > 0:
313
  op_runner += """
314
  @inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
@@ -318,85 +387,40 @@ end"""
318
  end"""
319
  for i in range(1, len(unary_operators)):
320
  op_runner += f"""
321
- elseif i === {i+1}
322
  @inbounds @simd for j=1:clen
323
  x[j] = {unary_operators[i]}(x[j])
324
  end"""
325
  op_runner += """
326
  end
327
  end"""
328
-
329
  def_hyperparams += op_runner
330
-
331
- def_datasets = """using DelimitedFiles"""
332
-
333
- np.savetxt(X_filename, X, delimiter=',')
334
- np.savetxt(y_filename, y, delimiter=',')
335
- if weights is not None:
336
- np.savetxt(weights_filename, weights, delimiter=',')
337
-
338
- def_datasets += f"""
339
- const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')
340
- const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')"""
341
-
342
- if weights is not None:
343
- def_datasets += f"""
344
- const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
345
-
346
  if use_custom_variable_names:
347
  def_hyperparams += f"""
348
- const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
349
-
350
- with open(hyperparam_filename, 'w') as f:
351
- print(def_hyperparams, file=f)
352
-
353
- with open(dataset_filename, 'w') as f:
354
- print(def_datasets, file=f)
355
-
356
- with open(runfile_filename, 'w') as f:
357
- print(f'@everywhere include("{_escape_filename(hyperparam_filename)}")', file=f)
358
- print(f'@everywhere include("{_escape_filename(dataset_filename)}")', file=f)
359
- print(f'@everywhere include("{_escape_filename(pkg_filename)}")', file=f)
360
- print(f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})', file=f)
361
- print(f'rmprocs(nprocs)', file=f)
362
-
363
-
364
- command = [
365
- f'julia', f'-O{julia_optimization:d}',
366
- f'-p', f'{procs}',
367
- str(runfile_filename),
368
- ]
369
- if timeout is not None:
370
- command = [f'timeout', f'{timeout}'] + command
371
-
372
- global global_n_features
373
- global global_equation_file
374
- global global_variable_names
375
- global global_extra_sympy_mappings
376
-
377
- global_n_features = X.shape[1]
378
- global_equation_file = equation_file
379
- global_variable_names = variable_names
380
- global_extra_sympy_mappings = extra_sympy_mappings
381
-
382
- print("Running on", ' '.join(command))
383
- process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
384
- try:
385
- while True:
386
- line = process.stdout.readline()
387
- if not line: break
388
- print(line.decode('utf-8').replace('\n', ''))
389
-
390
- process.stdout.close()
391
- process.wait()
392
- except KeyboardInterrupt:
393
- print("Killing process... will return when done.")
394
- process.kill()
395
 
396
- if delete_tempfiles:
397
- shutil.rmtree(tmpdir)
398
 
399
- return get_hof()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
 
402
  def handle_constraints(binary_operators, constraints, unary_operators):
 
195
  X_filename, dataset_filename, hyperparam_filename, operator_filename, pkg_filename, runfile_filename, tmpdir, \
196
  weights_filename, y_filename = set_paths(tempdir)
197
 
 
198
  if isinstance(X, pd.DataFrame):
199
  variable_names = list(X.columns)
200
  X = np.array(X)
 
230
  #arbitrary complexity by default
231
  handle_constraints(binary_operators, constraints, unary_operators)
232
 
233
+ constraints_str = make_constraints_str(binary_operators, constraints, unary_operators)
 
 
 
 
 
 
 
234
 
235
+ def_hyperparams = make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators,
236
+ constraints_str, def_hyperparams, equation_file, fast_cycle,
237
+ fractionReplacedHof, hofMigration, limitPowComplexity, maxdepth,
238
+ maxsize, migration, nrestarts, operator_filename, parsimony,
239
+ perturbationFactor, populations, procs, shouldOptimizeConstants,
240
+ unary_operators, useFrequency, use_custom_variable_names, variable_names,
241
+ warmupMaxsize, weightAddNode, weightDeleteNode, weightDoNothing,
242
+ weightInsertNode, weightMutateConstant, weightMutateOperator,
243
+ weightRandomize, weightSimplify, weights)
244
+
245
+ def_datasets = make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename)
246
+
247
+ create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
248
+ ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity)
249
+
250
+ command = [
251
+ f'julia', f'-O{julia_optimization:d}',
252
+ f'-p', f'{procs}',
253
+ str(runfile_filename),
254
+ ]
255
+ if timeout is not None:
256
+ command = [f'timeout', f'{timeout}'] + command
257
+
258
+ global global_n_features
259
+ global global_equation_file
260
+ global global_variable_names
261
+ global global_extra_sympy_mappings
262
+
263
+ global_n_features = X.shape[1]
264
+ global_equation_file = equation_file
265
+ global_variable_names = variable_names
266
+ global_extra_sympy_mappings = extra_sympy_mappings
267
+
268
+ print("Running on", ' '.join(command))
269
+ process = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=1, shell=True)
270
+ try:
271
+ while True:
272
+ line = process.stdout.readline()
273
+ if not line: break
274
+ print(line.decode('utf-8').replace('\n', ''))
275
+
276
+ process.stdout.close()
277
+ process.wait()
278
+ except KeyboardInterrupt:
279
+ print("Killing process... will return when done.")
280
+ process.kill()
281
+
282
+ if delete_tempfiles:
283
+ shutil.rmtree(tmpdir)
284
+
285
+ return get_hof()
286
+
287
+
288
def create_julia_files(dataset_filename, def_datasets, def_hyperparams, fractionReplaced, hyperparam_filename,
                       ncyclesperiteration, niterations, npop, pkg_filename, runfile_filename, topn, verbosity):
    """Write the Julia hyperparameter, dataset and run files to disk.

    ``def_hyperparams`` is written to ``hyperparam_filename`` and
    ``def_datasets`` to ``dataset_filename``, each followed by a newline.
    The run file at ``runfile_filename`` gets one ``@everywhere include(...)``
    line for each of the hyperparameter, dataset and package files (in that
    order), then a ``fullRun(...)`` call built from the numeric arguments,
    and finally ``rmprocs(nprocs)``.

    Returns None; the function is called for its file-writing side effects.
    """
    with open(hyperparam_filename, 'w') as hyperparam_file:
        hyperparam_file.write(f"{def_hyperparams}\n")

    with open(dataset_filename, 'w') as dataset_file:
        dataset_file.write(f"{def_datasets}\n")

    with open(runfile_filename, 'w') as runfile:
        # Include order matters to the generated script: hyperparameters,
        # then datasets, then the package source.
        for included_path in (hyperparam_filename, dataset_filename, pkg_filename):
            runfile.write(f'@everywhere include("{_escape_filename(included_path)}")\n')

        full_run_call = f'fullRun({niterations:d}, npop={npop:d}, ncyclesperiteration={ncyclesperiteration:d}, fractionReplaced={fractionReplaced:f}f0, verbosity=round(Int32, {verbosity:f}), topn={topn:d})'
        runfile.write(full_run_call + "\n")
        runfile.write('rmprocs(nprocs)' + "\n")
302
 
 
 
 
 
 
 
 
 
303
 
304
def make_datasets_julia_str(X, X_filename, weights, weights_filename, y, y_filename):
    """Save the dataset arrays as CSV and return Julia code that loads them.

    ``X`` and ``y`` are always written with ``np.savetxt`` (comma
    delimited) to ``X_filename`` and ``y_filename``; ``weights`` is written
    to ``weights_filename`` only when it is not None.

    Returns a string of Julia source: ``using DelimitedFiles`` followed by
    one ``const <name> = readdlm(...)`` line per saved array.
    """
    has_weights = weights is not None

    # Write the data files first, in the same order as the Julia
    # declarations emitted below.
    np.savetxt(X_filename, X, delimiter=',')
    np.savetxt(y_filename, y, delimiter=',')
    if has_weights:
        np.savetxt(weights_filename, weights, delimiter=',')

    julia_lines = [
        "using DelimitedFiles",
        f"""const X = readdlm("{_escape_filename(X_filename)}", ',', Float32, '\\n')""",
        f"""const y = readdlm("{_escape_filename(y_filename)}", ',', Float32, '\\n')""",
    ]
    if has_weights:
        julia_lines.append(
            f"""const weights = readdlm("{_escape_filename(weights_filename)}", ',', Float32, '\\n')"""
        )
    return "\n".join(julia_lines)
317
+
318
+
319
+ def make_hyperparams_julia_str(X, alpha, annealing, batchSize, batching, binary_operators, constraints_str,
320
+ def_hyperparams, equation_file, fast_cycle, fractionReplacedHof, hofMigration,
321
+ limitPowComplexity, maxdepth, maxsize, migration, nrestarts, operator_filename,
322
+ parsimony, perturbationFactor, populations, procs, shouldOptimizeConstants,
323
+ unary_operators, useFrequency, use_custom_variable_names, variable_names, warmupMaxsize, weightAddNode,
324
+ weightDeleteNode, weightDoNothing, weightInsertNode, weightMutateConstant,
325
+ weightMutateOperator, weightRandomize, weightSimplify, weights):
326
  def_hyperparams += f"""include("{_escape_filename(operator_filename)}")
327
  {constraints_str}
328
  const binops = {'[' + ', '.join(binary_operators) + ']'}
 
361
  const limitPowComplexity = {"true" if limitPowComplexity else "false"}
362
  const useFrequency = {"true" if useFrequency else "false"}
363
  """
 
364
  op_runner = ""
365
  if len(binary_operators) > 0:
366
  op_runner += """
 
371
  end"""
372
  for i in range(1, len(binary_operators)):
373
  op_runner += f"""
374
+ elseif i === {i + 1}
375
  @inbounds @simd for j=1:clen
376
  x[j] = {binary_operators[i]}(x[j], y[j])
377
  end"""
378
  op_runner += """
379
  end
380
  end"""
 
381
  if len(unary_operators) > 0:
382
  op_runner += """
383
  @inline function UNAOP!(x::Array{Float32, 1}, i::Int, clen::Int)
 
387
  end"""
388
  for i in range(1, len(unary_operators)):
389
  op_runner += f"""
390
+ elseif i === {i + 1}
391
  @inbounds @simd for j=1:clen
392
  x[j] = {unary_operators[i]}(x[j])
393
  end"""
394
  op_runner += """
395
  end
396
  end"""
 
397
  def_hyperparams += op_runner
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  if use_custom_variable_names:
399
  def_hyperparams += f"""
400
+ const varMap = {'["' + '", "'.join(variable_names) + '"]'}"""
401
+ return def_hyperparams
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
 
 
403
 
404
def make_constraints_str(binary_operators, constraints, unary_operators):
    """Return Julia source declaring the operator constraint constants.

    The result has two lines:

    * ``const una_constraints = [...]`` with one integer per entry of
      ``unary_operators``, taken from ``constraints[op]``.
    * ``const bin_constraints = [...]`` with one ``(a, b)`` pair per entry
      of ``binary_operators``, taken from the first two items of
      ``constraints[op]``.

    Entries are separated by ``", "`` and appear in the order of the
    operator lists. An empty operator list yields an empty ``[]``.

    Raises:
        KeyError: if an operator has no entry in ``constraints``.
        ValueError: if a constraint value cannot be formatted with ``:d``.
    """
    # str.join replaces the hand-rolled "first element" flag and repeated
    # string concatenation; the emitted text is unchanged.
    unary_part = ", ".join(f"{constraints[op]:d}" for op in unary_operators)
    binary_part = ", ".join(
        f"({constraints[op][0]:d}, {constraints[op][1]:d})" for op in binary_operators
    )
    return (
        f"const una_constraints = [{unary_part}]\n"
        f"const bin_constraints = [{binary_part}]"
    )
424
 
425
 
426
  def handle_constraints(binary_operators, constraints, unary_operators):