|
import math |
|
import os |
|
import pandas as pd |
|
import random |
|
import shutil |
|
import time |
|
|
|
from code_generation import generate_and_debug, prepare_working_folder, code_execution, get_results |
|
from program_database import ProgramsDatabase, ProgramsDatabaseConfig |
|
|
|
def get_seed_score(nRMSE, convergence_rate): |
|
return { |
|
'bucketed_convergence_rate': int(max(0, convergence_rate)*4), |
|
'bucketed_nRMSE': int(-math.log10(min(1e9, nRMSE))*10) |
|
} |
|
|
|
def funsearch(cfg): |
|
num_trials = cfg.method.num_debugging_trials_per_sample |
|
pde_name = cfg.pde.name |
|
working_folder = cfg.working_folder |
|
model_name = cfg.model.name |
|
num_search_rounds = cfg.method.num_search_rounds |
|
num_initial_seeds = cfg.method.num_initial_seeds |
|
use_sample_solver_init = cfg.method.use_sample_solver_init |
|
assert use_sample_solver_init, 'Sample solvers must be enabled for refinement' |
|
|
|
sample_solver_folder = os.path.join( |
|
'solvers', pde_name, cfg.pde.pde_setting_name, 'seeds' |
|
) |
|
sample_solver_info = pd.read_csv( |
|
os.path.join(sample_solver_folder, 'seed_results.csv') |
|
) |
|
|
|
prepare_working_folder( |
|
cfg, |
|
working_folder=working_folder, |
|
pde_name=pde_name, |
|
use_sample_solver_init=use_sample_solver_init |
|
) |
|
|
|
pd_cfg = ProgramsDatabaseConfig() |
|
program_db = ProgramsDatabase(pd_cfg) |
|
|
|
|
|
seed_path = os.path.join( |
|
'../archived_logs', |
|
pde_name, |
|
cfg.pde.pde_setting_name, |
|
'repeated_sample', |
|
model_name |
|
) |
|
subdirectories = [d for d in os.listdir(seed_path) if os.path.isdir(os.path.join(seed_path, d))] |
|
assert len(subdirectories) == 1, 'Only one subdirectory is expected' |
|
seed_path = os.path.join(seed_path, subdirectories[0]) |
|
result_sheet = pd.read_csv(os.path.join(seed_path, 'test_results.csv')) |
|
|
|
for i in range(num_initial_seeds): |
|
relevant_files = [ |
|
'errors_{idx}.txt', |
|
'implementation_{idx}.py', |
|
'output_{idx}.txt', |
|
] |
|
|
|
complete_seed = True |
|
for file in relevant_files: |
|
if not os.path.exists(os.path.join(seed_path, file.format(idx=i))): |
|
complete_seed = False |
|
break |
|
if result_sheet[result_sheet['round'] == i].empty: |
|
complete_seed = False |
|
|
|
seed_info = result_sheet[result_sheet['round'] == i].to_numpy().tolist()[0] |
|
seed_info = [str(x) for x in seed_info] |
|
if seed_info[1] == 'failed': |
|
complete_seed = False |
|
|
|
if not complete_seed: |
|
continue |
|
|
|
|
|
for file in relevant_files: |
|
source_file = os.path.join(seed_path, file.format(idx=int(i))) |
|
destination_file = os.path.join(working_folder, file.format(idx=int(i))) |
|
shutil.copy(source_file, destination_file) |
|
with open(os.path.join(working_folder, 'test_results.csv'), 'a') as f: |
|
seed_info[0] = str(int(i)) |
|
f.write(','.join(seed_info) + '\n') |
|
|
|
|
|
seed_score = get_seed_score(float(seed_info[1]), float(seed_info[3])) |
|
with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f: |
|
implementation = f.readlines() |
|
program_len = len(implementation) |
|
program_db.register_program( |
|
program=i, |
|
program_len=program_len, |
|
island_id=None, |
|
scores_per_test=seed_score, |
|
) |
|
|
|
for i in range(num_initial_seeds, num_initial_seeds+num_search_rounds): |
|
island_id, seed_ids = program_db.get_seed() |
|
try: |
|
relative_error, elapsed_time, avg_rate = generate_and_debug( |
|
cfg, |
|
round_idx=i, |
|
num_trials=num_trials, |
|
pde_name=pde_name, |
|
working_folder=working_folder, |
|
seed_implementations=seed_ids, |
|
model_name=model_name |
|
) |
|
seed_score = get_seed_score(float(relative_error), float(avg_rate)) |
|
with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f: |
|
implementation = f.readlines() |
|
program_len = len(implementation) |
|
program_db.register_program( |
|
program=i, |
|
program_len=program_len, |
|
island_id=island_id, |
|
scores_per_test=seed_score, |
|
) |
|
except Exception as e: |
|
print(f'Error in round {i}: {e}. Move on to the next sample.') |
|
|
|
|
|
|
|
results = pd.read_csv(os.path.join(working_folder, 'test_results.csv')) |
|
keywords = ['nRMSE', 'elapsed_time', 'convergence_rate'] |
|
for keyword in keywords: |
|
results[keyword] = pd.to_numeric(results[keyword], errors="coerce") |
|
|
|
sorted_results = results.sort_values(by=keywords, ascending=[True, True, False]) |
|
best_idx = int(sorted_results.head(1)["round"].values[0]) |
|
|
|
test_run_id = 999 |
|
shutil.copy( |
|
os.path.join(working_folder, f'implementation_{best_idx}.py'), |
|
os.path.join(working_folder, f'implementation_{test_run_id}.py') |
|
) |
|
execution_results = code_execution( |
|
cfg, |
|
working_folder = working_folder, |
|
round_idx=test_run_id, |
|
pde_name=pde_name, |
|
eval_dataset=os.path.join( |
|
cfg.root_dataset_folder, |
|
cfg.pde.dataset_folder_for_eval.replace('_development.hdf5', '.hdf5') |
|
) |
|
) |
|
|
|
if execution_results['exit_code'] != 0: |
|
relative_error, elapsed_time, avg_rate = None, None, None |
|
else: |
|
relative_error, elapsed_time, avg_rate = get_results( |
|
os.path.join(working_folder, f'output_{test_run_id}.txt') |
|
) |
|
with open(os.path.join(working_folder, 'final_result.txt'), 'w') as f: |
|
f.write('best_idx,relative_error,elapsed_time,avg_rate\n') |
|
f.write(f'{best_idx},{relative_error},{elapsed_time},{avg_rate}\n') |
|
|