# Evaluation utilities for online level generators: reward evaluation,
# diversity metrics (MND / MPD), ME-regularisation estimation, and batch
# level generation for offline tests.
import os
import csv
import time
import random
from itertools import combinations, product

import numpy as np
import torch

from src.smb.level import *
from src.smb.asyncsimlt import AsycSimltPool
from src.drl.me_reg import *
from src.drl.nets import esmb_sample
from src.drl.drl_uses import load_cfgs, load_performance
from src.env.environments import get_padded_obs
from src.olgen.ol_generator import VecOnlineGenerator, OnlineGenerator
from src.olgen.olg_policy import process_obs, RandGenPolicy, RLGenPolicy, EnsembleGenPolicy
from src.utils.filesys import getpath
from src.utils.datastruct import RingQueue
def evaluate_rewards(lvls, rfunc='default', dest_path='', parallel=1, eval_pool=None):
    """Simulate every level in *lvls* and collect its summed reward terms.

    :param lvls: iterable of levels; each is converted with ``str`` before
        being handed to the simulation pool.
    :param rfunc: reward-function name, used only when a pool is created here.
    :param dest_path: when non-empty, the result list is saved there via ``np.save``.
    :param parallel: worker count for an internally created pool.
    :param eval_pool: optional externally owned :class:`AsycSimltPool`; when
        given, it is drained but NOT closed by this function.
    :return: list of per-level reward sums (one list of summed terms per level).
    """
    def _absorb(batch):
        # Each batch entry is (task_id, {reward_name: per-step values});
        # sum each reward term over the rollout.
        for _, record in batch:
            results.append([sum(vals) for vals in zip(*record.values())])

    owns_pool = eval_pool is None
    if owns_pool:
        eval_pool = AsycSimltPool(parallel, rfunc_name=rfunc, verbose=False, test=True)
    results = []
    for lvl in lvls:
        eval_pool.put('evaluate', (0, str(lvl)))
        _absorb(eval_pool.get())
    # An internally created pool is drained by closing it; a borrowed pool must
    # stay usable for the caller, so only wait for its outstanding results.
    _absorb(eval_pool.close() if owns_pool else eval_pool.get(True))
    if len(dest_path):
        np.save(dest_path, results)
    return results
def evaluate_mnd(lvls, refs, parallel=2):
    """Mean novelty distance of *lvls* against the reference set *refs*.

    Each pool result row holds two distance values (presumably Hamming and
    tile-pattern JS divergence — matches the caller in ``evaluate_baseline``).

    :return: tuple ``(mean of column 0, mean of column 1)``.
    """
    pool = AsycSimltPool(parallel, verbose=False, refs=[str(r) for r in refs])
    records = []
    for level in lvls:
        pool.put('mnd_item', str(level))
        records.extend(pool.get())
    records.extend(pool.get(wait=True))
    pool.close()
    table = np.array(records)
    return np.mean(table[:, 0]), np.mean(table[:, 1])
def evaluate_mpd(lvls, parallel=2):
    """Mean pairwise (Hamming) distance over all unordered pairs of *lvls*.

    The DTW component of the pool's 'mpd' task is currently ignored, so a
    single scalar is returned.

    :param lvls: levels to compare; each is stringified for the worker pool.
    :param parallel: number of pool workers; pairs are round-robin balanced.
    :return: ``np.mean`` of all pairwise Hamming distances.
    """
    # Distribute the O(n^2) pair comparisons evenly across the workers.
    task_datas = [[] for _ in range(parallel)]
    for i, (A, B) in enumerate(combinations(lvls, 2)):
        task_datas[i % parallel].append((str(A), str(B)))
    hms = []
    eval_pool = AsycSimltPool(parallel, verbose=False)
    for task_data in task_datas:
        eval_pool.put('mpd', task_data)
    res = eval_pool.get(wait=True)
    # Fix: the pool was previously never closed here (resource leak);
    # close after draining, mirroring evaluate_mnd.
    eval_pool.close()
    for task_hms, _ in res:
        hms += task_hms
        # dtws += task_dtws  # DTW results intentionally unused for now
    return np.mean(hms)  # , np.mean(dtws)
def evaluate_gen_log(path, parallel=5):
    """Evaluate every logged generation step under ``<path>/gen_log``.

    For each batch of levels, computes reward mean/std and mean pairwise
    Hamming distance, and appends one row to ``<path>/step_tests.csv``.

    :param path: training-run directory containing ``gen_log`` and config.
    :param parallel: worker count forwarded to the metric evaluators.
    """
    rfunc_name = load_cfgs(path, 'rfunc')
    # Fix: the header previously listed 8 columns (incl. mnd-* and mpd-dtw)
    # while each data row had only 5 values, misaligning the CSV and the
    # zip(cols, line) progress print. Header now matches the row layout.
    cols = ['step', 'r-avg', 'r-std', 'mpd-hm', '']
    # `with` guarantees the file is closed even if an evaluation raises.
    with open(getpath(f'{path}/step_tests.csv'), 'w', newline='') as f:
        wrtr = csv.writer(f)
        wrtr.writerow(cols)
        start_time = time.time()
        for lvls, name in traverse_batched_level_files(f'{path}/gen_log'):
            step = name[4:]  # assumes file names look like 'step<k>' — keep the suffix
            rewards = [sum(item) for item in evaluate_rewards(lvls, rfunc_name, parallel=parallel)]
            r_avg, r_std = np.mean(rewards), np.std(rewards)
            mpd = evaluate_mpd(lvls, parallel=parallel)
            line = [step, r_avg, r_std, mpd, '']
            wrtr.writerow(line)
            f.flush()  # keep the CSV readable while long evaluations run
            print(
                f'{path}: step{step} evaluated in {time.time()-start_time:.1f}s -- '
                + '; '.join(f'{k}: {v}' for k, v in zip(cols, line))
            )
def evaluate_generator(generator, nr=200, h=50, parallel=5, dest_path=None, additional_info=None, rfunc_name='default'):
    """Evaluate a level generator on reward and diversity.

    :param generator: object with a ``generate(n, h)`` method returning levels.
    :param nr: number of levels generated for the reward evaluation.
    :param h: horizon (segments per level).
    :param parallel: worker count for the evaluation pools.
    :param dest_path: optional CSV path; when given, results are written there.
    :param additional_info: extra key/value pairs merged into the result dict.
    :param rfunc_name: reward-function name for the reward evaluation.
    :return: dict with keys ``r-avg``, ``r-std``, ``div`` plus *additional_info*.
    """
    if additional_info is None:
        additional_info = {}
    # --- reward ---
    lvls = generator.generate(nr, h)
    rewards = [sum(item) for item in evaluate_rewards(lvls, parallel=parallel, rfunc=rfunc_name)]
    r_avg, r_std = np.mean(rewards), np.std(rewards)
    # --- diversity (mean pairwise distance) ---
    # Fix: evaluate_mpd returns a single scalar (its DTW term is disabled),
    # so the old `mpd, *_ = evaluate_mpd(...)` star-unpacking raised
    # TypeError on the non-iterable numpy scalar. Plain assignment is correct.
    mpd = evaluate_mpd(generator.generate(3000 * 2, h), parallel=parallel)
    res = {
        'r-avg': r_avg, 'r-std': r_std, 'div': mpd,
    }
    res.update(additional_info)
    if dest_path:
        with open(getpath(dest_path), 'w', newline='') as f:
            keys = [k for k in res.keys()]
            wrtr = csv.writer(f)
            wrtr.writerow(keys + [''])
            wrtr.writerow([res[k] for k in keys] + [''])
    return res
def evaluate_jmer(training_path, n=1000, max_parallel=None, device='cuda:0'):
    """Estimate discounted mixture-exclusion (ME) regularisation values for a trained policy.

    Rolls out the policy stored at ``<training_path>/policy.pth`` from random
    playable initial latent vectors, accumulating the discounted ME term at
    each of ``h`` steps.

    :param training_path: run directory holding config values and ``policy.pth``.
    :param n: number of rollouts to evaluate.
    :param max_parallel: rollouts evaluated per batch (defaults to ``min(n, 512)``).
    :param device: torch device for the policy model.
    :return: list of ``n`` discounted ME-regularisation sums, or ``0.`` when
        the run's configuration lacks the required keys (pre-ME training run).
    """
    init_vecs = np.load(getpath('smb/init_latvecs.npy'))
    try:
        # NOTE(review): `m` is loaded but unused here — presumably kept for
        # config-completeness; confirm before removing.
        m, histlen, h, gamma, me_type = load_cfgs(training_path, 'm', 'N', 'h', 'gamma', 'me_type')
    except KeyError:
        # Configuration predates ME training; nothing to evaluate.
        return 0.
    # Exclusion flavour is selected by the run's `me_type` config value.
    mereg_func = LogWassersteinExclusion(1.) if me_type == 'logw' else WassersteinExclusion(1.)
    model = torch.load(getpath(training_path, 'policy.pth'), map_location=device)
    model.requires_grad_(False)  # inference only — no gradients needed
    if max_parallel is None:
        max_parallel = min(n, 512)
    me_regs = []
    # One observation history queue per parallel rollout slot.
    obs_queues = [RingQueue(histlen) for _ in range(max_parallel)]
    while len(me_regs) < n:
        # Last batch may be smaller than max_parallel.
        size = min(max_parallel, n - len(me_regs))
        mereg_vals, discount = np.zeros([size]), 1.
        veclists = [[] for _ in range(size)]
        # Reset each active slot with a random playable initial latent vector.
        for queue, veclist in zip(obs_queues, veclists):
            queue.clear()
            init_latvec = init_vecs[random.randrange(0, len(init_vecs))]
            queue.push(init_latvec)
            veclist.append(init_latvec)
        for _ in range(h):
            # Stack padded observation histories for the active slots only.
            obs = np.stack([get_padded_obs(queue.to_list(), histlen) for queue in obs_queues[:size]])
            muss, stdss, betas = model.get_intermediate(process_obs(obs, device))
            # Accumulate the discounted ME-regularisation term for this step.
            mereg_vals += discount * mereg_func.forward(muss, stdss, betas).squeeze().cpu().numpy()
            discount *= gamma
            # Sample next latent vectors from the ensemble and advance rollouts.
            actions, _ = esmb_sample(muss, stdss, betas)
            for queue, veclist, action in zip(obs_queues, veclists, actions.cpu().numpy()):
                queue.push(action)
                veclist.append(action)
        me_regs += mereg_vals.tolist()
    return me_regs
def evaluate_baseline(*rfuncs, parallel=4):
    """Evaluate a random-policy baseline generator.

    Measures diversity (MND against a fresh reference batch) and the mean
    reward under each named reward function, then writes a two-row CSV to
    ``training_data/baselines.csv``.

    :param rfuncs: reward-function names to evaluate; ones that raise
        AttributeError are skipped (best-effort).
    :param parallel: worker count for the evaluation pools.
    """
    n_reward, n_lvls, n_refs, seg_len = 100, 1000, 200, 50
    baseline = OnlineGenerator(RandGenPolicy())
    lvls = baseline.generate(n_lvls, seg_len)
    refs = baseline.generate(n_refs, seg_len)
    div_h, div_js = evaluate_mnd(lvls, refs, parallel=parallel)
    keys, vals = ['d-h', 'd-js'], [div_h, div_js]
    print(f'Diversity of baseline generator: Hamming {div_h:.2f}; TPJS {div_js:.2f}')
    for rfunc in rfuncs:
        try:
            print(f'Start to evaluate {rfunc}')
            t0 = time.time()
            batch = baseline.generate(n_reward, seg_len)
            rewards = [sum(item) for item in evaluate_rewards(batch, parallel=parallel, rfunc=rfunc)]
            keys.append(rfunc)
            vals.append(np.mean(rewards))
            print(f'Evaluation for {rfunc} finished in {time.time()-t0:.2f}s')
            print(f'Evaluation results for {rfunc}: {vals[-1]:.2f}')
        except AttributeError:
            # Unknown/unsupported reward function — skip and keep going.
            continue
    with open(getpath('training_data', 'baselines.csv'), 'w', newline='') as f:
        wrtr = csv.writer(f)
        wrtr.writerow(keys)
        wrtr.writerow(vals)
def sample_initial():
    """Draw 500 distinct playable initial latent vectors and save them to
    ``analysis/initial_seg.npy`` for reuse by the test-level generators."""
    latvecs = np.load(getpath('smb/init_latvecs.npy'))
    picked = random.sample([*range(len(latvecs))], 500)
    np.save(getpath('analysis/initial_seg.npy'), latvecs[picked, :])
def generate_levels_for_test(h=25):
    """Generate sample level batches for every trained policy variant.

    Loads the shared initial segments from ``analysis/initial_seg.npy`` and,
    for each training run under ``training_data/``, writes a batch of levels
    to the mirrored path under ``test_data/``.

    :param h: horizon (segments per level) for every generated batch.
    """
    init_set = np.load(getpath('analysis/initial_seg.npy'))

    def _generate_batch(policy, dest):
        # Generate one batch with `policy` and save it at `dest`;
        # missing run artefacts are reported and skipped (best-effort).
        try:
            t0 = time.time()
            generator = VecOnlineGenerator(policy, vec_num=len(init_set))
            folder, _ = os.path.split(getpath(dest))
            os.makedirs(folder, exist_ok=True)
            generator.re_init(init_set)
            lvls = generator.generate(len(init_set), h, rand_init=False)
            save_batch(lvls, dest)
            print('Save to', dest, '%.2fs' % (time.time() - t0))
        except FileNotFoundError as e:
            print(e)

    # Reward-parameter sweeps (fhp / lgp objectives), 5 trials each.
    for l, m in product(['0.0', '0.1', '0.2', '0.3', '0.4', '0.5'], [2, 3, 4, 5]):
        for i in range(1, 6):
            for variant in ('fhp', 'lgp'):
                run = f'varpm-{variant}/l{l}_m{m}/t{i}'
                _generate_batch(RLGenPolicy.from_path(f'training_data/{run}'),
                                f'test_data/{run}/samples.lvls')
    # Single-policy RL baselines, 5 trials each.
    for algo in ['sac', 'egsac', 'asyncsac', 'pmoe']:
        for i in range(1, 6):
            for variant in ('fhp', 'lgp'):
                run = f'{algo}/{variant}/t{i}'
                _generate_batch(RLGenPolicy.from_path(f'training_data/{run}'),
                                f'test_data/{run}/samples.lvls')
    # Ensemble baselines, 4 trials each.
    for algo in ['sunrise', 'dvd']:
        for i in range(1, 5):
            for variant in ('fhp', 'lgp'):
                run = f'{algo}/{variant}/t{i}'
                _generate_batch(EnsembleGenPolicy.from_path(f'training_data/{run}'),
                                f'test_data/{run}/samples.lvls')
if __name__ == '__main__':
    # Script entry point: regenerate the sample level batches under test_data/.
    generate_levels_for_test()