File size: 4,641 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# distutils:language=c++
# cython:language_level=3
from libcpp.vector cimport vector
cdef class MinMaxStatsList:
    """Python-visible owner of a heap-allocated C++ ``CMinMaxStatsList``."""

    # Owning pointer: created in __cinit__, released in __dealloc__.
    cdef CMinMaxStatsList *cmin_max_stats_lst

    def __cinit__(self, int num):
        # ``num`` is passed straight through to the C++ constructor.
        self.cmin_max_stats_lst = new CMinMaxStatsList(num)

    def __dealloc__(self):
        # Free the C++ object allocated in __cinit__.
        del self.cmin_max_stats_lst

    def set_delta(self, float value_delta_max):
        """Forward ``value_delta_max`` to ``CMinMaxStatsList.set_delta``."""
        # Cython dereferences the pointer for member access via ``.``.
        self.cmin_max_stats_lst.set_delta(value_delta_max)
cdef class ResultsWrapper:
    """Holds a C++ ``CSearchResults`` value so it can be passed around from Python."""

    # Stored by value (not a pointer), so no explicit __dealloc__ is defined here.
    cdef CSearchResults cresults

    def __cinit__(self, int num):
        # ``num`` is passed straight through to the ``CSearchResults`` constructor.
        self.cresults = CSearchResults(num)

    def get_search_len(self):
        """Return the ``search_lens`` member of the wrapped ``CSearchResults``."""
        return self.cresults.search_lens
cdef class Action:
    """Extension type bundling an action's float vector with a root-action flag.

    The class previously carried an additional zero-argument ``__cinit__`` that
    was shadowed by the two-argument definition below; that dead definition
    has been dropped so the class has a single, unambiguous constructor.
    """

    cdef int is_root_action
    cdef vector[float] value
    # Not assigned by __cinit__ and not used by any method visible in this class.
    cdef CAction action

    def __cinit__(self, vector[float] value, int is_root_action):
        # Store both constructor arguments on the instance unchanged.
        self.is_root_action = is_root_action
        self.value = value
cdef class Roots:
    """Python-visible owner of a heap-allocated C++ ``CRoots`` object.

    Every public method is a thin forwarder to the same-named ``CRoots``
    member; return values are whatever that member returns, converted to
    Python objects by Cython.
    """

    cdef int root_num
    cdef int action_space_size
    cdef int num_of_sampled_actions
    # Owning pointer: created in __cinit__, released in __dealloc__.
    cdef CRoots *roots
    cdef bool continuous_action_space

    def __cinit__(self, int root_num, list legal_actions_list, int action_space_size, int num_of_sampled_actions,
                  bool continuous_action_space):
        # Keep the scalar configuration on the instance. ``continuous_action_space``
        # is stored too, so the declared attribute reflects the constructor
        # argument like its siblings instead of staying at its zero default.
        self.root_num = root_num
        self.action_space_size = action_space_size
        self.num_of_sampled_actions = num_of_sampled_actions
        self.continuous_action_space = continuous_action_space
        self.roots = new CRoots(root_num, legal_actions_list, action_space_size, num_of_sampled_actions,
                                continuous_action_space)

    def prepare(self, float root_noise_weight, list noises, list value_prefix_pool, list policy_logits_pool,
                vector[int] & to_play_batch):
        """Forward all arguments, in order, to ``CRoots.prepare``."""
        self.roots[0].prepare(root_noise_weight, noises, value_prefix_pool, policy_logits_pool, to_play_batch)

    def prepare_no_noise(self, list value_prefix_pool, list policy_logits_pool, vector[int] & to_play_batch):
        """Forward all arguments, in order, to ``CRoots.prepare_no_noise``."""
        self.roots[0].prepare_no_noise(value_prefix_pool, policy_logits_pool, to_play_batch)

    def get_trajectories(self):
        """Return the result of ``CRoots.get_trajectories``."""
        return self.roots[0].get_trajectories()

    def get_distributions(self):
        """Return the result of ``CRoots.get_distributions``."""
        return self.roots[0].get_distributions()

    def get_sampled_actions(self):
        """Return the result of ``CRoots.get_sampled_actions``."""
        return self.roots[0].get_sampled_actions()

    def get_values(self):
        """Return the result of ``CRoots.get_values``."""
        return self.roots[0].get_values()

    def clear(self):
        """Call ``CRoots.clear`` on the wrapped object."""
        self.roots[0].clear()

    def __dealloc__(self):
        # Free the C++ object allocated in __cinit__.
        del self.roots

    @property
    def num(self):
        """The ``root_num`` value given at construction."""
        return self.root_num
cdef class Node:
    """Extension type wrapping a C++ ``CNode`` held by value.

    The class previously carried an additional zero-argument ``__cinit__`` that
    was shadowed by the five-argument definition below; that dead definition
    (and a commented-out older signature) has been dropped.
    """

    cdef CNode cnode
    cdef bool continuous_action_space

    def __cinit__(self, float prior, vector[int] & legal_actions, int action_space_size, int num_of_sampled_actions,
                  bool continuous_action_space):
        # NOTE(review): every argument is accepted but ignored -- ``cnode`` is
        # not configured from them and ``continuous_action_space`` is not
        # stored. Confirm this is intentional before relying on these values.
        pass

    def expand(self, int to_play, int current_latent_state_index, int batch_index, float value_prefix,
               list policy_logits):
        """Convert ``policy_logits`` to a C++ float vector and call ``CNode.expand``.

        All other arguments are forwarded to ``CNode.expand`` unchanged and in
        the same order.
        """
        # Cython converts the Python list to ``vector[float]`` on assignment.
        cdef vector[float] cpolicy = policy_logits
        self.cnode.expand(to_play, current_latent_state_index, batch_index, value_prefix, cpolicy)
def batch_backpropagate(int current_latent_state_index, float discount_factor, list value_prefixs, list values, list policies,
                        MinMaxStatsList min_max_stats_lst, ResultsWrapper results, list is_reset_list,
                        list to_play_batch):
    """Convert the Python list inputs to C++ vectors and call ``cbatch_backpropagate``.

    ``value_prefixs`` and ``values`` become ``vector[float]``; ``policies``
    becomes ``vector[vector[float]]``. The wrapped C++ objects are taken from
    ``min_max_stats_lst`` and ``results``. ``is_reset_list`` and
    ``to_play_batch`` are handed to the C++ call as given. Returns ``None``.
    """
    # Explicit typed locals make Cython perform the list -> vector conversions
    # up front (the unused ``cdef int i`` local was removed).
    cdef vector[float] cvalue_prefixs = value_prefixs
    cdef vector[float] cvalues = values
    cdef vector[vector[float]] cpolicies = policies

    cbatch_backpropagate(current_latent_state_index, discount_factor, cvalue_prefixs, cvalues, cpolicies,
                         min_max_stats_lst.cmin_max_stats_lst, results.cresults, is_reset_list, to_play_batch)
def batch_traverse(Roots roots, int pb_c_base, float pb_c_init, float discount_factor, MinMaxStatsList min_max_stats_lst,
                   ResultsWrapper results, list virtual_to_play_batch, bool continuous_action_space):
    """Call ``cbatch_traverse`` on the wrapped C++ objects and return four result fields.

    The C++ objects are taken from ``roots``, ``min_max_stats_lst`` and
    ``results``; the remaining arguments are forwarded unchanged.

    Returns a 4-tuple read from ``results.cresults`` after the call:
    ``(latent_state_index_in_search_path, latent_state_index_in_batch,
    last_actions, virtual_to_play_batchs)``.
    """
    cbatch_traverse(
        roots.roots,
        pb_c_base,
        pb_c_init,
        discount_factor,
        min_max_stats_lst.cmin_max_stats_lst,
        results.cresults,
        virtual_to_play_batch,
        continuous_action_space,
    )

    return (
        results.cresults.latent_state_index_in_search_path,
        results.cresults.latent_state_index_in_batch,
        results.cresults.last_actions,
        results.cresults.virtual_to_play_batchs,
    )
|