Spaces:
Runtime error
Runtime error
File size: 3,865 Bytes
ffe7549 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import numpy as np
import random
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override
from ray.rllib.models.modelv2 import restore_original_dimensions
class HeuristicBase(Policy):
    """Base class for hand-written policies that never learn.

    Subclasses implement ``_do_compute_actions``. This class supplies no-op
    learning and weight hooks, and restores the original observation
    structure before handing the batch to the subclass.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Policy`` and create the exploration object."""
        super().__init__(*args, **kwargs)
        self.exploration = self._create_exploration()

    @override(Policy)
    def learn_on_batch(self, samples):
        """Ignore ``samples``; there is nothing to train.

        Returns:
            An empty dict, so a caller expecting a stats mapping does not
            receive ``None``.
        """
        return {}

    @override(Policy)
    def get_weights(self):
        """No weights to save."""
        return {}

    @override(Policy)
    def set_weights(self, weights):
        """No weights to set."""
        pass

    @override(Policy)
    def compute_actions(
        self,
        obs_batch,
        state_batches=None,
        prev_action_batch=None,
        prev_reward_batch=None,
        info_batch=None,
        episodes=None,
        **kwargs
    ):
        """Compute one action per observation in ``obs_batch``.

        Only ``obs_batch`` is used; the remaining parameters are accepted for
        signature compatibility and ignored.

        Returns:
            Whatever ``_do_compute_actions`` returns for the restored batch.
        """
        obs_batch = restore_original_dimensions(
            np.array(obs_batch, dtype=np.float32), self.observation_space, tensorlib=np
        )
        return self._do_compute_actions(obs_batch)

    def _do_compute_actions(self, obs_batch):
        """Hook for subclasses: turn the restored batch into actions.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement _do_compute_actions()"
        )

    def pick_legal_action(self, legal_action):
        """Return a random index among the entries of ``legal_action`` equal to 1.

        Args:
            legal_action: 1-D action mask; any array-like is accepted.

        Raises:
            ValueError: if the mask marks no action as legal.
        """
        # asarray lets a plain list mask work; ``list == 1`` would be a scalar.
        legal_choices = np.flatnonzero(np.asarray(legal_action) == 1)
        if legal_choices.size == 0:
            # Same exception type numpy would raise, with a clearer message.
            raise ValueError("action mask contains no legal action")
        return np.random.choice(legal_choices)
class AlwaysSameHeuristic(HeuristicBase):
    """
    Pick a random legal move and keep playing it until it becomes illegal.

    NOTE: nothing in this class resets the stored move at episode boundaries;
    it only changes once the action mask no longer allows it.
    """

    # Action index being repeated. ``None`` until the first mask is seen, so
    # each instance draws its own move and no board width is hard-coded.
    _rand_choice = None

    def _do_compute_actions(self, obs_batch):
        """Return ``(actions, [], {})`` with one action per mask in the batch.

        Args:
            obs_batch: mapping with an ``"action_mask"`` entry holding one
                mask per batch item.
        """

        def select_action(legal_action):
            legal_choices = np.flatnonzero(legal_action == 1)
            if self._rand_choice is None or self._rand_choice not in legal_choices:
                # First call, or the stored move is masked out: draw a new one.
                self._rand_choice = self.pick_legal_action(legal_action)
            return self._rand_choice

        return [select_action(x) for x in obs_batch["action_mask"]], [], {}
class LinearHeuristic(HeuristicBase):
    """
    Pick a random start index, then step it by a fixed +1 or -1 on every call.

    When the stepped index is not a legal action (including stepping past
    either end of the mask), a random legal action is chosen instead and
    stepping continues from there.
    """

    # Last action index played. ``None`` until the first mask is seen, so the
    # start index depends on the mask length rather than a hard-coded 7.
    _rand_choice = None
    # Step direction (+1 or -1), drawn per instance on first use.
    _rand_sign = None

    def _do_compute_actions(self, obs_batch):
        """Return ``(actions, [], {})`` with one action per mask in the batch.

        Args:
            obs_batch: mapping with an ``"action_mask"`` entry holding one
                mask per batch item.
        """

        def select_action(legal_action):
            if self._rand_sign is None:
                self._rand_sign = np.random.choice([-1, 1])
            if self._rand_choice is None:
                self._rand_choice = random.randrange(len(legal_action))
            # Step first, then validate, so the start index itself is never played.
            self._rand_choice += self._rand_sign
            legal_choices = np.flatnonzero(legal_action == 1)
            if self._rand_choice not in legal_choices:
                self._rand_choice = self.pick_legal_action(legal_action)
            return self._rand_choice

        return [select_action(x) for x in obs_batch["action_mask"]], [], {}
class BeatLastHeuristic(HeuristicBase):
    """
    Prefer legal actions whose index has a larger total in observation
    channel 1 than in channel 0; otherwise play a random legal action.

    NOTE(review): presumably the observation is (rows, columns, 2) with
    channel 0 holding this agent's pieces and channel 1 the opponent's, so
    this targets columns where the opponent has more pieces. Confirm against
    the environment.
    """

    def _do_compute_actions(self, obs_batch):
        """Return ``(actions, [], {})`` with one action per batch item.

        Args:
            obs_batch: mapping with ``"action_mask"`` and ``"observation"``
                entries, iterated in lockstep.
        """

        def select_action(legal_action, observation):
            legal_choices = np.flatnonzero(legal_action == 1)
            # Collapse the first axis to get per-index totals for each channel.
            obs_sums = np.sum(observation, axis=0)
            desired_actions = np.flatnonzero(obs_sums[:, 0] < obs_sums[:, 1])
            # Filter by legality before drawing, so one illegal desired action
            # cannot force a fully random move while another is still legal.
            candidates = np.intersect1d(desired_actions, legal_choices)
            if candidates.size:
                return np.random.choice(candidates)
            return self.pick_legal_action(legal_action)

        return (
            [
                select_action(x, y)
                for x, y in zip(obs_batch["action_mask"], obs_batch["observation"])
            ],
            [],
            {},
        )
class RandomHeuristic(HeuristicBase):
    """
    Choose a random action among those the mask marks as legal.
    The observation produced by the environment must be a dictionary that
    carries the agent's legal actions under the 'action_mask' key.
    """

    def _do_compute_actions(self, obs_batch):
        """Return ``(actions, [], {})`` with one random legal action per mask."""
        chosen = []
        for mask in obs_batch["action_mask"]:
            chosen.append(self.pick_legal_action(mask))
        return chosen, [], {}
|