swarms / runner.py
YvesP's picture
initial load
a162e39
from swarm_policy import SwarmPolicy
def run_episode(env, obs, blues: int, reds: int):
blue_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=True)
red_policy = SwarmPolicy(blues=blues, reds=reds, is_blue=False)
sum_reward = 0
done = False
while not done:
action = blue_policy.predict(obs), red_policy.predict(obs)
obs, reward, done, info = env.step(action)
sum_reward += reward
return obs, sum_reward, done, info