ShiyuHuang commited on
Commit
2322e9b
1 Parent(s): 7da2e8e

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. goal_keeper.py +1001 -0
  2. openrl_policy.py +446 -0
  3. openrl_utils.py +421 -0
  4. submission.py +81 -0
goal_keeper.py ADDED
@@ -0,0 +1,1001 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright 2023 The OpenRL Authors.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # original code from https://github.com/Sarvar-Anvarov/Google-Research-Football/blob/main/gfootball.py
18
+ # modified by TARTRL team
19
+
20
+ import math
21
+ import random
22
+ import numpy as np
23
+
24
+ from functools import wraps
25
+ from enum import Enum
26
+ from typing import *
27
+
28
+
29
+
30
class Action(Enum):
    """Discrete action ids of the Google Research Football default action set.

    Values 1-8 are the eight movement directions; the rest are ball
    actions and sticky-action toggles/releases.
    """
    Idle = 0
    # movement directions (sticky)
    Left = 1
    TopLeft = 2
    Top = 3
    TopRight = 4
    Right = 5
    BottomRight = 6
    Bottom = 7
    BottomLeft = 8
    # ball actions
    LongPass = 9
    HighPass = 10
    ShortPass = 11
    Shot = 12
    # sticky toggles and their releases
    Sprint = 13
    ReleaseDirection = 14
    ReleaseSprint = 15
    Slide = 16
    Dribble = 17
    ReleaseDribble = 18
50
+
51
+
52
# The eight movement actions and their grid direction vectors, kept in the
# SAME order so an argmax over the vectors indexes the matching action
# (relied upon by get_best_direction).
ALL_DIRECTION_ACTIONS = [Action.Left, Action.TopLeft, Action.Top, Action.TopRight, Action.Right, Action.BottomRight, Action.Bottom, Action.BottomLeft]
# (x, y) offsets: x grows toward the opponent goal, y grows toward the bottom of the pitch.
ALL_DIRECTION_VECS = [(-1, 0), (-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1)]
54
+
55
# Maps each bit of the raw obs['sticky_actions'] vector to its Action enum;
# order must match the environment's sticky-action layout.
sticky_index_to_action = [
    Action.Left,
    Action.TopLeft,
    Action.Top,
    Action.TopRight,
    Action.Right,
    Action.BottomRight,
    Action.Bottom,
    Action.BottomLeft,
    Action.Sprint,
    Action.Dribble
]
67
+
68
+ GOAL_BIAS = 0.01
69
+
70
class PlayerRole(Enum):
    """Player role ids reported in obs['left_team_roles']/['right_team_roles']."""
    GoalKeeper = 0
    CenterBack = 1
    LeftBack = 2
    RightBack = 3
    DefenceMidfield = 4
    CentralMidfield = 5
    LeftMidfield = 6
    # NOTE: the 'RIght' capitalisation typo is kept on purpose — other code
    # may reference this member by name.
    RIghtMidfield = 7
    AttackMidfield = 8
    CentralFront = 9
81
+
82
+
83
class GameMode(Enum):
    """Game-mode ids reported in obs['game_mode']; Normal is open play."""
    Normal = 0
    KickOff = 1
    GoalKick = 2
    FreeKick = 3
    Corner = 4
    ThrowIn = 5
    Penalty = 6
91
+
92
+
93
def human_readable_agent(agent: Callable[[Dict], Action]):
    """
    Decorator allowing for more human-friendly implementation of the agent function.
    @human_readable_agent
    def my_agent(obs):
        ...
        return football_action_set.action_right
    """
    @wraps(agent)
    def agent_wrapper(obs) -> List[int]:
        # Replace the raw sticky-action bit vector with a set of Action enums.
        active = set()
        for idx, flag in enumerate(obs['sticky_actions']):
            if flag:
                active.add(sticky_index_to_action[idx])
        obs['sticky_actions'] = active
        # Strongly type the game mode.
        obs['game_mode'] = GameMode(obs['game_mode'])
        # In single-agent mode 'designated' always duplicates 'active'; drop it.
        if 'designated' in obs:
            del obs['designated']
        # Convert raw role ids into PlayerRole enums for both teams.
        obs['left_team_roles'] = [PlayerRole(r) for r in obs['left_team_roles']]
        obs['right_team_roles'] = [PlayerRole(r) for r in obs['right_team_roles']]
        return [agent(obs).value]

    return agent_wrapper
120
+
121
def find_patterns(obs, player_x, player_y):
    """Return the memory-pattern list of the first matching group.

    Groups are tried in priority order; the last group always matches,
    so a list is always returned in practice.
    """
    for build_group in groups_of_memory_patterns:
        candidate = build_group(obs, player_x, player_y)
        if candidate["environment_fits"](obs, player_x, player_y):
            return candidate["get_memory_patterns"](obs, player_x, player_y)
127
+
128
+
129
def get_action_of_agent(obs, player_x, player_y):
    """Pick an action: first matching pattern of the first matching group wins."""
    candidates = find_patterns(obs, player_x, player_y)
    for build_pattern in candidates:
        pattern = build_pattern(obs, player_x, player_y)
        if pattern["environment_fits"](obs, player_x, player_y):
            return pattern["get_action"](obs, player_x, player_y)
137
+
138
+
139
def get_distance(x1, y1, right_team):
    """Euclidean distance from (x1, y1) to an opponent position.

    The y components are scaled by 2.38 so distances reflect the pitch's
    x:y coordinate aspect ratio ([-1, 1] x [-0.42, 0.42]).
    """
    dx = x1 - right_team[0]
    dy = y1 * 2.38 - right_team[1] * 2.38
    return math.sqrt(dx ** 2 + dy ** 2)
142
+
143
+
144
def run_to_ball_bottom(obs, player_x, player_y):
    """ chase the ball when it sits straight below the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is below, on (almost) the same vertical line """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_y > player_y and abs(ball_x - player_x) < 0.01)

    def get_action(obs, player_x, player_y):
        """ move straight down """
        return Action.Bottom

    return dict(environment_fits=environment_fits, get_action=get_action)
159
+
160
+
161
def run_to_ball_bottom_left(obs, player_x, player_y):
    """ chase the ball when it lies below and to the left of the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is in the bottom-left quadrant relative to the player """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x < player_x and ball_y > player_y)

    def get_action(obs, player_x, player_y):
        """ move diagonally down-left """
        return Action.BottomLeft

    return dict(environment_fits=environment_fits, get_action=get_action)
176
+
177
+
178
def run_to_ball_bottom_right(obs, player_x, player_y):
    """ chase the ball when it lies below and to the right of the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is in the bottom-right quadrant relative to the player """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x > player_x and ball_y > player_y)

    def get_action(obs, player_x, player_y):
        """ move diagonally down-right """
        return Action.BottomRight

    return dict(environment_fits=environment_fits, get_action=get_action)
193
+
194
+
195
def run_to_ball_left(obs, player_x, player_y):
    """ chase the ball when it sits straight to the player's left """
    def environment_fits(obs, player_x, player_y):
        """ ball is to the left, on (almost) the same horizontal line """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x < player_x and abs(ball_y - player_y) < 0.01)

    def get_action(obs, player_x, player_y):
        """ move straight left """
        return Action.Left

    return dict(environment_fits=environment_fits, get_action=get_action)
210
+
211
+
212
def run_to_ball_right(obs, player_x, player_y):
    """ chase the ball when it sits straight to the player's right """
    def environment_fits(obs, player_x, player_y):
        """ ball is to the right, on (almost) the same horizontal line """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x > player_x and abs(ball_y - player_y) < 0.01)

    def get_action(obs, player_x, player_y):
        """ move straight right """
        return Action.Right

    return dict(environment_fits=environment_fits, get_action=get_action)
227
+
228
+
229
def run_to_ball_top(obs, player_x, player_y):
    """ chase the ball when it sits straight above the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is above, on (almost) the same vertical line """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_y < player_y and abs(ball_x - player_x) < 0.01)

    def get_action(obs, player_x, player_y):
        """ move straight up """
        return Action.Top

    return dict(environment_fits=environment_fits, get_action=get_action)
244
+
245
+
246
def run_to_ball_top_left(obs, player_x, player_y):
    """ chase the ball when it lies above and to the left of the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is in the top-left quadrant relative to the player """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x < player_x and ball_y < player_y)

    def get_action(obs, player_x, player_y):
        """ move diagonally up-left """
        return Action.TopLeft

    return dict(environment_fits=environment_fits, get_action=get_action)
261
+
262
+
263
def run_to_ball_top_right(obs, player_x, player_y):
    """ chase the ball when it lies above and to the right of the player """
    def environment_fits(obs, player_x, player_y):
        """ ball is in the top-right quadrant relative to the player """
        ball_x, ball_y = obs["ball"][0], obs["ball"][1]
        return bool(ball_x > player_x and ball_y < player_y)

    def get_action(obs, player_x, player_y):
        """ move diagonally up-right """
        return Action.TopRight

    return dict(environment_fits=environment_fits, get_action=get_action)
278
+
279
+
280
def idle(obs, player_x, player_y):
    """ universal fallback: always applicable, just stand still """
    def environment_fits(obs, player_x, player_y):
        """ matches every environment """
        return True

    def get_action(obs, player_x, player_y):
        """ emit Idle """
        return Action.Idle

    return dict(environment_fits=environment_fits, get_action=get_action)
291
+
292
+
293
def start_sprinting(obs, player_x, player_y):
    """ ensure the sprint sticky action is on (dropping dribble first) """
    def environment_fits(obs, player_x, player_y):
        """ matches while sprint is not already active """
        return bool(Action.Sprint not in obs["sticky_actions"])

    def get_action(obs, player_x, player_y):
        """ release dribble before sprinting, then start the sprint """
        if Action.Dribble in obs["sticky_actions"]:
            return Action.ReleaseDribble
        return Action.Sprint

    return dict(environment_fits=environment_fits, get_action=get_action)
308
+
309
+
310
def corner(obs, player_x, player_y):
    """ aim into the box, then deliver a high pass on corners """
    def environment_fits(obs, player_x, player_y):
        """ matches the Corner game mode """
        return bool(obs['game_mode'] == GameMode.Corner)

    def get_action(obs, player_x, player_y):
        """ face the goal area diagonally, then cross """
        aim = Action.TopRight if player_y > 0 else Action.BottomRight
        if aim not in obs["sticky_actions"]:
            return aim
        return Action.HighPass

    return dict(environment_fits=environment_fits, get_action=get_action)
330
+
331
+
332
def free_kick(obs, player_x, player_y):
    """ shoot near goal, otherwise short-pass on free kicks """
    def environment_fits(obs, player_x, player_y):
        """ matches the FreeKick game mode """
        return bool(obs['game_mode'] == GameMode.FreeKick)

    def get_action(obs, player_x, player_y):
        """ close to goal: aim diagonally and shoot; far: aim and short-pass """
        if player_x > 0.5:
            aim = Action.TopRight if player_y > 0 else Action.BottomRight
            if aim not in obs["sticky_actions"]:
                return aim
            return Action.Shot
        # far from goal: aim toward the centre of the pitch instead
        aim = Action.BottomRight if player_y > 0 else Action.TopRight
        if aim not in obs["sticky_actions"]:
            return aim
        return Action.ShortPass

    return dict(environment_fits=environment_fits, get_action=get_action)
363
+
364
+
365
def goal_kick(obs, player_x, player_y):
    """ short pass forward on goal kicks """
    def environment_fits(obs, player_x, player_y):
        """ matches the GoalKick game mode """
        return bool(obs['game_mode'] == GameMode.GoalKick)

    def get_action(obs, player_x, player_y):
        """ face bottom-right first, then play a short pass """
        if Action.BottomRight in obs["sticky_actions"]:
            return Action.ShortPass
        return Action.BottomRight

    return dict(environment_fits=environment_fits, get_action=get_action)
381
+
382
+
383
def kick_off(obs, player_x, player_y):
    """ short pass sideways at kick off """
    def environment_fits(obs, player_x, player_y):
        """ matches the KickOff game mode """
        return bool(obs['game_mode'] == GameMode.KickOff)

    def get_action(obs, player_x, player_y):
        """ face toward the middle of the pitch, then play a short pass """
        aim = Action.Top if player_y > 0 else Action.Bottom
        if aim not in obs["sticky_actions"]:
            return aim
        return Action.ShortPass

    return dict(environment_fits=environment_fits, get_action=get_action)
403
+
404
+
405
def penalty(obs, player_x, player_y):
    """ pick a corner at random, then shoot, on penalties """
    def environment_fits(obs, player_x, player_y):
        """ matches the Penalty game mode """
        return bool(obs['game_mode'] == GameMode.Penalty)

    def get_action(obs, player_x, player_y):
        """ 50/50 aim top- or bottom-right (if no corner chosen yet), then shoot """
        sticky = obs["sticky_actions"]
        # draw first so the RNG stream matches the original call order
        roll = random.random()
        if roll < 0.5 and Action.TopRight not in sticky and Action.BottomRight not in sticky:
            return Action.TopRight
        if Action.BottomRight not in sticky:
            return Action.BottomRight
        return Action.Shot

    return dict(environment_fits=environment_fits, get_action=get_action)
426
+
427
def throw_in(obs, player_x, player_y):
    """ short pass forward on throw-ins """
    def environment_fits(obs, player_x, player_y):
        """ matches the ThrowIn game mode """
        return bool(obs['game_mode'] == GameMode.ThrowIn)

    def get_action(obs, player_x, player_y):
        """ face right first, then play a short pass """
        if Action.Right in obs["sticky_actions"]:
            return Action.ShortPass
        return Action.Right

    return dict(environment_fits=environment_fits, get_action=get_action)
443
+
444
+
445
def defence_memory_patterns(obs, player_x, player_y):
    """ patterns for when our team does not own the ball: chase it """
    def environment_fits(obs, player_x, player_y):
        """ matches whenever the left team does not own the ball """
        return bool(obs["ball_owned_team"] != 0)

    def get_memory_patterns(obs, player_x, player_y):
        """ project the ball forward, then chase it in the right direction.

        NOTE: mutates obs["ball"] in place to lead the ball by its velocity —
        downstream patterns in this tick see the shifted position.
        """
        ball = obs["ball"]
        ball[0] += obs["ball_direction"][0] * 7
        ball[1] += obs["ball_direction"][1] * 3
        # when an opponent carries the ball away from the y centre, aim
        # slightly goal-side and in-field of it
        if abs(ball[1]) > 0.07 and obs["ball_owned_team"] == 1:
            ball[0] -= 0.01
            ball[1] += -0.01 if ball[1] > 0 else 0.01

        return [
            start_sprinting,
            run_to_ball_right,
            run_to_ball_left,
            run_to_ball_bottom,
            run_to_ball_top,
            run_to_ball_top_right,
            run_to_ball_top_left,
            run_to_ball_bottom_right,
            run_to_ball_bottom_left,
            idle,
        ]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
482
+
483
def goalkeeper_memory_patterns(obs, player_x, player_y):
    """ patterns for when the controlled player is the keeper holding the ball """
    def environment_fits(obs, player_x, player_y):
        """ active player is the goalkeeper (index 0) and owns the ball """
        return bool(obs["ball_owned_player"] == obs["active"]
                    and obs["ball_owned_team"] == 0
                    and obs["ball_owned_player"] == 0)

    def get_memory_patterns(obs, player_x, player_y):
        """ clear the ball upfield, otherwise idle """
        return [long_pass_forward, idle]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
503
+
504
+
505
def offence_memory_patterns(obs, player_x, player_y):
    """ patterns for when the controlled player carries the ball """
    def environment_fits(obs, player_x, player_y):
        """ our team owns the ball and the active player is the carrier """
        return bool(obs["ball_owned_team"] == 0
                    and obs["ball_owned_player"] == obs["active"])

    def get_memory_patterns(obs, player_x, player_y):
        """ attacking options, most specific first """
        return [
            close_to_goalkeeper_shot,
            spot_shot,
            cross,
            long_pass_forward,
            keep_the_ball,
            idle,
        ]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
527
+
528
+
529
def other_memory_patterns(obs, player_x, player_y):
    """ catch-all group: always matches, only idles """
    def environment_fits(obs, player_x, player_y):
        """ matches every environment """
        return True

    def get_memory_patterns(obs, player_x, player_y):
        """ idle is the only option """
        return [idle]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
543
+
544
def special_game_modes_memory_patterns(obs, player_x, player_y):
    """ set-piece patterns for any non-Normal game mode """
    def environment_fits(obs, player_x, player_y):
        """ matches whenever the game mode is not open play """
        return bool(obs['game_mode'] != GameMode.Normal)

    def get_memory_patterns(obs, player_x, player_y):
        """ one handler per set-piece kind, idle as a safety net """
        return [
            corner,
            free_kick,
            goal_kick,
            kick_off,
            penalty,
            throw_in,
            idle,
        ]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
567
+
568
+
569
def special_spot_shot(obs, player_x, player_y):
    """ shoot from a central position deep in the opponent half """
    def environment_fits(obs, player_x, player_y):
        """ player is past x=0.8 and within the central channel """
        return bool(player_x > 0.8 and abs(player_y) < 0.21)

    def get_memory_patterns(obs, player_x, player_y):
        """ shot first, idle as a fallback.

        NOTE(review): `shot` is not defined in this part of the file —
        presumably defined further down; confirm before enabling this group.
        """
        return [shot, idle]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
587
+
588
+
589
def own_goal(obs, player_x, player_y):
    """ (disabled in groups list) pattern group near the own goal line """
    def environment_fits(obs, player_x, player_y):
        """ player is nearly on the own goal line, off the exact y centre.

        NOTE(review): `and player_y` is a truthiness test (any non-zero y);
        it looks like a range check was intended — confirm before enabling.
        """
        return bool(player_x < -0.9 and player_y)

    def get_memory_patterns(obs, player_x, player_y):
        """ delegate to own_goal_2 (defined elsewhere in the file) """
        return [own_goal_2]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
606
+
607
def get_best_direction(obs, target_direction):
    """Return the movement Action best aligned with the path to a target.

    Scores every unit direction vector by its dot product with the vector
    from the active player to target_direction and picks the maximum.
    """
    me = obs["left_team"][obs["active"]]
    to_target = np.array(target_direction) - me
    scores = []
    for vec in ALL_DIRECTION_VECS:
        unit = np.array(vec) / np.linalg.norm(np.array(vec))
        scores.append(np.dot(to_target, unit))
    return ALL_DIRECTION_ACTIONS[int(np.argmax(scores))]
613
+
614
def get_distance2ball(obs):
    """Euclidean distance from the active left-team player to the ball (xy only)."""
    delta = obs["ball"][:2] - obs["left_team"][obs["active"]]
    return np.linalg.norm(delta)
616
+
617
def get_target2line(obs):
    """Keeper positioning target on the shot line.

    Returns the point 0.03 away from the own-goal centre (-1, 0) along the
    ray toward the ball, so the keeper covers the shooting angle.
    """
    active_position = obs["left_team"][obs["active"]]
    ball_x, ball_y = obs['ball'][0], obs['ball'][1]
    dx = ball_x + 1
    # +1e-5 guards against division by zero when the ball sits on the goal centre
    dist = (dx ** 2 + ball_y ** 2) ** 0.5 + 1e-5
    return np.array([0.03 * (dx / dist) - 1, 0.03 * (ball_y / dist)])
625
+
626
def already_near_goal(obs, player_x, player_y):
    """ keeper is already home: stop moving and drop all sticky actions """
    def environment_fits(obs, player_x, player_y):
        """ active player is within 0.02 of the keeper's home spot """
        me = obs["left_team"][obs["active"]]
        gap = np.linalg.norm(np.array([-1 + GOAL_BIAS, 0]) - me)
        return bool(gap < 0.02)

    def get_action(obs, player_x, player_y):
        """ release sprint, then dribble, then any direction; finally idle """
        sticky = obs["sticky_actions"]
        if Action.Sprint in sticky:
            return Action.ReleaseSprint
        if Action.Dribble in sticky:
            return Action.ReleaseDribble
        if sticky:
            return Action.ReleaseDirection
        return Action.Idle

    return dict(environment_fits=environment_fits, get_action=get_action)
649
+
650
def already_in_line(obs, player_x, player_y):
    """ keeper is already on the shot line: stop and drop sticky actions """
    def environment_fits(obs, player_x, player_y):
        """ active player is within 0.02 of the shot-line target """
        target = get_target2line(obs)
        gap = np.linalg.norm(target - obs['left_team'][obs['active']])
        return bool(gap < 0.02)

    def get_action(obs, player_x, player_y):
        """ release sprint, then dribble, then any direction; finally idle """
        sticky = obs["sticky_actions"]
        if Action.Sprint in sticky:
            return Action.ReleaseSprint
        if Action.Dribble in sticky:
            return Action.ReleaseDribble
        if sticky:
            return Action.ReleaseDirection
        return Action.Idle

    return dict(environment_fits=environment_fits, get_action=get_action)
673
+
674
def run_to_goal(obs, player_x, player_y):
    """ always-applicable pattern steering the keeper back toward home """
    def environment_fits(obs, player_x, player_y):
        """ matches every environment """
        return True

    def get_action(obs, player_x, player_y):
        """ head for the spot just in front of the own goal centre """
        return get_best_direction(obs, [-1 + GOAL_BIAS, 0])

    return dict(environment_fits=environment_fits, get_action=get_action)
688
+
689
def run_to_line(obs, player_x, player_y):
    """ always-applicable pattern steering the keeper onto the shot line """
    def environment_fits(obs, player_x, player_y):
        """ matches every environment """
        return True

    def get_action(obs, player_x, player_y):
        """ head for the shot-line target computed from the ball position """
        return get_best_direction(obs, get_target2line(obs))

    return dict(environment_fits=environment_fits, get_action=get_action)
699
+
700
def goal_keeper_far_pattern(obs, player_x, player_y):
    """ safety group: send the keeper home when he need not play the ball """
    def environment_fits(obs, player_x, player_y):
        """ we control the keeper and the ball is far / covered by a teammate """
        if obs["active"] != 0:
            return False
        me = obs["left_team"][0]
        dist2ball = np.linalg.norm(obs["ball"][:2] - me)
        # ball is far away
        if dist2ball > 0.5:
            return True
        # a teammate already carries the ball
        if obs['ball_owned_team'] == 0 and obs['ball_owned_player'] != 0:
            return True
        # keeper is out of position and a teammate is closer to the ball
        if me[0] > -0.7 or abs(me[1]) > 0.25:
            for teammate_pos in obs['left_team'][1:]:
                if np.linalg.norm(obs["ball"][:2] - teammate_pos) < dist2ball:
                    return True
        return False

    def get_memory_patterns(obs, player_x, player_y):
        """ stop if already home, otherwise sprint back to goal """
        return [
            already_near_goal,
            start_sprinting,
            run_to_goal,
        ]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
727
+
728
def ball_distance_2_5(obs, player_x, player_y):
    """ keeper positioning when the ball is at mid range (0.2 - 0.5) """
    def environment_fits(obs, player_x, player_y):
        """ we control the keeper, opponents/nobody own the ball, ball mid-range.

        NOTE(review): the [0.2, 0.5] band overlaps ball_distance_close's
        < 0.25 band; group order in groups_of_memory_patterns breaks the tie.
        """
        if obs["active"] != 0 or obs['ball_owned_team'] == 0:
            return False
        distance2ball = get_distance2ball(obs)
        return bool(0.2 <= distance2ball <= 0.5)

    def get_memory_patterns(obs, player_x, player_y):
        """ hold the shot line, sprinting into position if needed """
        return [
            already_in_line,
            start_sprinting,
            run_to_line,
        ]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
748
+
749
def ball_distance_close(obs, player_x, player_y):
    """ keeper reaction when the ball is very close """
    def environment_fits(obs, player_x, player_y):
        """ we control the keeper, opponents/nobody own the ball, ball close """
        if obs["active"] != 0 or obs['ball_owned_team'] == 0:
            return False
        return bool(get_distance2ball(obs) < 0.25)

    def get_memory_patterns(obs, player_x, player_y):
        """ clear the danger.

        NOTE(review): `shot` is not defined in this part of the file —
        presumably defined further down; confirm it exists.
        """
        return [shot]

    return dict(environment_fits=environment_fits, get_memory_patterns=get_memory_patterns)
767
+
768
# Ordered groups of memory patterns; find_patterns picks the FIRST group whose
# environment_fits returns True, so this list encodes priority.
groups_of_memory_patterns = [
    goal_keeper_far_pattern,  # safety: retreat to goal when the keeper need not act
    goalkeeper_memory_patterns,  # goalkeeper has the ball
    # special_spot_shot,  # shot (disabled by the author: branch never reached)
    special_game_modes_memory_patterns,  # special game modes (set pieces)
    ball_distance_2_5,  # ball at mid range: hold the shot line
    ball_distance_close,  # ball very close: clear it
    # own_goal,
    # offence_memory_patterns,  # our team has the ball (disabled by the author: branch never reached)
    defence_memory_patterns,  # chase the ball
    other_memory_patterns  # idle fallback
]
781
+
782
+
783
def keep_the_ball(obs, player_x, player_y):
    """Ball-carrier pattern: advance right, dodging opponents directly ahead
    by cutting diagonally, preferring a short pass when a teammate supports
    the diagonal.

    Fixes vs. original: when player_y == 0 with an opponent ahead the
    original fell through every branch and returned None (crashing later on
    `.value`); now it falls back to Action.Right. Also drops the unused
    `dist`/`closest`/`back`/`bottom_left`/`top_left` computations.
    """
    def environment_fits(obs, player_x, player_y):
        """Always applicable (used as a late fallback in the offence group)."""
        return True

    def get_action(obs, player_x, player_y):
        right_team, left_team = obs['right_team'], obs['left_team']
        # opponents directly ahead in a narrow corridor
        near = [i for i in right_team
                if player_x < i[0] < player_x + 0.2
                and player_y - 0.05 < i[1] < player_y + 0.05]
        # teammates available for a diagonal short pass, below / above
        bottom_right = [i for i in left_team
                        if player_x - 0.05 < i[0] < player_x + 0.2
                        and player_y < i[1] < player_y + 0.2]
        top_right = [i for i in left_team
                     if player_x - 0.05 < i[0] < player_x + 0.2
                     and player_y - 0.2 < i[1] < player_y]

        # nobody ahead: just run forward
        if not near:
            return Action.Right

        if player_y > 0:
            if player_y > 0.35:  # close to the sideline: keep going forward
                return Action.Right
            if bottom_right:
                if Action.BottomRight not in obs['sticky_actions']:
                    return Action.BottomRight
                return Action.ShortPass
            return Action.BottomRight

        if player_y < 0:
            if player_y < -0.35:  # close to the sideline: keep going forward
                return Action.Right
            if top_right:
                if Action.TopRight not in obs['sticky_actions']:
                    return Action.TopRight
                return Action.ShortPass
            return Action.TopRight

        # player_y == 0: previously unhandled (returned None) — go forward
        return Action.Right

    return {'environment_fits': environment_fits, 'get_action': get_action}
826
+
827
+
828
def spot_shot(obs, player_x, player_y):
    """ shoot from the central channel past x=0.75 """
    def environment_fits(obs, player_x, player_y):
        """ player is in shooting range of the goal """
        return bool(player_x > 0.75 and abs(player_y) < 0.21)

    def get_action(obs, player_x, player_y):
        """ aim for the far corner first, then shoot """
        aim = Action.TopRight if player_y >= 0 else Action.BottomRight
        if aim not in obs["sticky_actions"]:
            return aim
        return Action.Shot

    return dict(environment_fits=environment_fits, get_action=get_action)
849
+
850
+
851
def cross(obs, player_x, player_y):
    """Deliver a high pass (cross) from wide positions near the byline."""

    def environment_fits(obs, player_x, player_y):
        """True when the player is deep (x > 0.7) and wide (|y| > 0.21)."""
        return player_x > 0.7 and abs(player_y) > 0.21

    def get_action(obs, player_x, player_y):
        """Turn infield, then deliver a high pass."""
        if player_x > 0.88:
            infield = Action.Top if player_y > 0 else Action.Bottom
            if infield not in obs["sticky_actions"]:
                return infield
            return Action.HighPass

        # NOTE(review): this branch is unreachable — any x > 0.9 also
        # satisfies x > 0.88 and returns above. Kept byte-equivalent to
        # preserve behavior; reordering would change the agent.
        if player_x > 0.9:
            if (Action.Right in obs["sticky_actions"]
                    or Action.TopRight in obs["sticky_actions"]
                    or Action.BottomRight in obs["sticky_actions"]):
                return Action.ReleaseDirection
            if Action.Right not in obs["sticky_actions"]:
                if player_y > 0 and Action.Top not in obs["sticky_actions"]:
                    return Action.Top
                if player_y < 0 and Action.Bottom not in obs["sticky_actions"]:
                    return Action.Bottom
            return Action.HighPass
        # Implicitly returns None when 0.7 < x <= 0.88 (original behavior).

    return {"environment_fits": environment_fits, "get_action": get_action}
883
+
884
+
885
def close_to_goalkeeper_shot(obs, player_x, player_y):
    """Shoot when the player is within striking distance of the keeper."""

    def environment_fits(obs, player_x, player_y):
        """True when the extrapolated keeper position is near the player."""
        # Project the keeper 13 steps ahead along its current velocity.
        keeper = [
            obs["right_team"][0][0] + obs["right_team_direction"][0][0] * 13,
            obs["right_team"][0][1] + obs["right_team_direction"][0][1] * 13,
        ]
        return get_distance(player_x, player_y, keeper) < 0.25

    def get_action(obs, player_x, player_y):
        """Turn toward the far corner first, then shoot."""
        corner = Action.TopRight if player_y >= 0 else Action.BottomRight
        if corner not in obs["sticky_actions"]:
            return corner
        return Action.Shot

    return {"environment_fits": environment_fits, "get_action": get_action}
908
+
909
+
910
def long_pass_forward(obs, player_x, player_y):
    """Move the ball upfield with a high pass when deep in our own half.

    Fires when the player is far from the opponent goal (x < -0.4).  The
    chosen action depends on how close the nearest opponent is: with
    space, drift toward the centre; under moderate pressure, run
    diagonally forward; under tight pressure (or out wide), straighten up
    and launch a high pass.

    Fixes over the original: the boundary values ``min_dist == 0.4`` and
    ``min_dist == 0.2`` previously matched no branch and implicitly
    returned ``None``; the distance minimum is now computed once; unused
    locals (``closest``, ``left_team``) are removed.
    """

    def environment_fits(obs, player_x, player_y):
        """True when the player is far from the opponent's goal."""
        return player_x < -0.4

    def get_action(obs, player_x, player_y):
        """Pick a direction or a high pass based on opponent pressure."""
        dist = [get_distance(player_x, player_y, opp) for opp in obs["right_team"]]
        min_dist = np.min(dist)  # hoisted: computed once instead of per-branch

        # Out wide: straighten up and launch the pass.
        if abs(player_y) > 0.22:
            if Action.Right not in obs["sticky_actions"]:
                return Action.Right
            return Action.HighPass

        if min_dist > 0.4:
            # Plenty of space: drift toward the centre line.
            return Action.Bottom if player_y > 0 else Action.Top
        if min_dist > 0.2:
            # Moderate pressure: run diagonally forward.
            return Action.TopRight if player_y < 0 else Action.BottomRight
        # Tight pressure (min_dist <= 0.2): clear it long.
        if Action.Right not in obs["sticky_actions"]:
            return Action.Right
        return Action.HighPass

    return {"environment_fits": environment_fits, "get_action": get_action}
950
+
951
def shot(obs, player_x, player_y):
    """Fallback pattern that always fits and always shoots."""

    def environment_fits(obs, player_x, player_y):
        """This pattern matches any game state."""
        return True

    def get_action(obs, player_x, player_y):
        """Always shoot."""
        return Action.Shot

    return {"environment_fits": environment_fits, "get_action": get_action}
965
+
966
+
967
def own_goal_2(obs, player_x, player_y):
    """Catch-all pattern: fits every state and shoots."""

    def environment_fits(obs, player_x, player_y):
        """Matches every game state."""
        return True

    def get_action(obs, player_x, player_y):
        """Shoot regardless of position."""
        return Action.Shot

    return {"environment_fits": environment_fits, "get_action": get_action}
975
+
976
+
977
+ # @human_readable_agent wrapper modifies raw observations
978
+ # provided by the environment:
979
+ # https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#raw-observations
980
+ # into a form easier to work with by humans.
981
+ # Following modifications are applied:
982
+ # - Action, PlayerRole and GameMode enums are introduced.
983
+ # - 'sticky_actions' are turned into a set of active actions (Action enum)
984
+ # see usage example below.
985
+ # - 'game_mode' is turned into GameMode enum.
986
+ # - 'designated' field is removed, as it always equals to 'active'
987
+ # when a single player is controlled on the team.
988
+ # - 'left_team_roles'/'right_team_roles' are turned into PlayerRole enums.
989
+ # - Action enum is to be returned by the agent function.
990
@human_readable_agent
def agent_get_action(obs):
    """Entry point: pick an action for the controlled left-team player."""
    # Scratch space shared by the memory-pattern helpers.
    obs["memory_patterns"] = {}
    # The environment mirrors observations/actions, so we always control
    # the left team.
    player_x, player_y = obs["left_team"][obs["active"]]
    # Dispatch to whichever memory pattern fits the current situation.
    return get_action_of_agent(obs, player_x, player_y)
openrl_policy.py ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright 2023 The OpenRL Authors.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import numpy as np
18
+
19
+ import torch
20
+ import torch.nn as nn
21
+ from torch.distributions import Categorical
22
+
23
+ import gym
24
+
25
def check(input):
    """Convert *input* to a ``torch.Tensor`` if it is a numpy array.

    Any other value (including an existing tensor) is returned unchanged.
    The parameter name shadows the ``input`` builtin; it is kept for
    backward compatibility with existing keyword callers.
    """
    # isinstance replaces the original ``type(input) == np.ndarray``:
    # it handles ndarray subclasses and is the idiomatic type test.
    return torch.from_numpy(input) if isinstance(input, np.ndarray) else input
+
29
class FcEncoder(nn.Module):
    """Stack of ``fc_num`` Linear -> ReLU -> LayerNorm blocks."""

    def __init__(self, fc_num, input_size, output_size):
        super(FcEncoder, self).__init__()
        # First block maps input_size -> output_size.
        self.first_mlp = nn.Sequential(
            nn.Linear(input_size, output_size), nn.ReLU(), nn.LayerNorm(output_size)
        )
        # Remaining fc_num - 1 blocks keep the width at output_size.
        # Module layout is kept identical to the original so that
        # checkpoint state-dict keys still match.
        self.mlp = nn.Sequential()
        for _ in range(fc_num - 1):
            block = nn.Sequential(
                nn.Linear(output_size, output_size), nn.ReLU(), nn.LayerNorm(output_size)
            )
            self.mlp.append(block)

    def forward(self, x):
        hidden = self.first_mlp(x)
        return self.mlp(hidden)
44
+
45
def init(module, weight_init, bias_init, gain=1):
    """Apply *weight_init*/*bias_init* to *module* in place and return it."""
    weight_init(module.weight.data, gain=gain)
    bias = module.bias
    if bias is not None:
        bias_init(bias.data)
    return module
50
+
51
+
52
class FixedCategorical(torch.distributions.Categorical):
    """Categorical distribution whose outputs keep a trailing unit dim."""

    def sample(self):
        """Sample with shape (..., 1) instead of (...)."""
        return super().sample().unsqueeze(-1)

    def log_probs(self, actions):
        """Summed log-probabilities with shape (batch, 1)."""
        flat = actions.squeeze(-1)
        lp = super().log_prob(flat)
        return lp.view(actions.size(0), -1).sum(-1).unsqueeze(-1)

    def mode(self):
        """Most likely action index, shape (..., 1)."""
        return self.probs.argmax(dim=-1, keepdim=True)
67
+
68
class Categorical(nn.Module):
    """Linear action head that emits a ``FixedCategorical`` over logits.

    NOTE: this class intentionally shadows the
    ``torch.distributions.Categorical`` imported at the top of the file.
    """

    def __init__(self, num_inputs, num_outputs, use_orthogonal=True, gain=0.01):
        super(Categorical, self).__init__()
        # use_orthogonal selects orthogonal init, otherwise Xavier.
        init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal]

        def init_(m):
            return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain)

        self.linear = init_(nn.Linear(num_inputs, num_outputs))

    def forward(self, x, available_actions=None):
        logits = self.linear(x)
        if available_actions is not None:
            # Mask unavailable actions with a very negative logit so their
            # probability is effectively zero.
            logits[available_actions == 0] = -1e10
        return FixedCategorical(logits=logits)
82
+
83
+
84
class AddBias(nn.Module):
    """Adds a learned bias vector, broadcast over 2-D or 4-D inputs."""

    def __init__(self, bias):
        super(AddBias, self).__init__()
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        if x.dim() == 2:
            shaped = self._bias.t().view(1, -1)
        else:
            # Non-2-D input gets a (1, C, 1, 1) broadcast shape —
            # presumably NCHW feature maps; TODO confirm with callers.
            shaped = self._bias.t().view(1, -1, 1, 1)
        return x + shaped
96
+
97
class ACTLayer(nn.Module):
    """Action head wrapping a single discrete ``Categorical`` output.

    Exposes three APIs: ``forward`` (sample/argmax an action),
    ``get_probs`` (full action distribution), and ``evaluate_actions``
    (log-probs and entropy of given actions, for PPO-style updates).

    The mixed / multidiscrete / continuous flags are hard-wired to False
    in ``__init__``, so only the final ``else`` (single discrete) branch
    of each method can execute here.  NOTE(review): the dead branches
    reference ``self.action_outs``, which is never defined in this class
    — they would raise AttributeError if ever enabled.
    """

    def __init__(self, action_space, inputs_dim, use_orthogonal, gain):
        super(ACTLayer, self).__init__()
        # Always False in this file; kept from the original multi-space
        # implementation.
        self.multidiscrete_action = False
        self.continuous_action = False
        self.mixed_action = False

        action_dim = action_space.n
        self.action_out = Categorical(inputs_dim, action_dim, use_orthogonal, gain)

    def forward(self, x, available_actions=None, deterministic=False):
        """Return (actions, action_log_probs) sampled from the head.

        With ``deterministic=True`` the distribution mode (argmax) is
        used instead of sampling.
        """
        if self.mixed_action :
            # Dead branch: self.mixed_action is always False (see __init__).
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
                action_logit = action_out(x)
                action = action_logit.mode() if deterministic else action_logit.sample()
                action_log_prob = action_logit.log_probs(action)
                actions.append(action.float())
                action_log_probs.append(action_log_prob)

            actions = torch.cat(actions, -1)
            action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)

        elif self.multidiscrete_action:
            # Dead branch: self.multidiscrete_action is always False.
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
                action_logit = action_out(x)
                action = action_logit.mode() if deterministic else action_logit.sample()
                action_log_prob = action_logit.log_probs(action)
                actions.append(action)
                action_log_probs.append(action_log_prob)

            actions = torch.cat(actions, -1)
            action_log_probs = torch.cat(action_log_probs, -1)

        elif self.continuous_action:
            # Dead branch: self.continuous_action is always False.
            action_logits = self.action_out(x)
            actions = action_logits.mode() if deterministic else action_logits.sample()
            action_log_probs = action_logits.log_probs(actions)

        else:
            # Active path: single discrete action with optional availability mask.
            action_logits = self.action_out(x, available_actions)
            actions = action_logits.mode() if deterministic else action_logits.sample()
            action_log_probs = action_logits.log_probs(actions)

        return actions, action_log_probs

    def get_probs(self, x, available_actions=None):
        """Return the per-action probability tensor for features *x*."""
        if self.mixed_action or self.multidiscrete_action:
            # Dead branch (flags are always False).
            action_probs = []
            for action_out in self.action_outs:
                action_logit = action_out(x)
                action_prob = action_logit.probs
                action_probs.append(action_prob)
            action_probs = torch.cat(action_probs, -1)
        elif self.continuous_action:
            # Dead branch.
            action_logits = self.action_out(x)
            action_probs = action_logits.probs
        else:
            # Active path.
            action_logits = self.action_out(x, available_actions)
            action_probs = action_logits.probs

        return action_probs

    def evaluate_actions(self, x, action, available_actions=None, active_masks=None, get_probs=False):
        """Return (log_probs, entropy[, distribution]) for given actions.

        ``active_masks`` (when provided) weights the entropy so that only
        active agents contribute.  With ``get_probs=True`` the fitted
        distribution object is returned as a third value.
        """
        if self.mixed_action:
            # Dead branch: self.mixed_action is always False.
            a, b = action.split((2, 1), -1)
            b = b.long()
            action = [a, b]
            action_log_probs = []
            dist_entropy = []
            for action_out, act in zip(self.action_outs, action):
                action_logit = action_out(x)
                action_log_probs.append(action_logit.log_probs(act))
                if active_masks is not None:
                    if len(action_logit.entropy().shape) == len(active_masks.shape):
                        dist_entropy.append((action_logit.entropy() * active_masks).sum()/active_masks.sum())
                    else:
                        dist_entropy.append((action_logit.entropy() * active_masks.squeeze(-1)).sum()/active_masks.sum())
                else:
                    dist_entropy.append(action_logit.entropy().mean())

            action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)
            # Fixed per-component entropy weights from the original code.
            dist_entropy = dist_entropy[0] * 0.0025 + dist_entropy[1] * 0.01

        elif self.multidiscrete_action:
            # Dead branch: self.multidiscrete_action is always False.
            action = torch.transpose(action, 0, 1)
            action_log_probs = []
            dist_entropy = []
            for action_out, act in zip(self.action_outs, action):
                action_logit = action_out(x)
                action_log_probs.append(action_logit.log_probs(act))
                if active_masks is not None:
                    dist_entropy.append((action_logit.entropy()*active_masks.squeeze(-1)).sum()/active_masks.sum())
                else:
                    dist_entropy.append(action_logit.entropy().mean())

            action_log_probs = torch.cat(action_log_probs, -1) # ! could be wrong
            dist_entropy = torch.tensor(dist_entropy).mean()

        elif self.continuous_action:
            # Dead branch: self.continuous_action is always False.
            action_logits = self.action_out(x)
            action_log_probs = action_logits.log_probs(action)
            act_entropy = action_logits.entropy()
            if active_masks is not None:
                dist_entropy = (act_entropy*active_masks).sum()/active_masks.sum()
            else:
                dist_entropy = act_entropy.mean()

        else:
            # Active path: single discrete action.
            action_logits = self.action_out(x, available_actions)
            action_log_probs = action_logits.log_probs(action)
            if active_masks is not None:
                dist_entropy = (action_logits.entropy()*active_masks.squeeze(-1)).sum()/active_masks.sum()
            else:
                dist_entropy = action_logits.entropy().mean()
        if not get_probs:
            return action_log_probs, dist_entropy
        else:
            return action_log_probs, dist_entropy, action_logits
222
+
223
class RNNLayer(nn.Module):
    """GRU/LSTM wrapper with episode-reset masking and output LayerNorm.

    For ``rnn_type='lstm'`` the hidden and cell states are packed
    together along the last dimension of a single tensor and split /
    re-concatenated inside ``rnn_forward``, so callers can treat both
    RNN types uniformly as a single hidden-state tensor.
    """

    def __init__(self, inputs_dim, outputs_dim, recurrent_N, use_orthogonal,rnn_type='gru'):
        super(RNNLayer, self).__init__()
        self._recurrent_N = recurrent_N
        self._use_orthogonal = use_orthogonal
        self.rnn_type = rnn_type
        if rnn_type == 'gru':
            self.rnn = nn.GRU(inputs_dim, outputs_dim, num_layers=self._recurrent_N)
        elif rnn_type == 'lstm':
            self.rnn = nn.LSTM(inputs_dim, outputs_dim, num_layers=self._recurrent_N)
        else:
            raise NotImplementedError(f'RNN type {rnn_type} has not been implemented.')

        # Orthogonal (or Xavier) init for weights, zeros for biases.
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                if self._use_orthogonal:
                    nn.init.orthogonal_(param)
                else:
                    nn.init.xavier_uniform_(param)
        self.norm = nn.LayerNorm(outputs_dim)

    def rnn_forward(self, x, h):
        """Run the RNN; for LSTM, unpack/pack (h, c) from one tensor."""
        if self.rnn_type == 'lstm':
            # Last dim of h holds [hidden | cell] halves.
            h = torch.split(h, h.shape[-1] // 2, dim=-1)
            h = (h[0].contiguous(), h[1].contiguous())
        x_, h_ = self.rnn(x, h)
        if self.rnn_type == 'lstm':
            h_ = torch.cat(h_, -1)
        return x_, h_

    def forward(self, x, hxs, masks):
        """Run the RNN over *x* with hidden states *hxs*, resetting on masks.

        Two call shapes are supported: a single step per env (first
        branch, x and hxs have equal batch size), or a flattened
        (T * N, -1) rollout (second branch), which is chunked at the
        timesteps where any mask is zero so hidden state resets are
        applied between chunks.
        """
        if x.size(0) == hxs.size(0):
            x, hxs = self.rnn_forward(x.unsqueeze(0), (hxs * masks.repeat(1, self._recurrent_N).unsqueeze(-1)).transpose(0, 1).contiguous())
            #x= self.gru(x.unsqueeze(0))
            x = x.squeeze(0)
            hxs = hxs.transpose(0, 1)
        else:
            # x is a (T, N, -1) tensor that has been flatten to (T * N, -1)
            N = hxs.size(0)
            T = int(x.size(0) / N)

            # unflatten
            x = x.view(T, N, x.size(1))

            # Same deal with masks
            masks = masks.view(T, N)

            # Let's figure out which steps in the sequence have a zero for any agent
            # We will always assume t=0 has a zero in it as that makes the logic cleaner
            has_zeros = ((masks[1:] == 0.0)
                            .any(dim=-1)
                            .nonzero()
                            .squeeze()
                            .cpu())

            # +1 to correct the masks[1:]
            if has_zeros.dim() == 0:
                # Deal with scalar
                has_zeros = [has_zeros.item() + 1]
            else:
                has_zeros = (has_zeros + 1).numpy().tolist()

            # add t=0 and t=T to the list
            has_zeros = [0] + has_zeros + [T]

            hxs = hxs.transpose(0, 1)

            outputs = []
            for i in range(len(has_zeros) - 1):
                # We can now process steps that don't have any zeros in masks together!
                # This is much faster
                start_idx = has_zeros[i]
                end_idx = has_zeros[i + 1]
                # Zero out hidden states where the chunk starts after a reset.
                temp = (hxs * masks[start_idx].view(1, -1, 1).repeat(self._recurrent_N, 1, 1)).contiguous()
                rnn_scores, hxs = self.rnn_forward(x[start_idx:end_idx], temp)
                outputs.append(rnn_scores)

            # assert len(outputs) == T
            # x is a (T, N, -1) tensor
            x = torch.cat(outputs, dim=0)

            # flatten
            x = x.reshape(T * N, -1)
            hxs = hxs.transpose(0, 1)

        x = self.norm(x)
        return x, hxs
312
+
313
+
314
class InputEncoder(nn.Module):
    """Splits the flat observation into segments and encodes each one.

    Segment order and widths: active player (87), ball owner (57),
    left team (88), right team (88), match state (9).  The first four
    are encoded to 64 features each; match state keeps its width.
    """

    def __init__(self):
        super(InputEncoder, self).__init__()
        fc_layer_num = 2
        fc_output_num = 64
        # Widths of the flat observation segments, in order.
        self.active_input_num = 87
        self.ball_owner_input_num = 57
        self.left_input_num = 88
        self.right_input_num = 88
        self.match_state_input_num = 9

        self.active_encoder = FcEncoder(fc_layer_num, self.active_input_num, fc_output_num)
        self.ball_owner_encoder = FcEncoder(fc_layer_num, self.ball_owner_input_num, fc_output_num)
        self.left_encoder = FcEncoder(fc_layer_num, self.left_input_num, fc_output_num)
        self.right_encoder = FcEncoder(fc_layer_num, self.right_input_num, fc_output_num)
        self.match_state_encoder = FcEncoder(fc_layer_num, self.match_state_input_num, self.match_state_input_num)

    def forward(self, x):
        """Slice *x* column-wise, encode each segment, and concatenate."""
        widths = [
            self.active_input_num,
            self.ball_owner_input_num,
            self.left_input_num,
            self.right_input_num,
        ]
        segments = []
        offset = 0
        for width in widths:
            segments.append(x[:, offset:offset + width])
            offset += width
        segments.append(x[:, offset:])  # match-state tail

        encoders = [
            self.active_encoder,
            self.ball_owner_encoder,
            self.left_encoder,
            self.right_encoder,
            self.match_state_encoder,
        ]
        outputs = [enc(seg) for enc, seg in zip(encoders, segments)]
        return torch.cat(outputs, 1)
352
+
353
def get_fc(input_size, output_size):
    """Single Linear -> ReLU -> LayerNorm block."""
    layers = [nn.Linear(input_size, output_size), nn.ReLU(), nn.LayerNorm(output_size)]
    return nn.Sequential(*layers)
355
+
356
class ObsEncoder(nn.Module):
    """Observation pipeline: input encoder -> embedding -> RNN -> MLP."""

    def __init__(self, input_embedding_size, hidden_size, _recurrent_N, _use_orthogonal, rnn_type):
        super(ObsEncoder, self).__init__()
        # Raw observation first goes through the structured input encoder.
        self.input_encoder = InputEncoder()
        # Project the encoder output to the RNN hidden size.
        self.input_embedding = get_fc(input_embedding_size, hidden_size)
        # Recurrent core over the embedded features.
        self.rnn = RNNLayer(hidden_size, hidden_size, _recurrent_N, _use_orthogonal, rnn_type=rnn_type)
        # Final MLP applied to the RNN output.
        self.after_rnn_mlp = get_fc(hidden_size, hidden_size)

    def forward(self, obs, rnn_states, masks):
        """Encode *obs*; returns (features, next_rnn_states)."""
        features = self.input_encoder(obs)
        features = self.input_embedding(features)
        rnn_out, rnn_states = self.rnn(features, rnn_states, masks)
        return self.after_rnn_mlp(rnn_out), rnn_states
369
+
370
+
371
class PolicyNetwork(nn.Module):
    """Actor network: obs encoder + active-player-id embedding + action head.

    The observation's first column carries the active player's id
    (0..10); the remainder is the feature vector consumed by
    ``ObsEncoder``.  An auxiliary head predicts the id from the features
    and the taken action (used as a training signal in
    ``eval_actions``).
    """

    def __init__(self, device=torch.device("cpu")):
        super(PolicyNetwork, self).__init__()
        self.tpdv = dict(dtype=torch.float32, device=device)
        self.device = device
        self.hidden_size = 256
        self._use_policy_active_masks = True
        recurrent_N = 1
        use_orthogonal = True
        rnn_type = 'lstm'
        gain = 0.01
        action_space = gym.spaces.Discrete(20)
        self.action_dim = 19
        # Four 64-wide segment encoders plus the 9-wide match state.
        input_embedding_size = 64 * 4 + 9
        self.active_id_size = 1
        self.id_max = 11

        self.obs_encoder = ObsEncoder(input_embedding_size, self.hidden_size, recurrent_N, use_orthogonal, rnn_type)

        # Auxiliary head: predict the active player id from features + action.
        self.predict_id = get_fc(self.hidden_size + self.action_dim, self.id_max)
        # Embedding of the one-hot active-player id, concatenated with features.
        self.id_embedding = get_fc(self.id_max, self.id_max)

        self.before_act_wrapper = FcEncoder(2, self.hidden_size + self.id_max, self.hidden_size)
        self.act = ACTLayer(action_space, self.hidden_size, use_orthogonal, gain)

        self.to(device)

    def forward(self, obs, rnn_states, masks=None, available_actions=None, deterministic=False):
        """Compute actions and next RNN states for a batch of observations.

        ``masks`` defaults to a (1, 1) float32 array of ones (no reset).
        """
        # Bug fix: the original signature used a mutable numpy-array
        # default argument (evaluated once and shared across calls).
        # A None sentinel yields the same (1, 1) float32 array of ones.
        if masks is None:
            masks = np.ones((1, 1), dtype=np.float32)
        obs = check(obs).to(**self.tpdv)
        if available_actions is not None:
            available_actions = check(available_actions).to(**self.tpdv)
        masks = check(masks).to(**self.tpdv)
        rnn_states = check(rnn_states).to(**self.tpdv)

        # First column of obs is the active player's id; the rest is features.
        active_id = obs[:, :self.active_id_size].squeeze(1).long()
        id_onehot = torch.eye(self.id_max)[active_id].to(self.device)
        obs = obs[:, self.active_id_size:]

        obs_output, rnn_states = self.obs_encoder(obs, rnn_states, masks)
        id_output = self.id_embedding(id_onehot)
        output = torch.cat([id_output, obs_output], 1)

        output = self.before_act_wrapper(output)

        actions, action_log_probs = self.act(output, available_actions, deterministic)
        return actions, rnn_states

    def eval_actions(self, obs, rnn_states, action, masks, available_actions=None, active_masks=None):
        """Evaluate given actions; also run the id-prediction auxiliary head.

        Returns (action_log_probs, dist_entropy, values, id_prediction,
        id_groundtruth); ``values`` is always None here (no critic).
        """
        obs = check(obs).to(**self.tpdv)
        if available_actions is not None:
            available_actions = check(available_actions).to(**self.tpdv)
        if active_masks is not None:
            active_masks = check(active_masks).to(**self.tpdv)
        masks = check(masks).to(**self.tpdv)
        rnn_states = check(rnn_states).to(**self.tpdv)
        action = check(action).to(**self.tpdv)

        id_groundtruth = obs[:, :self.active_id_size].squeeze(1).long()
        id_onehot = torch.eye(self.id_max)[id_groundtruth].to(self.device)
        obs = obs[:, self.active_id_size:]

        obs_output, rnn_states = self.obs_encoder(obs, rnn_states, masks)
        id_output = self.id_embedding(id_onehot)

        action_onehot = torch.eye(self.action_dim)[action.squeeze(1).long()].to(self.device)

        # Auxiliary id prediction conditioned on features + taken action.
        id_prediction = self.predict_id(torch.cat([obs_output, action_onehot], 1))
        output = torch.cat([id_output, obs_output], 1)

        output = self.before_act_wrapper(output)
        action_log_probs, dist_entropy = self.act.evaluate_actions(
            output, action, available_actions,
            active_masks=active_masks if self._use_policy_active_masks else None)
        values = None
        return action_log_probs, dist_entropy, values, id_prediction, id_groundtruth
446
+
openrl_utils.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright 2023 The OpenRL Authors.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import numpy as np
18
+
19
# Pitch-area thresholds (normalized field coordinates).
THIRD_X = 0.3
BOX_X = 0.7
MAX_X = 1.0
BOX_Y = 0.24
MAX_Y = 0.42

# Action indices in the 19-dim discrete action space.
IDLE = 0
LEFT = 1
TOP_LEFT = 2
TOP = 3
TOP_RIGHT = 4
RIGHT = 5
BOTTOM_RIGHT = 6
BOTTOM = 7
BOTTOM_LEFT = 8
LONG_PASS = 9
HIGH_PASS = 10
SHORT_PASS = 11
SHOT = 12
SPRINT = 13
RELEASE_DIRECTION = 14
RELEASE_SPRINT = 15
SLIDING = 16
DRIBBLE = 17
RELEASE_DRIBBLE = 18
# Indices into the sticky-actions vector (direction slots 0-7).
STICKY_LEFT = 0
STICKY_TOP_LEFT = 1
STICKY_TOP = 2
STICKY_TOP_RIGHT = 3
STICKY_RIGHT = 4
STICKY_BOTTOM_RIGHT = 5
STICKY_BOTTOM = 6
STICKY_BOTTOM_LEFT = 7

# Direction-action groups and the matching unit movement vectors
# (ALL_DIRECTION_VECS[i] corresponds to ALL_DIRECTION_ACTIONS[i]).
RIGHT_ACTIONS = [TOP_RIGHT, RIGHT, BOTTOM_RIGHT, TOP, BOTTOM]
LEFT_ACTIONS = [TOP_LEFT, LEFT, BOTTOM_LEFT, TOP, BOTTOM]
BOTTOM_ACTIONS = [BOTTOM_LEFT, BOTTOM, BOTTOM_RIGHT, LEFT, RIGHT]
TOP_ACTIONS = [TOP_LEFT, TOP, TOP_RIGHT, LEFT, RIGHT]
ALL_DIRECTION_ACTIONS = [LEFT, TOP_LEFT, TOP, TOP_RIGHT, RIGHT, BOTTOM_RIGHT, BOTTOM, BOTTOM_LEFT]
ALL_DIRECTION_VECS = [(-1, 0), (-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1)]
61
+
62
def get_direction_action(available_action, sticky_actions, forbidden_action, target_action, active_direction, need_sprint):
    """Build a 19-dim availability mask for a scripted directional move.

    NOTE(review): the incoming ``available_action`` and
    ``active_direction`` arguments are never read — the mask is rebuilt
    from scratch; both are kept in the signature for caller
    compatibility.
    """
    mask = np.zeros(19)
    mask[forbidden_action] = 0  # no-op on a fresh zero mask; kept for clarity
    mask[target_action] = 1

    # Sticky slot 8 appears to track the sprint state (inferred from usage).
    sprinting = sticky_actions[8]
    if need_sprint:
        mask[RELEASE_SPRINT] = 0
        if sprinting == 0:
            # Not sprinting yet: force a SPRINT action before anything else.
            mask = np.zeros(19)
            mask[SPRINT] = 1
    else:
        mask[SPRINT] = 0
        if sprinting == 1:
            # Currently sprinting but shouldn't be: force RELEASE_SPRINT.
            mask = np.zeros(19)
            mask[RELEASE_SPRINT] = 1
    return mask
78
+
79
+ def openrl_obs_deal(obs):
80
+
81
+ direction_x_bound = 0.03
82
+ direction_y_bound = 0.02
83
+ ball_direction_x_bound = 0.15
84
+ ball_direction_y_bound = 0.07
85
+ ball_direction_z_bound = 4
86
+ ball_rotation_x_bound = 0.0005
87
+ ball_rotation_y_bound = 0.0004
88
+ ball_rotation_z_bound = 0.015
89
+ active_id = [obs["active"]]
90
+ assert active_id[0] < 11 and active_id[0] >= 0, "active id is wrong, active id = {}".format(active_id[0])
91
+ # left team 88
92
+ left_position = np.concatenate(obs["left_team"])
93
+ left_direction = np.concatenate(obs["left_team_direction"])
94
+ left_tired_factor = obs["left_team_tired_factor"]
95
+ left_yellow_card = obs["left_team_yellow_card"]
96
+ left_red_card = ~obs["left_team_active"]
97
+ left_offside = np.zeros(11)
98
+ if obs["ball_owned_team"] == 0:
99
+ left_offside_line = max(0, obs["ball"][0], np.sort(obs["right_team"][:, 0])[-2])
100
+ left_offside = obs["left_team"][:, 0] > left_offside_line
101
+ left_offside[obs["ball_owned_player"]] = False
102
+
103
+ new_left_direction = left_direction.copy()
104
+ for counting in range(len(new_left_direction)):
105
+ new_left_direction[counting] = new_left_direction[counting] / direction_x_bound if counting % 2 == 0 else new_left_direction[counting] / direction_y_bound
106
+
107
+ left_team = np.concatenate([
108
+ left_position,
109
+ new_left_direction,
110
+ left_tired_factor,
111
+ left_yellow_card,
112
+ left_red_card,
113
+ left_offside,
114
+ ]).astype(np.float64)
115
+
116
+ # right team 88
117
+ right_position = np.concatenate(obs["right_team"])
118
+ right_direction = np.concatenate(obs["right_team_direction"])
119
+ right_tired_factor = obs["right_team_tired_factor"]
120
+ right_yellow_card = obs["right_team_yellow_card"]
121
+ right_red_card = ~obs["right_team_active"]
122
+ right_offside = np.zeros(11)
123
+ if obs["ball_owned_team"] == 1:
124
+ right_offside_line = min(0, obs["ball"][0], np.sort(obs["left_team"][:, 0])[1])
125
+ right_offside = obs["right_team"][:, 0] < right_offside_line
126
+ right_offside[obs["ball_owned_player"]] = False
127
+
128
+ new_right_direction = right_direction.copy()
129
+ for counting in range(len(new_right_direction)):
130
+ new_right_direction[counting] = new_right_direction[counting] / direction_x_bound if counting % 2 == 0 else new_right_direction[counting] / direction_y_bound
131
+
132
+ right_team = np.concatenate([
133
+ right_position,
134
+ new_right_direction,
135
+ right_tired_factor,
136
+ right_yellow_card,
137
+ right_red_card,
138
+ right_offside,
139
+ ]).astype(np.float64)
140
+
141
+ # active 18
142
+ sticky_actions = obs["sticky_actions"][:10]
143
+ active_position = obs["left_team"][obs["active"]]
144
+ active_direction = obs["left_team_direction"][obs["active"]]
145
+ active_tired_factor = left_tired_factor[obs["active"]]
146
+ active_yellow_card = left_yellow_card[obs["active"]]
147
+ active_red_card = left_red_card[obs["active"]]
148
+ active_offside = left_offside[obs["active"]]
149
+
150
+ new_active_direction = active_direction.copy()
151
+ new_active_direction[0] /= direction_x_bound
152
+ new_active_direction[1] /= direction_y_bound
153
+
154
+ active_player = np.concatenate([
155
+ sticky_actions,
156
+ active_position,
157
+ new_active_direction,
158
+ [active_tired_factor],
159
+ [active_yellow_card],
160
+ [active_red_card],
161
+ [active_offside],
162
+ ]).astype(np.float64)
163
+
164
+ # relative 69
165
+ relative_ball_position = obs["ball"][:2] - active_position
166
+ distance2ball = np.linalg.norm(relative_ball_position)
167
+ relative_left_position = obs["left_team"] - active_position
168
+ distance2left = np.linalg.norm(relative_left_position, axis=1)
169
+ relative_left_position = np.concatenate(relative_left_position)
170
+ relative_right_position = obs["right_team"] - active_position
171
+ distance2right = np.linalg.norm(relative_right_position, axis=1)
172
+ relative_right_position = np.concatenate(relative_right_position)
173
+ relative_info = np.concatenate([
174
+ relative_ball_position,
175
+ [distance2ball],
176
+ relative_left_position,
177
+ distance2left,
178
+ relative_right_position,
179
+ distance2right,
180
+ ]).astype(np.float64)
181
+
182
+ active_info = np.concatenate([active_player, relative_info]) # 87
183
+
184
+ # ball info 12
185
+ ball_owned_team = np.zeros(3)
186
+ ball_owned_team[obs["ball_owned_team"] + 1] = 1.0
187
+ new_ball_direction = obs["ball_direction"].copy()
188
+ new_ball_rotation = obs['ball_rotation'].copy()
189
+ for counting in range(len(new_ball_direction)):
190
+ if counting % 3 == 0:
191
+ new_ball_direction[counting] /= ball_direction_x_bound
192
+ new_ball_rotation[counting] /= ball_rotation_x_bound
193
+ if counting % 3 == 1:
194
+ new_ball_direction[counting] /= ball_direction_y_bound
195
+ new_ball_rotation[counting] /= ball_rotation_y_bound
196
+ if counting % 3 == 2:
197
+ new_ball_direction[counting] /= ball_direction_z_bound
198
+ new_ball_rotation[counting] /= ball_rotation_z_bound
199
+ ball_info = np.concatenate([
200
+ obs["ball"],
201
+ new_ball_direction,
202
+ ball_owned_team,
203
+ new_ball_rotation
204
+ ]).astype(np.float64)
205
+ # ball owned player 23
206
+ ball_owned_player = np.zeros(23)
207
+ if obs["ball_owned_team"] == 1: # 对手
208
+ ball_owned_player[11 + obs['ball_owned_player']] = 1.0
209
+ ball_owned_player_pos = obs['right_team'][obs['ball_owned_player']]
210
+ ball_owned_player_direction = obs["right_team_direction"][obs['ball_owned_player']]
211
+ ball_owner_tired_factor = right_tired_factor[obs['ball_owned_player']]
212
+ ball_owner_yellow_card = right_yellow_card[obs['ball_owned_player']]
213
+ ball_owner_red_card = right_red_card[obs['ball_owned_player']]
214
+ ball_owner_offside = right_offside[obs['ball_owned_player']]
215
+ elif obs["ball_owned_team"] == 0:
216
+ ball_owned_player[obs['ball_owned_player']] = 1.0
217
+ ball_owned_player_pos = obs['left_team'][obs['ball_owned_player']]
218
+ ball_owned_player_direction = obs["left_team_direction"][obs['ball_owned_player']]
219
+ ball_owner_tired_factor = left_tired_factor[obs['ball_owned_player']]
220
+ ball_owner_yellow_card = left_yellow_card[obs['ball_owned_player']]
221
+ ball_owner_red_card = left_red_card[obs['ball_owned_player']]
222
+ ball_owner_offside = left_offside[obs['ball_owned_player']]
223
+ else:
224
+ ball_owned_player[-1] = 1.0
225
+ ball_owned_player_pos = np.zeros(2)
226
+ ball_owned_player_direction = np.zeros(2)
227
+
228
+ relative_ball_owner_position = np.zeros(2)
229
+ distance2ballowner = np.zeros(1)
230
+ ball_owner_info = np.zeros(4)
231
+ if obs["ball_owned_team"] != -1:
232
+ relative_ball_owner_position = ball_owned_player_pos - active_position
233
+ distance2ballowner = [np.linalg.norm(relative_ball_owner_position)]
234
+ ball_owner_info = np.concatenate([
235
+ [ball_owner_tired_factor],
236
+ [ball_owner_yellow_card],
237
+ [ball_owner_red_card],
238
+ [ball_owner_offside]
239
+ ])
240
+
241
+ new_ball_owned_player_direction = ball_owned_player_direction.copy()
242
+ new_ball_owned_player_direction[0] /= direction_x_bound
243
+ new_ball_owned_player_direction[1] /= direction_y_bound
244
+
245
+ ball_own_active_info = np.concatenate([
246
+ ball_info, # 12
247
+ ball_owned_player, # 23
248
+ active_position, # 2
249
+ new_active_direction, # 2
250
+ [active_tired_factor], # 1
251
+ [active_yellow_card], # 1
252
+ [active_red_card], # 1
253
+ [active_offside], # 1
254
+ relative_ball_position, # 2
255
+ [distance2ball], # 1
256
+ ball_owned_player_pos, # 2
257
+ new_ball_owned_player_direction, # 2
258
+ relative_ball_owner_position, # 2
259
+ distance2ballowner, # 1
260
+ ball_owner_info # 4
261
+ ])
262
+
263
+ # match state
264
+ game_mode = np.zeros(7)
265
+ game_mode[obs["game_mode"]] = 1.0
266
+ goal_diff_ratio = (obs["score"][0] - obs["score"][1]) / 5
267
+ steps_left_ratio = obs["steps_left"] / 3001
268
+ match_state = np.concatenate([
269
+ game_mode,
270
+ [goal_diff_ratio],
271
+ [steps_left_ratio],
272
+ ]).astype(np.float64)
273
+
274
+ # available action
275
+ available_action = np.ones(19)
276
+ available_action[IDLE] = 0
277
+ available_action[RELEASE_DIRECTION] = 0
278
+ should_left = False
279
+
280
+
281
+ if obs["game_mode"] == 0:
282
+ active_x = active_position[0]
283
+ counting_right_enemy_num = 0
284
+ counting_right_teammate_num = 0
285
+ counting_left_teammate_num = 0
286
+ for enemy_pos in obs["right_team"][1:]:
287
+ if active_x < enemy_pos[0]:
288
+ counting_right_enemy_num += 1
289
+ for teammate_pos in obs["left_team"][1:]:
290
+ if active_x < teammate_pos[0]:
291
+ counting_right_teammate_num += 1
292
+ if active_x > teammate_pos[0]:
293
+ counting_left_teammate_num += 1
294
+
295
+ if active_x > obs['ball'][0] + 0.05:
296
+
297
+ if counting_left_teammate_num < 2:
298
+
299
+ if obs['ball_owned_team'] != 0:
300
+ should_left = True
301
+ if should_left:
302
+ available_action = get_direction_action(available_action, sticky_actions, RIGHT_ACTIONS, [LEFT, BOTTOM_LEFT, TOP_LEFT], active_direction, True)
303
+
304
+
305
+ if (abs(relative_ball_position[0]) > 0.75 or abs(relative_ball_position[1]) > 0.5):
306
+ all_directions_vecs = [np.array(v) / np.linalg.norm(np.array(v)) for v in ALL_DIRECTION_VECS]
307
+ best_direction = np.argmax([np.dot(relative_ball_position, v) for v in all_directions_vecs])
308
+ target_direction = ALL_DIRECTION_ACTIONS[best_direction]
309
+ forbidden_actions = ALL_DIRECTION_ACTIONS.copy()
310
+ forbidden_actions.remove(target_direction)
311
+ available_action = get_direction_action(available_action, sticky_actions, forbidden_actions, [target_direction], active_direction, True)
312
+
313
+
314
+ if_i_hold_ball = (obs["ball_owned_team"] == 0 and obs["ball_owned_player"] == obs['active'])
315
+ ball_pos_offset = 0.05
316
+ no_ball_pos_offset = 0.03
317
+
318
+ active_x, active_y = active_position[0], active_position[1]
319
+ if_outside = False
320
+ if active_x <= (-1 + no_ball_pos_offset) or (if_i_hold_ball and active_x <= (-1 + ball_pos_offset)):
321
+ if_outside = True
322
+ action_index = LEFT_ACTIONS
323
+ target_direction = RIGHT
324
+ elif active_x >= (1 - no_ball_pos_offset) or (if_i_hold_ball and active_x >= (1 - ball_pos_offset)):
325
+ if_outside = True
326
+ action_index = RIGHT_ACTIONS
327
+ target_direction = LEFT
328
+ elif active_y >= (0.42 - no_ball_pos_offset) or (if_i_hold_ball and active_y >= (0.42 - ball_pos_offset)):
329
+ if_outside = True
330
+ action_index = BOTTOM_ACTIONS
331
+ target_direction = TOP
332
+ elif active_y <= (-0.42 + no_ball_pos_offset) or (if_i_hold_ball and active_x <= (-0.42 + ball_pos_offset)):
333
+ if_outside = True
334
+ action_index = TOP_ACTIONS
335
+ target_direction = BOTTOM
336
+ if obs["game_mode"] in [1, 2, 3, 4, 5]:
337
+ left2ball = np.linalg.norm(obs["left_team"] - obs["ball"][:2], axis=1)
338
+ right2ball = np.linalg.norm(obs["right_team"] - obs["ball"][:2], axis=1)
339
+ if np.min(left2ball) < np.min(right2ball) and obs["active"] == np.argmin(left2ball):
340
+ if_outside = False
341
+ elif obs["game_mode"] in [6]:
342
+ if obs["ball"][0] > 0 and active_position[0] > BOX_X:
343
+ if_outside = False
344
+ if if_outside:
345
+ available_action = get_direction_action(available_action, sticky_actions, action_index, [target_direction], active_direction, False)
346
+
347
+ if np.sum(sticky_actions[:8]) == 0:
348
+ available_action[RELEASE_DIRECTION] = 0
349
+ if sticky_actions[8] == 0:
350
+ available_action[RELEASE_SPRINT] = 0
351
+ else:
352
+ available_action[SPRINT] = 0
353
+ if sticky_actions[9] == 0:
354
+ available_action[RELEASE_DRIBBLE] = 0
355
+ else:
356
+ available_action[DRIBBLE] = 0
357
+ if active_position[0] < 0.4 or abs(active_position[1]) > 0.3:
358
+ available_action[SHOT] = 0
359
+
360
+ if obs["game_mode"] == 0:
361
+ if obs["ball_owned_team"] == -1:
362
+ available_action[DRIBBLE] = 0
363
+ if distance2ball >= 0.05:
364
+ available_action[SLIDING] = 0
365
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT]] = 0
366
+ elif obs["ball_owned_team"] == 0:
367
+ available_action[SLIDING] = 0
368
+ if distance2ball >= 0.05:
369
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, DRIBBLE]] = 0
370
+ elif obs["ball_owned_team"] == 1:
371
+ available_action[DRIBBLE] = 0
372
+ if distance2ball >= 0.05:
373
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SLIDING]] = 0
374
+ elif obs["game_mode"] in [1, 2, 3, 4, 5]:
375
+ left2ball = np.linalg.norm(obs["left_team"] - obs["ball"][:2], axis=1)
376
+ right2ball = np.linalg.norm(obs["right_team"] - obs["ball"][:2], axis=1)
377
+ if np.min(left2ball) < np.min(right2ball) and obs["active"] == np.argmin(left2ball):
378
+ available_action[[SPRINT, RELEASE_SPRINT, SLIDING, DRIBBLE, RELEASE_DRIBBLE]] = 0
379
+ else:
380
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT]] = 0
381
+ available_action[[SLIDING, DRIBBLE, RELEASE_DRIBBLE]] = 0
382
+ elif obs["game_mode"] == 6:
383
+ if obs["ball"][0] > 0 and active_position[0] > BOX_X:
384
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS]] = 0
385
+ available_action[[SPRINT, RELEASE_SPRINT, SLIDING, DRIBBLE, RELEASE_DRIBBLE]] = 0
386
+ else:
387
+ available_action[[LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT]] = 0
388
+ available_action[[SLIDING, DRIBBLE, RELEASE_DRIBBLE]] = 0
389
+
390
+
391
+ obs = np.concatenate([
392
+ active_id, # 1
393
+ active_info, # 87
394
+ ball_own_active_info, # 57
395
+ left_team, # 88
396
+ right_team, # 88
397
+ match_state, # 9
398
+ ])
399
+
400
+ share_obs = np.concatenate([
401
+ ball_info, # 12
402
+ ball_owned_player, # 23
403
+ left_team, # 88
404
+ right_team, # 88
405
+ match_state, # 9
406
+ ])
407
+
408
+ assert available_action.sum() > 0
409
+ return dict(
410
+ obs=obs,
411
+ share_obs=share_obs,
412
+ available_action=available_action,
413
+ )
414
+
415
+
416
+ def _t2n(x):
417
+ return x.detach().cpu().numpy()
418
+
419
+
420
+
421
+
submission.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright 2023 The OpenRL Authors.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+
18
+ """"""
19
+ import os
20
+ import sys
21
+ from pathlib import Path
22
+ import numpy as np
23
+ import torch
24
+
25
+ base_dir = Path(__file__).resolve().parent
26
+ sys.path.append(str(base_dir))
27
+
28
+ from openrl_policy import PolicyNetwork
29
+ from openrl_utils import openrl_obs_deal, _t2n
30
+ from goal_keeper import agent_get_action
31
+
32
class OpenRLAgent():
    """Wraps the trained policy network plus a rule-based goal keeper.

    Player slot 0 (the keeper) is handled by the scripted
    ``agent_get_action``; every other slot queries the neural policy,
    keeping one recurrent hidden state per player slot.
    """

    def __init__(self):
        # One RNN hidden state of shape (1, 1, 1, 512) per controllable slot.
        self.rnn_hidden_state = [
            np.zeros([1, 1, 1, 512], dtype=np.float32) for _ in range(11)
        ]
        # Weights are expected next to this file; loaded onto CPU.
        weights_path = os.path.dirname(os.path.abspath(__file__)) + '/actor.pt'
        self.model = PolicyNetwork()
        self.model.load_state_dict(
            torch.load(weights_path, map_location=torch.device("cpu"))
        )
        self.model.eval()

    def get_action(self, raw_obs, idx):
        """Return a one-hot action as a nested list of shape [1][19] for slot *idx*."""
        # Slot 0 (the goal keeper) is fully rule-based.
        if idx == 0:
            keeper_action = [[0] * 19]
            keeper_action[0][agent_get_action(raw_obs)[0]] = 1
            return keeper_action

        processed = openrl_obs_deal(raw_obs)

        # Flatten the 330-dim observation and the stored hidden state into
        # the batch layout the policy network expects.
        obs = np.concatenate(processed['obs'].reshape(1, 1, 330))
        rnn_state = np.concatenate(self.rnn_hidden_state[idx])
        avail = np.zeros(20)
        avail[:19] = processed['available_action']
        avail = np.concatenate(avail.reshape([1, 1, 20]))

        with torch.no_grad():
            actions, rnn_state = self.model(
                obs, rnn_state, available_actions=avail, deterministic=True
            )

        # NOTE(review): presumably remaps action 17 to 15 when the sprint
        # sticky (index 8) is already active — kept exactly as original.
        if actions[0][0] == 17 and raw_obs["sticky_actions"][8] == 1:
            actions[0][0] = 15
        self.rnn_hidden_state[idx] = np.array(np.split(_t2n(rnn_state), 1))

        one_hot = [[0] * 19]
        one_hot[0][actions[0]] = 1
        return one_hot
65
+
66
+ agent = OpenRLAgent()
67
+
68
def my_controller(obs_list, action_space_list, is_act_continuous=False):
    """Framework entry point: route the observation to the global ``agent``.

    Removes ``controlled_player_index`` from *obs_list* (mutating the
    caller's dict, as the original did) and maps it onto one of the 11
    player slots.
    """
    player_slot = obs_list.pop('controlled_player_index') % 11
    return agent.get_action(obs_list, player_slot)
73
+
74
def jidi_controller(obs_list=None):
    """Alias entry point (renamed to avoid a loader name clash on the Jidi platform).

    Returns ``None`` when called with no observation; otherwise delegates
    to :func:`my_controller` and sanity-checks the nested-list result.
    """
    if obs_list is None:
        return
    result = my_controller(obs_list, None)
    assert isinstance(result, list)
    assert isinstance(result[0], list)
    return result