genrl / envs /custom_dmc_tasks /quadruped.py
mazpie's picture
Initial commit
2d9a728
raw
history blame contribute delete
No virus
26.3 kB
# Copyright 2019 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Quadruped Domain."""
import collections
from dm_control.suite import quadruped
from dm_control import mujoco
from dm_control.mujoco.wrapper import mjbindings
from dm_control.rl import control
from dm_control.suite import base
from dm_control.suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
from dm_control.utils import xml_tools
from dm_control.utils import io as resources
from lxml import etree
import numpy as np
from scipy import ndimage
import os
enums = mjbindings.enums
mjlib = mjbindings.mjlib
_DEFAULT_TIME_LIMIT = 20
_CONTROL_TIMESTEP = .02
# Horizontal speeds above which the move reward is 1.
_RUN_SPEED = 5
_WALK_SPEED = 0.5
_JUMP_HEIGHT = 1.0 # -also good for foot up
_LIE_DOWN_HEIGHT = 0.2
_FOOT_DOWN_HEIGHT = 0.2
_FOOT_UP_HEIGHT = 0.8
# Constants related to terrain generation.
_HEIGHTFIELD_ID = 0
_TERRAIN_SMOOTHNESS = 0.15 # 0.0: maximally bumpy; 1.0: completely smooth.
_TERRAIN_BUMP_SCALE = 2 # Spatial scale of terrain bumps (in meters).
# Named model elements.
_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right']
_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny']
def make(task,
task_kwargs=None,
environment_kwargs=None,
visualize_reward=False):
task_kwargs = task_kwargs or {}
if environment_kwargs is not None:
task_kwargs = task_kwargs.copy()
task_kwargs['environment_kwargs'] = environment_kwargs
env = SUITE[task](**task_kwargs)
env.task.visualize_reward = visualize_reward
return env
def get_model_and_assets():
"""Returns a tuple containing the model XML string and a dict of assets."""
root_dir = os.path.dirname(os.path.dirname(__file__))
xml = resources.GetResource(
os.path.join(root_dir, 'custom_dmc_tasks', 'quadruped.xml'))
return xml, common.ASSETS
def make_model(floor_size=None, terrain=False, rangefinders=False,
walls_and_ball=False):
"""Returns the model XML string."""
root_dir = os.path.dirname(os.path.dirname(__file__))
xml_string = common.read_model(os.path.join(root_dir, 'custom_dmc_tasks', 'quadruped.xml'))
parser = etree.XMLParser(remove_blank_text=True)
mjcf = etree.XML(xml_string, parser)
# Set floor size.
if floor_size is not None:
floor_geom = mjcf.find('.//geom[@name=\'floor\']')
floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5'
# Remove walls, ball and target.
if not walls_and_ball:
for wall in _WALLS:
wall_geom = xml_tools.find_element(mjcf, 'geom', wall)
wall_geom.getparent().remove(wall_geom)
# Remove ball.
ball_body = xml_tools.find_element(mjcf, 'body', 'ball')
ball_body.getparent().remove(ball_body)
# Remove target.
target_site = xml_tools.find_element(mjcf, 'site', 'target')
target_site.getparent().remove(target_site)
# Remove terrain.
if not terrain:
terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain')
terrain_geom.getparent().remove(terrain_geom)
# Remove rangefinders if they're not used, as range computations can be
# expensive, especially in a scene with heightfields.
if not rangefinders:
rangefinder_sensors = mjcf.findall('.//rangefinder')
for rf in rangefinder_sensors:
rf.getparent().remove(rf)
return etree.tostring(mjcf, pretty_print=True)
@quadruped.SUITE.add('custom')
def lie_down(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Stand(goal='lie_down', random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
@quadruped.SUITE.add('custom')
def two_legs(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Stand(goal='two_legs', random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
@quadruped.SUITE.add('custom')
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Stand(goal='stand', random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
@quadruped.SUITE.add('custom')
def jump(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Jump(desired_height=_JUMP_HEIGHT, random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
@quadruped.SUITE.add('custom')
def roll(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Roll(desired_speed=_WALK_SPEED, random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
@quadruped.SUITE.add('custom')
def roll_fast(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
"""Returns the Walk task."""
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
physics = Physics.from_xml_string(xml_string, common.ASSETS)
task = Roll(desired_speed=_RUN_SPEED, random=random)
environment_kwargs = environment_kwargs or {}
return control.Environment(physics, task, time_limit=time_limit,
control_timestep=_CONTROL_TIMESTEP,
**environment_kwargs)
class Physics(mujoco.Physics):
"""Physics simulation with additional features for the Quadruped domain."""
def _reload_from_data(self, data):
super()._reload_from_data(data)
# Clear cached sensor names when the physics is reloaded.
self._sensor_types_to_names = {}
self._hinge_names = []
def _get_sensor_names(self, *sensor_types):
try:
sensor_names = self._sensor_types_to_names[sensor_types]
except KeyError:
[sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types))
sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids]
self._sensor_types_to_names[sensor_types] = sensor_names
return sensor_names
def torso_upright(self):
"""Returns the dot-product of the torso z-axis and the global z-axis."""
return np.asarray(self.named.data.xmat['torso', 'zz'])
def torso_velocity(self):
"""Returns the velocity of the torso, in the local frame."""
return self.named.data.sensordata['velocimeter'].copy()
def com_height(self):
return self.named.data.sensordata['center_of_mass'].copy()[2]
def egocentric_state(self):
"""Returns the state without global orientation or position."""
if not self._hinge_names:
[hinge_ids] = np.nonzero(self.model.jnt_type ==
enums.mjtJoint.mjJNT_HINGE)
self._hinge_names = [self.model.id2name(j_id, 'joint')
for j_id in hinge_ids]
return np.hstack((self.named.data.qpos[self._hinge_names],
self.named.data.qvel[self._hinge_names],
self.data.act))
def toe_positions(self):
"""Returns toe positions in egocentric frame."""
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
torso_pos = self.named.data.xpos['torso']
torso_to_toe = self.named.data.xpos[_TOES] - torso_pos
return torso_to_toe.dot(torso_frame)
def force_torque(self):
"""Returns scaled force/torque sensor readings at the toes."""
force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE,
enums.mjtSensor.mjSENS_TORQUE)
return np.arcsinh(self.named.data.sensordata[force_torque_sensors])
def imu(self):
"""Returns IMU-like sensor readings."""
imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO,
enums.mjtSensor.mjSENS_ACCELEROMETER)
return self.named.data.sensordata[imu_sensors]
def rangefinder(self):
"""Returns scaled rangefinder sensor readings."""
rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER)
rf_readings = self.named.data.sensordata[rf_sensors]
no_intersection = -1.0
return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings))
def origin_distance(self):
"""Returns the distance from the origin to the workspace."""
return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace']))
def origin(self):
"""Returns origin position in the torso frame."""
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
torso_pos = self.named.data.xpos['torso']
return -torso_pos.dot(torso_frame)
def ball_state(self):
"""Returns ball position and velocity relative to the torso frame."""
data = self.named.data
torso_frame = data.xmat['torso'].reshape(3, 3)
ball_rel_pos = data.xpos['ball'] - data.xpos['torso']
ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3]
ball_rot_vel = data.qvel['ball_root'][3:]
ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel))
return ball_state.dot(torso_frame).ravel()
def target_position(self):
"""Returns target position in torso frame."""
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
torso_pos = self.named.data.xpos['torso']
torso_to_target = self.named.data.site_xpos['target'] - torso_pos
return torso_to_target.dot(torso_frame)
def ball_to_target_distance(self):
"""Returns horizontal distance from the ball to the target."""
ball_to_target = (self.named.data.site_xpos['target'] -
self.named.data.xpos['ball'])
return np.linalg.norm(ball_to_target[:2])
def self_to_ball_distance(self):
"""Returns horizontal distance from the quadruped workspace to the ball."""
self_to_ball = (self.named.data.site_xpos['workspace']
-self.named.data.xpos['ball'])
return np.linalg.norm(self_to_ball[:2])
def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0):
"""Find a height with no contacts given a body orientation.
Args:
physics: An instance of `Physics`.
orientation: A quaternion.
x_pos: A float. Position along global x-axis.
y_pos: A float. Position along global y-axis.
Raises:
RuntimeError: If a non-contacting configuration has not been found after
10,000 attempts.
"""
z_pos = 0.0 # Start embedded in the floor.
num_contacts = 1
num_attempts = 0
# Move up in 1cm increments until no contacts.
while num_contacts > 0:
try:
with physics.reset_context():
physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos
physics.named.data.qpos['root'][3:] = orientation
except control.PhysicsError:
# We may encounter a PhysicsError here due to filling the contact
# buffer, in which case we simply increment the height and continue.
pass
num_contacts = physics.data.ncon
z_pos += 0.01
num_attempts += 1
if num_attempts > 10000:
raise RuntimeError('Failed to find a non-contacting configuration.')
def _common_observations(physics):
"""Returns the observations common to all tasks."""
obs = collections.OrderedDict()
obs['egocentric_state'] = physics.egocentric_state()
obs['torso_velocity'] = physics.torso_velocity()
obs['torso_upright'] = physics.torso_upright()
obs['imu'] = physics.imu()
obs['force_torque'] = physics.force_torque()
return obs
def _lie_down_reward(physics, deviation_angle=0):
"""Returns a reward proportional to how upright the torso is.
Args:
physics: an instance of `Physics`.
deviation_angle: A float, in degrees. The reward is 0 when the torso is
exactly upside-down and 1 when the torso's z-axis is less than
`deviation_angle` away from the global z-axis.
"""
torso = physics.named.data.xpos['torso', 'z']
return rewards.tolerance(
torso,
bounds=(-float('inf'), _LIE_DOWN_HEIGHT),
margin=_LIE_DOWN_HEIGHT * 1.5)
def _two_legs_reward(physics, deviation_angle=0):
"""Returns a reward proportional to how upright the torso is.
Args:
physics: an instance of `Physics`.
deviation_angle: A float, in degrees. The reward is 0 when the torso is
exactly upside-down and 1 when the torso's z-axis is less than
`deviation_angle` away from the global z-axis.
"""
toes = []
for t in ['toe_front_left', 'toe_front_right', 'toe_back_left', 'toe_back_right']:
toe = physics.named.data.xpos[t, 'z']
toes.append(toe)
toes = sorted(toes)
min_toes = sum(toes[:2]) / 2
max_toes = sum(toes[2:]) / 2
toes_up = rewards.tolerance(
max_toes,
bounds=(_FOOT_UP_HEIGHT, float('inf')),
margin=_FOOT_UP_HEIGHT // 2)
toes_down = rewards.tolerance(
min_toes,
bounds=(-float('inf'), _FOOT_DOWN_HEIGHT),
margin=_FOOT_DOWN_HEIGHT * 1.5)
return toes_down * toes_up
def _upright_reward(physics, deviation_angle=0):
"""Returns a reward proportional to how upright the torso is.
Args:
physics: an instance of `Physics`.
deviation_angle: A float, in degrees. The reward is 0 when the torso is
exactly upside-down and 1 when the torso's z-axis is less than
`deviation_angle` away from the global z-axis.
"""
deviation = np.cos(np.deg2rad(deviation_angle))
return rewards.tolerance(
physics.torso_upright(),
bounds=(deviation, float('inf')),
sigmoid='linear',
margin=1 + deviation,
value_at_margin=0)
class Move(base.Task):
"""A quadruped task solved by moving forward at a designated speed."""
def __init__(self, desired_speed, random=None):
"""Initializes an instance of `Move`.
Args:
desired_speed: A float. If this value is zero, reward is given simply
for standing upright. Otherwise this specifies the horizontal velocity
at which the velocity-dependent reward component is maximized.
random: Optional, either a `numpy.random.RandomState` instance, an
integer seed for creating a new `RandomState`, or None to select a seed
automatically (default).
"""
self._desired_speed = desired_speed
super().__init__(random=random)
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Initial configuration.
orientation = self.random.randn(4)
orientation /= np.linalg.norm(orientation)
_find_non_contacting_height(physics, orientation)
super().initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation to the agent."""
return _common_observations(physics)
def get_reward(self, physics):
"""Returns a reward to the agent."""
# Move reward term.
move_reward = rewards.tolerance(
physics.torso_velocity()[0],
bounds=(self._desired_speed, float('inf')),
margin=self._desired_speed,
value_at_margin=0.5,
sigmoid='linear')
return _upright_reward(physics) * move_reward
class Stand(base.Task):
"""A quadruped task solved by moving forward at a designated speed."""
def __init__(self, random=None, goal='stand'):
"""Initializes an instance of `Move`.
Args:
desired_speed: A float. If this value is zero, reward is given simply
for standing upright. Otherwise this specifies the horizontal velocity
at which the velocity-dependent reward component is maximized.
random: Optional, either a `numpy.random.RandomState` instance, an
integer seed for creating a new `RandomState`, or None to select a seed
automatically (default).
"""
super().__init__(random=random)
self._goal = goal
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Initial configuration.
orientation = self.random.randn(4)
orientation /= np.linalg.norm(orientation)
_find_non_contacting_height(physics, orientation)
super().initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation to the agent."""
return _common_observations(physics)
def get_reward(self, physics):
"""Returns a reward to the agent."""
if self._goal == 'stand':
return _upright_reward(physics)
elif self._goal == 'lie_down':
return _lie_down_reward(physics)
elif self._goal == 'two_legs':
return _two_legs_reward(physics)
class Jump(base.Task):
"""A quadruped task solved by moving forward at a designated speed."""
def __init__(self, desired_height, random=None):
"""Initializes an instance of `Move`.
Args:
desired_speed: A float. If this value is zero, reward is given simply
for standing upright. Otherwise this specifies the horizontal velocity
at which the velocity-dependent reward component is maximized.
random: Optional, either a `numpy.random.RandomState` instance, an
integer seed for creating a new `RandomState`, or None to select a seed
automatically (default).
"""
self._desired_height = desired_height
super().__init__(random=random)
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Initial configuration.
orientation = self.random.randn(4)
orientation /= np.linalg.norm(orientation)
_find_non_contacting_height(physics, orientation)
super().initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation to the agent."""
return _common_observations(physics)
def get_reward(self, physics):
"""Returns a reward to the agent."""
# Move reward term.
jump_up = rewards.tolerance(
physics.com_height(),
bounds=(self._desired_height, float('inf')),
margin=self._desired_height,
value_at_margin=0.5,
sigmoid='linear')
return _upright_reward(physics) * jump_up
class Roll(base.Task):
"""A quadruped task solved by moving forward at a designated speed."""
def __init__(self, desired_speed, random=None):
"""Initializes an instance of `Move`.
Args:
desired_speed: A float. If this value is zero, reward is given simply
for standing upright. Otherwise this specifies the horizontal velocity
at which the velocity-dependent reward component is maximized.
random: Optional, either a `numpy.random.RandomState` instance, an
integer seed for creating a new `RandomState`, or None to select a seed
automatically (default).
"""
self._desired_speed = desired_speed
super().__init__(random=random)
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Initial configuration.
orientation = self.random.randn(4)
orientation /= np.linalg.norm(orientation)
_find_non_contacting_height(physics, orientation)
super().initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation to the agent."""
return _common_observations(physics)
def get_reward(self, physics):
"""Returns a reward to the agent."""
# Move reward term.
move_reward = rewards.tolerance(
np.linalg.norm(physics.torso_velocity()),
bounds=(self._desired_speed, float('inf')),
margin=self._desired_speed,
value_at_margin=0.5,
sigmoid='linear')
return _upright_reward(physics) * move_reward
class Escape(base.Task):
"""A quadruped task solved by escaping a bowl-shaped terrain."""
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Get heightfield resolution, assert that it is square.
res = physics.model.hfield_nrow[_HEIGHTFIELD_ID]
assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID]
# Sinusoidal bowl shape.
row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j]
radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1)
bowl_shape = .5 - np.cos(2*np.pi*radius)/2
# Random smooth bumps.
terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE)
bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res))
smooth_bumps = ndimage.zoom(bumps, res / float(bump_res))
# Terrain is elementwise product.
terrain = bowl_shape * smooth_bumps
start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID]
physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel()
super().initialize_episode(physics)
# If we have a rendering context, we need to re-upload the modified
# heightfield data.
if physics.contexts:
with physics.contexts.gl.make_current() as ctx:
ctx.call(mjlib.mjr_uploadHField,
physics.model.ptr,
physics.contexts.mujoco.ptr,
_HEIGHTFIELD_ID)
# Initial configuration.
orientation = self.random.randn(4)
orientation /= np.linalg.norm(orientation)
_find_non_contacting_height(physics, orientation)
def get_observation(self, physics):
"""Returns an observation to the agent."""
obs = _common_observations(physics)
obs['origin'] = physics.origin()
obs['rangefinder'] = physics.rangefinder()
return obs
def get_reward(self, physics):
"""Returns a reward to the agent."""
# Escape reward term.
terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
escape_reward = rewards.tolerance(
physics.origin_distance(),
bounds=(terrain_size, float('inf')),
margin=terrain_size,
value_at_margin=0,
sigmoid='linear')
return _upright_reward(physics, deviation_angle=20) * escape_reward
class Fetch(base.Task):
"""A quadruped task solved by bringing a ball to the origin."""
def initialize_episode(self, physics):
"""Sets the state of the environment at the start of each episode.
Args:
physics: An instance of `Physics`.
"""
# Initial configuration, random azimuth and horizontal position.
azimuth = self.random.uniform(0, 2*np.pi)
orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2)))
spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0]
x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,))
_find_non_contacting_height(physics, orientation, x_pos, y_pos)
# Initial ball state.
physics.named.data.qpos['ball_root'][:2] = self.random.uniform(
-spawn_radius, spawn_radius, size=(2,))
physics.named.data.qpos['ball_root'][2] = 2
physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2)
super().initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation to the agent."""
obs = _common_observations(physics)
obs['ball_state'] = physics.ball_state()
obs['target_position'] = physics.target_position()
return obs
def get_reward(self, physics):
"""Returns a reward to the agent."""
# Reward for moving close to the ball.
arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
workspace_radius = physics.named.model.site_size['workspace', 0]
ball_radius = physics.named.model.geom_size['ball', 0]
reach_reward = rewards.tolerance(
physics.self_to_ball_distance(),
bounds=(0, workspace_radius+ball_radius),
sigmoid='linear',
margin=arena_radius, value_at_margin=0)
# Reward for bringing the ball to the target.
target_radius = physics.named.model.site_size['target', 0]
fetch_reward = rewards.tolerance(
physics.ball_to_target_distance(),
bounds=(0, target_radius),
sigmoid='linear',
margin=arena_radius, value_at_margin=0)
reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward)
return _upright_reward(physics) * reach_then_fetch