# Source: jat / README.md
# qgallouedec's picture
# qgallouedec (HF staff)
# Commit message: Update README.md
# Commit: 93fb594 (verified)
# |
# raw
# history blame
# Size: 69.4 kB
# Section: metadata
---
# Tags attached to this model card: one generic task tag followed by
# per-environment tags grouped by prefix (atari-, babyai-, metaworld-, mujoco-).
# The atari-* tags use the same identifiers as the `dataset.type` values of the
# per-game entries under `model-index` (e.g. atari-alien).
tags:
  # Generic task tag (same value as `pipeline_tag`).
  - reinforcement-learning
  # Atari (57 tags).
  - atari-alien
  - atari-amidar
  - atari-assault
  - atari-asterix
  - atari-asteroids
  - atari-atlantis
  - atari-bankheist
  - atari-battlezone
  - atari-beamrider
  - atari-berzerk
  - atari-bowling
  - atari-boxing
  - atari-breakout
  - atari-centipede
  - atari-choppercommand
  - atari-crazyclimber
  - atari-defender
  - atari-demonattack
  - atari-doubledunk
  - atari-enduro
  - atari-fishingderby
  - atari-freeway
  - atari-frostbite
  - atari-gopher
  - atari-gravitar
  - atari-hero
  - atari-icehockey
  - atari-jamesbond
  - atari-kangaroo
  - atari-krull
  - atari-kungfumaster
  - atari-montezumarevenge
  - atari-mspacman
  - atari-namethisgame
  - atari-phoenix
  - atari-pitfall
  - atari-pong
  - atari-privateeye
  - atari-qbert
  - atari-riverraid
  - atari-roadrunner
  - atari-robotank
  - atari-seaquest
  - atari-skiing
  - atari-solaris
  - atari-spaceinvaders
  - atari-stargunner
  - atari-surround
  - atari-tennis
  - atari-timepilot
  - atari-tutankham
  - atari-upndown
  - atari-venture
  - atari-videopinball
  - atari-wizardofwor
  - atari-yarsrevenge
  - atari-zaxxon
  # BabyAI (39 tags).
  - babyai-action-obj-door
  - babyai-blocked-unlock-pickup
  - babyai-boss-level-no-unlock
  - babyai-boss-level
  - babyai-find-obj-s5
  - babyai-go-to-door
  - babyai-go-to-imp-unlock
  - babyai-go-to-local
  - babyai-go-to-obj-door
  - babyai-go-to-obj
  - babyai-go-to-red-ball-grey
  - babyai-go-to-red-ball-no-dists
  - babyai-go-to-red-ball
  - babyai-go-to-red-blue-ball
  - babyai-go-to-seq
  - babyai-go-to
  - babyai-key-corridor
  - babyai-mini-boss-level
  - babyai-move-two-across-s8n9
  - babyai-one-room-s8
  - babyai-open-door
  - babyai-open-doors-order-n4
  - babyai-open-red-door
  - babyai-open-two-doors
  - babyai-open
  - babyai-pickup-above
  - babyai-pickup-dist
  - babyai-pickup-loc
  - babyai-pickup
  - babyai-put-next-local
  - babyai-put-next
  - babyai-synth-loc
  - babyai-synth-seq
  - babyai-synth
  - babyai-unblock-pickup
  - babyai-unlock-local
  - babyai-unlock-pickup
  - babyai-unlock-to-unlock
  - babyai-unlock
  # MetaWorld (50 tags).
  - metaworld-assembly
  - metaworld-basketball
  - metaworld-bin-picking
  - metaworld-box-close
  - metaworld-button-press-topdown-wall
  - metaworld-button-press-topdown
  - metaworld-button-press-wall
  - metaworld-button-press
  - metaworld-coffee-button
  - metaworld-coffee-pull
  - metaworld-coffee-push
  - metaworld-dial-turn
  - metaworld-disassemble
  - metaworld-door-close
  - metaworld-door-lock
  - metaworld-door-open
  - metaworld-door-unlock
  - metaworld-drawer-close
  - metaworld-drawer-open
  - metaworld-faucet-close
  - metaworld-faucet-open
  - metaworld-hammer
  - metaworld-hand-insert
  - metaworld-handle-press-side
  - metaworld-handle-press
  - metaworld-handle-pull-side
  - metaworld-handle-pull
  - metaworld-lever-pull
  - metaworld-peg-insert-side
  - metaworld-peg-unplug-side
  - metaworld-pick-out-of-hole
  - metaworld-pick-place-wall
  - metaworld-pick-place
  - metaworld-plate-slide-back-side
  - metaworld-plate-slide-back
  - metaworld-plate-slide-side
  - metaworld-plate-slide
  - metaworld-push-back
  - metaworld-push-wall
  - metaworld-push
  - metaworld-reach-wall
  - metaworld-reach
  - metaworld-shelf-place
  - metaworld-soccer
  - metaworld-stick-pull
  - metaworld-stick-push
  - metaworld-sweep-into
  - metaworld-sweep
  - metaworld-window-close
  - metaworld-window-open
  # MuJoCo (11 tags).
  - mujoco-ant
  - mujoco-doublependulum
  - mujoco-halfcheetah
  - mujoco-hopper
  - mujoco-humanoid
  - mujoco-pendulum
  - mujoco-pusher
  - mujoco-reacher
  - mujoco-standup
  - mujoco-swimmer
  - mujoco-walker
# Hub dataset repo id(s) associated with this model. Written as a sequence so
# the value has the list shape the model-card metadata schema documents, even
# with a single entry.
datasets:
  - jat-project/jat-dataset
# Hub task category for this model (same value as the first entry in `tags`).
pipeline_tag: reinforcement-learning
model-index:
  - name: jat-project/jat
    results:
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Atari 57
          type: atari
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.14 [0.14, 0.15]
            name: IQM expert normalized total reward
          - type: iqm_human_normalized_total_reward
            value: 0.38 [0.37, 0.39]
            name: IQM human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: BabyAI
          type: babyai
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.99 [0.99, 0.99]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: MetaWorld
          type: metaworld
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.65 [0.64, 0.67]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: MuJoCo
          type: mujoco
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.85 [0.83, 0.86]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Alien
          type: atari-alien
        metrics:
          - type: total_reward
            value: 1518.70 +/- 568.14
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.08 +/- 0.03
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.19 +/- 0.08
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Amidar
          type: atari-amidar
        metrics:
          - type: total_reward
            value: 89.17 +/- 78.73
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.05 +/- 0.05
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Assault
          type: atari-assault
        metrics:
          - type: total_reward
            value: 1676.91 +/- 780.73
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 2.80 +/- 1.50
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Asterix
          type: atari-asterix
        metrics:
          - type: total_reward
            value: 844.50 +/- 546.85
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.18 +/- 0.16
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.08 +/- 0.07
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Asteroids
          type: atari-asteroids
        metrics:
          - type: total_reward
            value: 1357.90 +/- 453.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Atlantis
          type: atari-atlantis
        metrics:
          - type: total_reward
            value: 51843.00 +/- 123857.07
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.13 +/- 0.40
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 2.41 +/- 7.66
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Bank Heist
          type: atari-bankheist
        metrics:
          - type: total_reward
            value: 977.80 +/- 156.49
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.74 +/- 0.12
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.30 +/- 0.21
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Battle Zone
          type: atari-battlezone
        metrics:
          - type: total_reward
            value: 16780.00 +/- 6926.15
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.06 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.45 +/- 0.19
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Beam Rider
          type: atari-beamrider
        metrics:
          - type: total_reward
            value: 768.36 +/- 364.06
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.02
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Berzerk
          type: atari-berzerk
        metrics:
          - type: total_reward
            value: 616.20 +/- 296.08
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.20 +/- 0.12
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Bowling
          type: atari-bowling
        metrics:
          - type: total_reward
            value: 22.32 +/- 5.18
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.01 +/- 0.04'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boxing
          type: atari-boxing
        metrics:
          - type: total_reward
            value: 92.31 +/- 18.24
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.19
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 7.68 +/- 1.52
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Breakout
          type: atari-breakout
        metrics:
          - type: total_reward
            value: 7.93 +/- 5.66
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.22 +/- 0.20
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Centipede
          type: atari-centipede
        metrics:
          - type: total_reward
            value: 5888.27 +/- 2594.62
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.40 +/- 0.27
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.38 +/- 0.26
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Chopper Command
          type: atari-choppercommand
        metrics:
          - type: total_reward
            value: 2371.00 +/- 1195.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.24 +/- 0.18
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Crazy Climber
          type: atari-crazyclimber
        metrics:
          - type: total_reward
            value: 97145.00 +/- 30388.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.51 +/- 0.18
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 3.45 +/- 1.21
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Defender
          type: atari-defender
        metrics:
          - type: total_reward
            value: 39317.50 +/- 16246.15
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.10 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 2.30 +/- 1.03
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Demon Attack
          type: atari-demonattack
        metrics:
          - type: total_reward
            value: 795.10 +/- 982.55
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.35 +/- 0.54
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Double Dunk
          type: atari-doubledunk
        metrics:
          - type: total_reward
            value: 13.40 +/- 11.07
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.81 +/- 0.28
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.91 +/- 0.32
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Enduro
          type: atari-enduro
        metrics:
          - type: total_reward
            value: 103.11 +/- 28.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.12 +/- 0.03
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Fishing Derby
          type: atari-fishingderby
        metrics:
          - type: total_reward
            value: '-31.67 +/- 22.54'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.61 +/- 0.23
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.46 +/- 0.17
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Freeway
          type: atari-freeway
        metrics:
          - type: total_reward
            value: 27.57 +/- 1.87
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.81 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.93 +/- 0.06
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Frostbite
          type: atari-frostbite
        metrics:
          - type: total_reward
            value: 2875.60 +/- 1679.84
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.21 +/- 0.13
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.66 +/- 0.39
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Gopher
          type: atari-gopher
        metrics:
          - type: total_reward
            value: 5508.80 +/- 2802.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.06 +/- 0.03
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 2.44 +/- 1.30
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Gravitar
          type: atari-gravitar
        metrics:
          - type: total_reward
            value: 1330.50 +/- 918.23
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.30 +/- 0.24
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.36 +/- 0.29
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: H.E.R.O.
          type: atari-hero
        metrics:
          - type: total_reward
            value: 11932.00 +/- 3036.87
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.37 +/- 0.10
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ice Hockey
          type: atari-icehockey
        metrics:
          - type: total_reward
            value: 7.61 +/- 5.28
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.52 +/- 0.15
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.55 +/- 0.44
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: James Bond
          type: atari-jamesbond
        metrics:
          - type: total_reward
            value: 425.00 +/- 632.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.45 +/- 2.31
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Kangaroo
          type: atari-kangaroo
        metrics:
          - type: total_reward
            value: 375.00 +/- 314.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.62 +/- 0.60
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.11 +/- 0.11
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Krull
          type: atari-krull
        metrics:
          - type: total_reward
            value: 10743.30 +/- 1311.26
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.93 +/- 0.13
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 8.57 +/- 1.23
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Kung-Fu Master
          type: atari-kungfumaster
        metrics:
          - type: total_reward
            value: 253.00 +/- 233.86
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.00 +/- 0.01'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.00 +/- 0.01'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Montezuma's Revenge
          type: atari-montezumarevenge
        metrics:
          - type: total_reward
            value: 0.00 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ms. Pacman
          type: atari-mspacman
        metrics:
          - type: total_reward
            value: 1610.10 +/- 504.08
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.20 +/- 0.08
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.20 +/- 0.08
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Name This Game
          type: atari-namethisgame
        metrics:
          - type: total_reward
            value: 7726.40 +/- 2166.18
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.26 +/- 0.10
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.94 +/- 0.38
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Phoenix
          type: atari-phoenix
        metrics:
          - type: total_reward
            value: 1814.20 +/- 1275.29
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.16 +/- 0.20
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: PitFall
          type: atari-pitfall
        metrics:
          - type: total_reward
            value: '-4.61 +/- 15.86'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.03 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pong
          type: atari-pong
        metrics:
          - type: total_reward
            value: 16.54 +/- 10.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.25
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.05 +/- 0.29
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Private Eye
          type: atari-privateeye
        metrics:
          - type: total_reward
            value: 44.00 +/- 49.64
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.66
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Q*Bert
          type: atari-qbert
        metrics:
          - type: total_reward
            value: 2118.50 +/- 2764.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.05 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.15 +/- 0.21
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: River Raid
          type: atari-riverraid
        metrics:
          - type: total_reward
            value: 3925.20 +/- 1530.94
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.19 +/- 0.11
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.16 +/- 0.10
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Road Runner
          type: atari-roadrunner
        metrics:
          - type: total_reward
            value: 6929.00 +/- 5577.35
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.88 +/- 0.71
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Robotank
          type: atari-robotank
        metrics:
          - type: total_reward
            value: 10.22 +/- 4.71
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.10 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.83 +/- 0.49
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Seaquest
          type: atari-seaquest
        metrics:
          - type: total_reward
            value: 859.80 +/- 407.80
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.31 +/- 0.16
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Skiing
          type: atari-skiing
        metrics:
          - type: total_reward
            value: '-15960.04 +/- 5887.52'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.18 +/- 0.93
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.09 +/- 0.46
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Solaris
          type: atari-solaris
        metrics:
          - type: total_reward
            value: 1202.60 +/- 445.27
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.29 +/- 3.79'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.00 +/- 0.04'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Space Invaders
          type: atari-spaceinvaders
        metrics:
          - type: total_reward
            value: 326.85 +/- 141.89
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.12 +/- 0.09
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Star Gunner
          type: atari-stargunner
        metrics:
          - type: total_reward
            value: 5219.00 +/- 3544.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.48 +/- 0.37
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Surround
          type: atari-surround
        metrics:
          - type: total_reward
            value: 1.52 +/- 4.60
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.59 +/- 0.24
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.70 +/- 0.28
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Tennis
          type: atari-tennis
        metrics:
          - type: total_reward
            value: '-12.80 +/- 3.70'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.32 +/- 0.11
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.34 +/- 0.12
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Time Pilot
          type: atari-timepilot
        metrics:
          - type: total_reward
            value: 11603.00 +/- 4323.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.12 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 4.84 +/- 2.60
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Tutankham
          type: atari-tutankham
        metrics:
          - type: total_reward
            value: 108.82 +/- 70.14
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.35 +/- 0.25
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.62 +/- 0.45
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Up and Down
          type: atari-upndown
        metrics:
          - type: total_reward
            value: 19074.60 +/- 9961.77
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.66 +/- 0.89
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Venture
          type: atari-venture
        metrics:
          - type: total_reward
            value: 0.00 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Video Pinball
          type: atari-videopinball
        metrics:
          - type: total_reward
            value: 12466.69 +/- 8723.07
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.71 +/- 0.49
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Wizard of Wor
          type: atari-wizardofwor
        metrics:
          - type: total_reward
            value: 2231.00 +/- 2042.92
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.40 +/- 0.49
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Yars Revenge
          type: atari-yarsrevenge
        metrics:
          - type: total_reward
            value: 11190.85 +/- 7342.58
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.03
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.16 +/- 0.14
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Zaxxon
          type: atari-zaxxon
        metrics:
          - type: total_reward
            value: 5976.00 +/- 2889.54
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.08 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.65 +/- 0.32
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Action Obj Door
          type: babyai-action-obj-door
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.22
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.88 +/- 0.36
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Blocked Unlock Pickup
          type: babyai-blocked-unlock-pickup
        metrics:
          - type: total_reward
            value: 0.95 +/- 0.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boss Level No Unlock
          type: babyai-boss-level-no-unlock
        metrics:
          - type: total_reward
            value: 0.49 +/- 0.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.49 +/- 0.49
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boss Level
          type: babyai-boss-level
        metrics:
          - type: total_reward
            value: 0.54 +/- 0.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.54 +/- 0.49
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Find Obj S5
          type: babyai-find-obj-s5
        metrics:
          - type: total_reward
            value: 0.94 +/- 0.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Door
          type: babyai-go-to-door
        metrics:
          - type: total_reward
            value: 0.99 +/- 0.02
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Imp Unlock
          type: babyai-go-to-imp-unlock
        metrics:
          - type: total_reward
            value: 0.53 +/- 0.41
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.60 +/- 0.55
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Local
          type: babyai-go-to-local
        metrics:
          - type: total_reward
            value: 0.87 +/- 0.16
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.93 +/- 0.22
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Obj Door
          type: babyai-go-to-obj-door
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.08
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Obj
          type: babyai-go-to-obj
        metrics:
          - type: total_reward
            value: 0.94 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball Grey
          type: babyai-go-to-red-ball-grey
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball No Dists
          type: babyai-go-to-red-ball-no-dists
        metrics:
          - type: total_reward
            value: 0.93 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball
          type: babyai-go-to-red-ball
        metrics:
          - type: total_reward
            value: 0.91 +/- 0.09
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.12
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Blue Ball
          type: babyai-go-to-red-blue-ball
        metrics:
          - type: total_reward
            value: 0.91 +/- 0.08
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Seq
          type: babyai-go-to-seq
        metrics:
          - type: total_reward
            value: 0.73 +/- 0.33
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.76 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To
          type: babyai-go-to
        metrics:
          - type: total_reward
            value: 0.78 +/- 0.28
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.82 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Key Corridor
          type: babyai-key-corridor
        metrics:
          - type: total_reward
            value: 0.87 +/- 0.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.96 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Mini Boss Level
          type: babyai-mini-boss-level
        metrics:
          - type: total_reward
            value: 0.53 +/- 0.41
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.56 +/- 0.50
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Move Two Across S8N9
          type: babyai-move-two-across-s8n9
        metrics:
          - type: total_reward
            value: 0.05 +/- 0.19
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.05 +/- 0.20
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: One Room S8
          type: babyai-one-room-s8
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Door
          type: babyai-open-door
        metrics:
          - type: total_reward
            value: 0.99 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Doors Order N4
          type: babyai-open-doors-order-n4
        metrics:
          - type: total_reward
            value: 0.96 +/- 0.14
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.96 +/- 0.17
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Red Door
          type: babyai-open-red-door
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Two Doors
          type: babyai-open-two-doors
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open
          type: babyai-open
        metrics:
          - type: total_reward
            value: 0.95 +/- 0.08
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Above
          type: babyai-pickup-above
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.06
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.07
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Dist
          type: babyai-pickup-dist
        metrics:
          - type: total_reward
            value: 0.87 +/- 0.12
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.02 +/- 0.16
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Loc
          type: babyai-pickup-loc
        metrics:
          - type: total_reward
            value: 0.85 +/- 0.19
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.92 +/- 0.23
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup
          type: babyai-pickup
        metrics:
          - type: total_reward
            value: 0.79 +/- 0.30
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.85 +/- 0.36
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Put Next Local
          type: babyai-put-next-local
        metrics:
          - type: total_reward
            value: 0.67 +/- 0.32
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.73 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Put Next S7N4
          type: babyai-put-next
        metrics:
          - type: total_reward
            value: 0.85 +/- 0.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.26
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth Loc
          type: babyai-synth-loc
        metrics:
          - type: total_reward
            value: 0.77 +/- 0.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.78 +/- 0.43
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth Seq
          type: babyai-synth-seq
        metrics:
          - type: total_reward
            value: 0.57 +/- 0.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.58 +/- 0.49
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth
          type: babyai-synth
        metrics:
          - type: total_reward
            value: 0.75 +/- 0.35
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.78 +/- 0.43
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unblock Pickup
          type: babyai-unblock-pickup
        metrics:
          - type: total_reward
            value: 0.79 +/- 0.29
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.86 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock Local
          type: babyai-unlock-local
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock Pickup
          type: babyai-unlock-pickup
        metrics:
          - type: total_reward
            value: 0.75 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock To Unlock
          type: babyai-unlock-to-unlock
        metrics:
          - type: total_reward
            value: 0.85 +/- 0.31
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.88 +/- 0.32
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock
          type: babyai-unlock
        metrics:
          - type: total_reward
            value: 0.43 +/- 0.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.48 +/- 0.52
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Assembly
          type: metaworld-assembly
        metrics:
          - type: total_reward
            value: 243.78 +/- 10.44
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Basketball
          type: metaworld-basketball
        metrics:
          - type: total_reward
            value: 1.71 +/- 0.63
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.00 +/- 0.00'
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: BinPicking
          type: metaworld-bin-picking
        metrics:
          - type: total_reward
            value: 314.42 +/- 196.40
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.74 +/- 0.46
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Box Close
          type: metaworld-box-close
        metrics:
          - type: total_reward
            value: 482.86 +/- 146.37
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.93 +/- 0.34
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Topdown Wall
          type: metaworld-button-press-topdown-wall
        metrics:
          - type: total_reward
            value: 268.30 +/- 82.78
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.51 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Topdown
          type: metaworld-button-press-topdown
        metrics:
          - type: total_reward
            value: 269.14 +/- 82.81
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.52 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Wall
          type: metaworld-button-press-wall
        metrics:
          - type: total_reward
            value: 608.87 +/- 169.50
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.90 +/- 0.25
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press
          type: metaworld-button-press
        metrics:
          - type: total_reward
            value: 624.03 +/- 73.53
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.12
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Button
          type: metaworld-coffee-button
        metrics:
          - type: total_reward
            value: 334.92 +/- 301.67
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.43 +/- 0.43
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Pull
          type: metaworld-coffee-pull
        metrics:
          - type: total_reward
            value: 38.00 +/- 63.97
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.13 +/- 0.25
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Push
          type: metaworld-coffee-push
        metrics:
          - type: total_reward
            value: 151.38 +/- 207.69
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.30 +/- 0.42
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Dial Turn
          type: metaworld-dial-turn
        metrics:
          - type: total_reward
            value: 752.25 +/- 138.50
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.95 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Disassemble
          type: metaworld-disassemble
        metrics:
          - type: total_reward
            value: 40.87 +/- 9.35
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.22 +/- 3.71
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Close
          type: metaworld-door-close
        metrics:
          - type: total_reward
            value: 530.48 +/- 29.02
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Lock
          type: metaworld-door-lock
        metrics:
          - type: total_reward
            value: 678.98 +/- 194.57
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.81 +/- 0.28
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Open
          type: metaworld-door-open
        metrics:
          - type: total_reward
            value: 574.71 +/- 50.82
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Unlock
          type: metaworld-door-unlock
        metrics:
          - type: total_reward
            value: 761.82 +/- 114.70
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.16
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Drawer Close
          type: metaworld-drawer-close
        metrics:
          - type: total_reward
            value: 519.05 +/- 154.38
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.54 +/- 0.21
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Drawer Open
          type: metaworld-drawer-open
        metrics:
          - type: total_reward
            value: 486.02 +/- 34.17
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.09
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Faucet Close
          type: metaworld-faucet-close
        metrics:
          - type: total_reward
            value: 366.78 +/- 86.77
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.23 +/- 0.17
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Faucet Open
          type: metaworld-faucet-open
        metrics:
          - type: total_reward
            value: 685.01 +/- 65.52
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.96 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hammer
          type: metaworld-hammer
        metrics:
          - type: total_reward
            value: 678.36 +/- 79.36
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.13
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hand Insert
          type: metaworld-hand-insert
        metrics:
          - type: total_reward
            value: 695.27 +/- 134.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Press Side
          type: metaworld-handle-press-side
        metrics:
          - type: total_reward
            value: 65.07 +/- 69.65
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.09
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Press
          type: metaworld-handle-press
        metrics:
          - type: total_reward
            value: 695.97 +/- 311.48
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.79 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Pull Side
          type: metaworld-handle-pull-side
        metrics:
          - type: total_reward
            value: 145.34 +/- 179.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.37 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Pull
          type: metaworld-handle-pull
        metrics:
          - type: total_reward
            value: 514.56 +/- 205.75
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.77 +/- 0.31
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Lever Pull
          type: metaworld-lever-pull
        metrics:
          - type: total_reward
            value: 250.51 +/- 220.33
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.34 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Peg Insert Side
          type: metaworld-peg-insert-side
        metrics:
          - type: total_reward
            value: 305.94 +/- 166.53
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.53
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Peg Unplug Side
          type: metaworld-peg-unplug-side
        metrics:
          - type: total_reward
            value: 120.73 +/- 169.26
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.26 +/- 0.37
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Out Of Hole
          type: metaworld-pick-out-of-hole
        metrics:
          - type: total_reward
            value: 2.08 +/- 0.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Place Wall
          type: metaworld-pick-place-wall
        metrics:
          - type: total_reward
            value: 62.30 +/- 131.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.14 +/- 0.29
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Place
          type: metaworld-pick-place
        metrics:
          - type: total_reward
            value: 311.95 +/- 180.95
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.74 +/- 0.43
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Back Side
          type: metaworld-plate-slide-back-side
        metrics:
          - type: total_reward
            value: 689.54 +/- 157.90
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.23
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Back
          type: metaworld-plate-slide-back
        metrics:
          - type: total_reward
            value: 197.00 +/- 1.58
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.24 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Side
          type: metaworld-plate-slide-side
        metrics:
          - type: total_reward
            value: 122.56 +/- 24.56
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.16 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide
          type: metaworld-plate-slide
        metrics:
          - type: total_reward
            value: 456.66 +/- 198.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.84 +/- 0.44
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push Back
          type: metaworld-push-back
        metrics:
          - type: total_reward
            value: 71.38 +/- 100.60
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.84 +/- 1.20
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push Wall
          type: metaworld-push-wall
        metrics:
          - type: total_reward
            value: 216.66 +/- 256.33
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.28 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push
          type: metaworld-push
        metrics:
          - type: total_reward
            value: 583.25 +/- 296.10
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.78 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reach Wall
          type: metaworld-reach-wall
        metrics:
          - type: total_reward
            value: 681.90 +/- 186.63
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.31
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reach
          type: metaworld-reach
        metrics:
          - type: total_reward
            value: 347.45 +/- 190.66
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.37 +/- 0.36
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Shelf Place
          type: metaworld-shelf-place
        metrics:
          - type: total_reward
            value: 60.57 +/- 97.16
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Soccer
          type: metaworld-soccer
        metrics:
          - type: total_reward
            value: 309.21 +/- 172.64
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.82 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Stick Pull
          type: metaworld-stick-pull
        metrics:
          - type: total_reward
            value: 364.98 +/- 234.82
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.70 +/- 0.45
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Stick Push
          type: metaworld-stick-push
        metrics:
          - type: total_reward
            value: 91.05 +/- 204.71
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.14 +/- 0.33
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Sweep Into
          type: metaworld-sweep-into
        metrics:
          - type: total_reward
            value: 714.98 +/- 209.19
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.27
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Sweep
          type: metaworld-sweep
        metrics:
          - type: total_reward
            value: 15.82 +/- 16.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Window Close
          type: metaworld-window-close
        metrics:
          - type: total_reward
            value: 347.90 +/- 222.50
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.54 +/- 0.42
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Window Open
          type: metaworld-window-open
        metrics:
          - type: total_reward
            value: 574.72 +/- 75.65
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ant
          type: mujoco-ant
        metrics:
          - type: total_reward
            value: 4993.13 +/- 1656.89
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.86 +/- 0.28
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Inverted Double Pendulum
          type: mujoco-doublependulum
        metrics:
          - type: total_reward
            value: 8744.92 +/- 1471.45
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.16
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Half Cheetah
          type: mujoco-halfcheetah
        metrics:
          - type: total_reward
            value: 6601.12 +/- 488.36
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hopper
          type: mujoco-hopper
        metrics:
          - type: total_reward
            value: 1435.45 +/- 361.77
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.77 +/- 0.20
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Humanoid
          type: mujoco-humanoid
        metrics:
          - type: total_reward
            value: 695.92 +/- 115.07
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.02
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Inverted Pendulum
          type: mujoco-pendulum
        metrics:
          - type: total_reward
            value: 117.64 +/- 21.73
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.24 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pusher
          type: mujoco-pusher
        metrics:
          - type: total_reward
            value: '-24.93 +/- 6.47'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reacher
          type: mujoco-reacher
        metrics:
          - type: total_reward
            value: '-5.77 +/- 2.27'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Humanoid Standup
          type: mujoco-standup
        metrics:
          - type: total_reward
            value: 113587.22 +/- 21821.69
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.33 +/- 0.09
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Swimmer
          type: mujoco-swimmer
        metrics:
          - type: total_reward
            value: 94.08 +/- 3.94
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.02 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Walker 2d
          type: mujoco-walker
        metrics:
          - type: total_reward
            value: 4381.69 +/- 848.39
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.95 +/- 0.18
            name: Expert normalized total reward

Model Card for JAT

JAT is a multi-modal, multi-task model covering the Atari, BabyAI, Meta-World, and MuJoCo tasks listed in the Training section below.

Model Details

Model Description

  • Developed by: The JAT Team
  • License: Apache 2.0

Model Sources

Training

The model was trained on the following tasks:
  • Alien
  • Amidar
  • Assault
  • Asterix
  • Asteroids
  • Atlantis
  • Bank Heist
  • Battle Zone
  • Beam Rider
  • Berzerk
  • Bowling
  • Boxing
  • Breakout
  • Centipede
  • Chopper Command
  • Crazy Climber
  • Defender
  • Demon Attack
  • Double Dunk
  • Enduro
  • Fishing Derby
  • Freeway
  • Frostbite
  • Gopher
  • Gravitar
  • H.E.R.O.
  • Ice Hockey
  • James Bond
  • Kangaroo
  • Krull
  • Kung-Fu Master
  • Montezuma's Revenge
  • Ms. Pacman
  • Name This Game
  • Phoenix
  • Pitfall
  • Pong
  • Private Eye
  • Q*Bert
  • River Raid
  • Road Runner
  • Robotank
  • Seaquest
  • Skiing
  • Solaris
  • Space Invaders
  • Star Gunner
  • Surround
  • Tennis
  • Time Pilot
  • Tutankham
  • Up and Down
  • Venture
  • Video Pinball
  • Wizard of Wor
  • Yars' Revenge
  • Zaxxon
  • Action Obj Door
  • Blocked Unlock Pickup
  • Boss Level No Unlock
  • Boss Level
  • Find Obj S5
  • Go To Door
  • Go To Imp Unlock
  • Go To Local
  • Go To Obj Door
  • Go To Obj
  • Go To Red Ball Grey
  • Go To Red Ball No Dists
  • Go To Red Ball
  • Go To Red Blue Ball
  • Go To Seq
  • Go To
  • Key Corridor
  • Mini Boss Level
  • Move Two Across S8N9
  • One Room S8
  • Open Door
  • Open Doors Order N4
  • Open Red Door
  • Open Two Doors
  • Open
  • Pickup Above
  • Pickup Dist
  • Pickup Loc
  • Pickup
  • Put Next Local
  • Put Next S7N4
  • Synth Loc
  • Synth Seq
  • Synth
  • Unblock Pickup
  • Unlock Local
  • Unlock Pickup
  • Unlock To Unlock
  • Unlock
  • Assembly
  • Basketball
  • Bin Picking
  • Box Close
  • Button Press Topdown Wall
  • Button Press Topdown
  • Button Press Wall
  • Button Press
  • Coffee Button
  • Coffee Pull
  • Coffee Push
  • Dial Turn
  • Disassemble
  • Door Close
  • Door Lock
  • Door Open
  • Door Unlock
  • Drawer Close
  • Drawer Open
  • Faucet Close
  • Faucet Open
  • Hammer
  • Hand Insert
  • Handle Press Side
  • Handle Press
  • Handle Pull Side
  • Handle Pull
  • Lever Pull
  • Peg Insert Side
  • Peg Unplug Side
  • Pick Out Of Hole
  • Pick Place Wall
  • Pick Place
  • Plate Slide Back Side
  • Plate Slide Back
  • Plate Slide Side
  • Plate Slide
  • Push Back
  • Push Wall
  • Push
  • Reach Wall
  • Reach
  • Shelf Place
  • Soccer
  • Stick Pull
  • Stick Push
  • Sweep Into
  • Sweep
  • Window Close
  • Window Open
  • Ant
  • Inverted Double Pendulum
  • Half Cheetah
  • Hopper
  • Humanoid
  • Inverted Pendulum
  • Pusher
  • Reacher
  • Humanoid Standup
  • Swimmer
  • Walker 2d

How to Get Started with the Model

Use the code below to get started with the model.

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("jat-project/jat")