--- tags: - reinforcement-learning - atari-alien - atari-amidar - atari-assault - atari-asterix - atari-asteroids - atari-atlantis - atari-bankheist - atari-battlezone - atari-beamrider - atari-berzerk - atari-bowling - atari-boxing - atari-breakout - atari-centipede - atari-choppercommand - atari-crazyclimber - atari-defender - atari-demonattack - atari-doubledunk - atari-enduro - atari-fishingderby - atari-freeway - atari-frostbite - atari-gopher - atari-gravitar - atari-hero - atari-icehockey - atari-jamesbond - atari-kangaroo - atari-krull - atari-kungfumaster - atari-montezumarevenge - atari-mspacman - atari-namethisgame - atari-phoenix - atari-pitfall - atari-pong - atari-privateeye - atari-qbert - atari-riverraid - atari-roadrunner - atari-robotank - atari-seaquest - atari-skiing - atari-solaris - atari-spaceinvaders - atari-stargunner - atari-surround - atari-tennis - atari-timepilot - atari-tutankham - atari-upndown - atari-venture - atari-videopinball - atari-wizardofwor - atari-yarsrevenge - atari-zaxxon - babyai-action-obj-door - babyai-blocked-unlock-pickup - babyai-boss-level-no-unlock - babyai-boss-level - babyai-find-obj-s5 - babyai-go-to-door - babyai-go-to-imp-unlock - babyai-go-to-local - babyai-go-to-obj-door - babyai-go-to-obj - babyai-go-to-red-ball-grey - babyai-go-to-red-ball-no-dists - babyai-go-to-red-ball - babyai-go-to-red-blue-ball - babyai-go-to-seq - babyai-go-to - babyai-key-corridor - babyai-mini-boss-level - babyai-move-two-across-s8n9 - babyai-one-room-s8 - babyai-open-door - babyai-open-doors-order-n4 - babyai-open-red-door - babyai-open-two-doors - babyai-open - babyai-pickup-above - babyai-pickup-dist - babyai-pickup-loc - babyai-pickup - babyai-put-next-local - babyai-put-next - babyai-synth-loc - babyai-synth-seq - babyai-synth - babyai-unblock-pickup - babyai-unlock-local - babyai-unlock-pickup - babyai-unlock-to-unlock - babyai-unlock - metaworld-assembly - metaworld-basketball - metaworld-bin-picking - metaworld-box-close 
- metaworld-button-press-topdown-wall - metaworld-button-press-topdown - metaworld-button-press-wall - metaworld-button-press - metaworld-coffee-button - metaworld-coffee-pull - metaworld-coffee-push - metaworld-dial-turn - metaworld-disassemble - metaworld-door-close - metaworld-door-lock - metaworld-door-open - metaworld-door-unlock - metaworld-drawer-close - metaworld-drawer-open - metaworld-faucet-close - metaworld-faucet-open - metaworld-hammer - metaworld-hand-insert - metaworld-handle-press-side - metaworld-handle-press - metaworld-handle-pull-side - metaworld-handle-pull - metaworld-lever-pull - metaworld-peg-insert-side - metaworld-peg-unplug-side - metaworld-pick-out-of-hole - metaworld-pick-place-wall - metaworld-pick-place - metaworld-plate-slide-back-side - metaworld-plate-slide-back - metaworld-plate-slide-side - metaworld-plate-slide - metaworld-push-back - metaworld-push-wall - metaworld-push - metaworld-reach-wall - metaworld-reach - metaworld-shelf-place - metaworld-soccer - metaworld-stick-pull - metaworld-stick-push - metaworld-sweep-into - metaworld-sweep - metaworld-window-close - metaworld-window-open - mujoco-ant - mujoco-doublependulum - mujoco-halfcheetah - mujoco-hopper - mujoco-humanoid - mujoco-pendulum - mujoco-pusher - mujoco-reacher - mujoco-standup - mujoco-swimmer - mujoco-walker datasets: jat-project/jat-dataset pipeline_tag: reinforcement-learning model-index: - name: jat-project/jat results: - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Atari 57 type: atari metrics: - type: iqm_expert_normalized_total_reward value: 0.06 [0.06, 0.06] name: IQM expert normalized total reward - type: iqm_human_normalized_total_reward value: 0.17 [0.16, 0.17] name: IQM human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: BabyAI type: babyai metrics: - type: iqm_expert_normalized_total_reward value: 0.99 [0.99, 0.99] name: IQM expert normalized total reward - 
task: type: reinforcement-learning name: Reinforcement Learning dataset: name: MetaWorld type: metaworld metrics: - type: iqm_expert_normalized_total_reward value: 0.68 [0.67, 0.69] name: IQM expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: MuJoCo type: mujoco metrics: - type: iqm_expert_normalized_total_reward value: 0.81 [0.80, 0.82] name: IQM expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Alien type: atari-alien metrics: - type: total_reward value: 1085.90 +/- 396.36 name: Total reward - type: expert_normalized_total_reward value: 0.05 +/- 0.02 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.12 +/- 0.06 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Amidar type: atari-amidar metrics: - type: total_reward value: 41.26 +/- 28.57 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.02 +/- 0.02 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Assault type: atari-assault metrics: - type: total_reward value: 772.89 +/- 59.34 name: Total reward - type: expert_normalized_total_reward value: 0.04 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 1.06 +/- 0.11 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Asterix type: atari-asterix metrics: - type: total_reward value: 778.50 +/- 428.97 name: Total reward - type: expert_normalized_total_reward value: 0.16 +/- 0.12 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.07 +/- 0.05 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning 
dataset: name: Asteroids type: atari-asteroids metrics: - type: total_reward value: 1423.60 +/- 538.79 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.02 +/- 0.01 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Atlantis type: atari-atlantis metrics: - type: total_reward value: 23541.00 +/- 10376.72 name: Total reward - type: expert_normalized_total_reward value: 0.03 +/- 0.03 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.66 +/- 0.64 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Bank Heist type: atari-bankheist metrics: - type: total_reward value: 685.50 +/- 157.92 name: Total reward - type: expert_normalized_total_reward value: 0.51 +/- 0.12 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.91 +/- 0.21 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Battle Zone type: atari-battlezone metrics: - type: total_reward value: 12950.00 +/- 4306.68 name: Total reward - type: expert_normalized_total_reward value: 0.04 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.34 +/- 0.12 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Beam Rider type: atari-beamrider metrics: - type: total_reward value: 762.04 +/- 243.25 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.02 +/- 0.01 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Berzerk type: atari-berzerk metrics: - type: total_reward value: 523.90 +/- 
161.95 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.16 +/- 0.06 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Bowling type: atari-bowling metrics: - type: total_reward value: 29.99 +/- 11.49 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.05 +/- 0.08 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Boxing type: atari-boxing metrics: - type: total_reward value: 87.00 +/- 22.57 name: Total reward - type: expert_normalized_total_reward value: 0.89 +/- 0.23 name: Expert normalized total reward - type: human_normalized_total_reward value: 7.24 +/- 1.88 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Breakout type: atari-breakout metrics: - type: total_reward value: 9.16 +/- 5.76 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.26 +/- 0.20 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Centipede type: atari-centipede metrics: - type: total_reward value: 4461.72 +/- 2188.80 name: Total reward - type: expert_normalized_total_reward value: 0.25 +/- 0.23 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.24 +/- 0.22 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Chopper Command type: atari-choppercommand metrics: - type: total_reward value: 1497.00 +/- 723.11 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.01 name: Expert normalized 
total reward - type: human_normalized_total_reward value: 0.10 +/- 0.11 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Crazy Climber type: atari-crazyclimber metrics: - type: total_reward value: 52850.00 +/- 31617.86 name: Total reward - type: expert_normalized_total_reward value: 0.25 +/- 0.19 name: Expert normalized total reward - type: human_normalized_total_reward value: 1.68 +/- 1.26 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Defender type: atari-defender metrics: - type: total_reward value: 10627.50 +/- 4473.21 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.49 +/- 0.28 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Demon Attack type: atari-demonattack metrics: - type: total_reward value: 315.10 +/- 279.01 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.09 +/- 0.15 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Double Dunk type: atari-doubledunk metrics: - type: total_reward value: 0.08 +/- 11.61 name: Total reward - type: expert_normalized_total_reward value: 0.47 +/- 0.29 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.53 +/- 0.33 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Enduro type: atari-enduro metrics: - type: total_reward value: 111.49 +/- 27.36 name: Total reward - type: expert_normalized_total_reward value: 0.05 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.13 +/- 0.03 name: Human normalized 
total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Fishing Derby type: atari-fishingderby metrics: - type: total_reward value: -55.21 +/- 19.35 name: Total reward - type: expert_normalized_total_reward value: 0.37 +/- 0.20 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.28 +/- 0.15 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Freeway type: atari-freeway metrics: - type: total_reward value: 24.12 +/- 1.64 name: Total reward - type: expert_normalized_total_reward value: 0.71 +/- 0.05 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.81 +/- 0.06 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Frostbite type: atari-frostbite metrics: - type: total_reward value: 617.30 +/- 686.11 name: Total reward - type: expert_normalized_total_reward value: 0.04 +/- 0.05 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.13 +/- 0.16 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Gopher type: atari-gopher metrics: - type: total_reward value: 2947.20 +/- 1448.32 name: Total reward - type: expert_normalized_total_reward value: 0.03 +/- 0.02 name: Expert normalized total reward - type: human_normalized_total_reward value: 1.25 +/- 0.67 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Gravitar type: atari-gravitar metrics: - type: total_reward value: 1030.50 +/- 719.20 name: Total reward - type: expert_normalized_total_reward value: 0.22 +/- 0.19 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.27 +/- 0.23 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: H.E.R.O. 
type: atari-hero metrics: - type: total_reward value: 6997.95 +/- 2562.51 name: Total reward - type: expert_normalized_total_reward value: 0.14 +/- 0.06 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.20 +/- 0.09 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Ice Hockey type: atari-icehockey metrics: - type: total_reward value: -3.77 +/- 3.10 name: Total reward - type: expert_normalized_total_reward value: 0.20 +/- 0.09 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.61 +/- 0.26 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: James Bond type: atari-jamesbond metrics: - type: total_reward value: 187.50 +/- 72.24 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.58 +/- 0.26 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Kangaroo type: atari-kangaroo metrics: - type: total_reward value: 124.00 +/- 156.92 name: Total reward - type: expert_normalized_total_reward value: 0.14 +/- 0.30 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.02 +/- 0.05 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Krull type: atari-krull metrics: - type: total_reward value: 8933.00 +/- 1358.65 name: Total reward - type: expert_normalized_total_reward value: 0.75 +/- 0.14 name: Expert normalized total reward - type: human_normalized_total_reward value: 6.87 +/- 1.27 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Kung-Fu Master type: atari-kungfumaster metrics: - type: total_reward value: 100.00 +/- 142.13 name: Total reward - type: 
expert_normalized_total_reward value: -0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: -0.01 +/- 0.01 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Montezuma's Revenge type: atari-montezumarevenge metrics: - type: total_reward value: 0.00 +/- 0.00 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.00 +/- 0.00 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Ms. Pacman type: atari-mspacman metrics: - type: total_reward value: 1516.30 +/- 376.72 name: Total reward - type: expert_normalized_total_reward value: 0.18 +/- 0.06 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.18 +/- 0.06 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Name This Game type: atari-namethisgame metrics: - type: total_reward value: 3798.60 +/- 1361.64 name: Total reward - type: expert_normalized_total_reward value: 0.07 +/- 0.07 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.26 +/- 0.24 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Phoenix type: atari-phoenix metrics: - type: total_reward value: 1267.50 +/- 1013.72 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.08 +/- 0.16 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: PitFall type: atari-pitfall metrics: - type: total_reward value: -287.36 +/- 492.82 name: Total reward - type: expert_normalized_total_reward value: -0.25 +/- 2.16 name: Expert normalized total 
reward - type: human_normalized_total_reward value: -0.01 +/- 0.07 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pong type: atari-pong metrics: - type: total_reward value: -11.03 +/- 11.29 name: Total reward - type: expert_normalized_total_reward value: 0.23 +/- 0.27 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.27 +/- 0.32 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Private Eye type: atari-privateeye metrics: - type: total_reward value: 96.00 +/- 19.60 name: Total reward - type: expert_normalized_total_reward value: 0.95 +/- 0.26 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.00 +/- 0.00 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Q*Bert type: atari-qbert metrics: - type: total_reward value: 1701.75 +/- 1912.56 name: Total reward - type: expert_normalized_total_reward value: 0.04 +/- 0.04 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.12 +/- 0.14 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: River Raid type: atari-riverraid metrics: - type: total_reward value: 2793.10 +/- 693.84 name: Total reward - type: expert_normalized_total_reward value: 0.11 +/- 0.05 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.09 +/- 0.04 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Road Runner type: atari-roadrunner metrics: - type: total_reward value: 7699.00 +/- 3446.61 name: Total reward - type: expert_normalized_total_reward value: 0.10 +/- 0.04 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.98 +/- 0.44 name: Human normalized total reward - task: 
type: reinforcement-learning name: Reinforcement Learning dataset: name: Robotank type: atari-robotank metrics: - type: total_reward value: 16.36 +/- 5.24 name: Total reward - type: expert_normalized_total_reward value: 0.18 +/- 0.07 name: Expert normalized total reward - type: human_normalized_total_reward value: 1.46 +/- 0.54 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Seaquest type: atari-seaquest metrics: - type: total_reward value: 515.20 +/- 141.51 name: Total reward - type: expert_normalized_total_reward value: 0.18 +/- 0.06 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.01 +/- 0.00 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Skiing type: atari-skiing metrics: - type: total_reward value: -29396.08 +/- 3289.80 name: Total reward - type: expert_normalized_total_reward value: -1.93 +/- 0.52 name: Expert normalized total reward - type: human_normalized_total_reward value: -0.96 +/- 0.26 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Solaris type: atari-solaris metrics: - type: total_reward value: 988.20 +/- 487.42 name: Total reward - type: expert_normalized_total_reward value: -2.11 +/- 4.15 name: Expert normalized total reward - type: human_normalized_total_reward value: -0.02 +/- 0.04 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Space Invaders type: atari-spaceinvaders metrics: - type: total_reward value: 339.50 +/- 164.05 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.13 +/- 0.11 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Star Gunner type: 
atari-stargunner metrics: - type: total_reward value: 978.00 +/- 638.37 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.03 +/- 0.07 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Surround type: atari-surround metrics: - type: total_reward value: -8.22 +/- 1.19 name: Total reward - type: expert_normalized_total_reward value: 0.09 +/- 0.06 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.11 +/- 0.07 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Tennis type: atari-tennis metrics: - type: total_reward value: -22.38 +/- 2.22 name: Total reward - type: expert_normalized_total_reward value: 0.04 +/- 0.06 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.04 +/- 0.07 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Time Pilot type: atari-timepilot metrics: - type: total_reward value: 9534.00 +/- 2577.76 name: Total reward - type: expert_normalized_total_reward value: 0.09 +/- 0.04 name: Expert normalized total reward - type: human_normalized_total_reward value: 3.59 +/- 1.55 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Tutankham type: atari-tutankham metrics: - type: total_reward value: 40.20 +/- 14.51 name: Total reward - type: expert_normalized_total_reward value: 0.10 +/- 0.05 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.18 +/- 0.09 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Up and Down type: atari-upndown metrics: - type: total_reward value: 6072.00 +/- 2283.30 name: Total reward - type: 
expert_normalized_total_reward value: 0.01 +/- 0.01 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.50 +/- 0.20 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Venture type: atari-venture metrics: - type: total_reward value: 0.00 +/- 0.00 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.00 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.00 +/- 0.00 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Video Pinball type: atari-videopinball metrics: - type: total_reward value: 7943.01 +/- 8351.21 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.02 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.45 +/- 0.47 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Wizard of Wor type: atari-wizardofwor metrics: - type: total_reward value: 1306.00 +/- 1139.81 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.02 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.18 +/- 0.27 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Yars Revenge type: atari-yarsrevenge metrics: - type: total_reward value: 8597.41 +/- 4291.81 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.02 name: Expert normalized total reward - type: human_normalized_total_reward value: 0.11 +/- 0.08 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Zaxxon type: atari-zaxxon metrics: - type: total_reward value: 896.00 +/- 1172.68 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.02 name: Expert normalized total reward - 
type: human_normalized_total_reward value: 0.09 +/- 0.13 name: Human normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Action Obj Door type: babyai-action-obj-door metrics: - type: total_reward value: 0.95 +/- 0.13 name: Total reward - type: expert_normalized_total_reward value: 0.94 +/- 0.22 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Blocked Unlock Pickup type: babyai-blocked-unlock-pickup metrics: - type: total_reward value: 0.95 +/- 0.01 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.01 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Boss Level No Unlock type: babyai-boss-level-no-unlock metrics: - type: total_reward value: 0.44 +/- 0.45 name: Total reward - type: expert_normalized_total_reward value: 0.43 +/- 0.51 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Boss Level type: babyai-boss-level metrics: - type: total_reward value: 0.48 +/- 0.45 name: Total reward - type: expert_normalized_total_reward value: 0.48 +/- 0.51 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Find Obj S5 type: babyai-find-obj-s5 metrics: - type: total_reward value: 0.95 +/- 0.03 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Door type: babyai-go-to-door metrics: - type: total_reward value: 0.99 +/- 0.01 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.01 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Imp Unlock type: babyai-go-to-imp-unlock metrics: - type: 
total_reward value: 0.50 +/- 0.44 name: Total reward - type: expert_normalized_total_reward value: 0.56 +/- 0.59 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Local type: babyai-go-to-local metrics: - type: total_reward value: 0.88 +/- 0.14 name: Total reward - type: expert_normalized_total_reward value: 0.94 +/- 0.18 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Obj Door type: babyai-go-to-obj-door metrics: - type: total_reward value: 0.98 +/- 0.04 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.08 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Obj type: babyai-go-to-obj metrics: - type: total_reward value: 0.93 +/- 0.04 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.05 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Red Ball Grey type: babyai-go-to-red-ball-grey metrics: - type: total_reward value: 0.91 +/- 0.06 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.08 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Red Ball No Dists type: babyai-go-to-red-ball-no-dists metrics: - type: total_reward value: 0.93 +/- 0.03 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Red Ball type: babyai-go-to-red-ball metrics: - type: total_reward value: 0.91 +/- 0.08 name: Total reward - type: expert_normalized_total_reward value: 0.98 +/- 0.11 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Red 
Blue Ball type: babyai-go-to-red-blue-ball metrics: - type: total_reward value: 0.88 +/- 0.11 name: Total reward - type: expert_normalized_total_reward value: 0.96 +/- 0.13 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To Seq type: babyai-go-to-seq metrics: - type: total_reward value: 0.73 +/- 0.34 name: Total reward - type: expert_normalized_total_reward value: 0.75 +/- 0.40 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Go To type: babyai-go-to metrics: - type: total_reward value: 0.80 +/- 0.27 name: Total reward - type: expert_normalized_total_reward value: 0.85 +/- 0.35 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Key Corridor type: babyai-key-corridor metrics: - type: total_reward value: 0.88 +/- 0.10 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.11 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Mini Boss Level type: babyai-mini-boss-level metrics: - type: total_reward value: 0.69 +/- 0.35 name: Total reward - type: expert_normalized_total_reward value: 0.76 +/- 0.43 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Move Two Across S8N9 type: babyai-move-two-across-s8n9 metrics: - type: total_reward value: 0.03 +/- 0.15 name: Total reward - type: expert_normalized_total_reward value: 0.03 +/- 0.16 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: One Room S8 type: babyai-one-room-s8 metrics: - type: total_reward value: 0.92 +/- 0.03 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning 
dataset: name: Open Door type: babyai-open-door metrics: - type: total_reward value: 0.99 +/- 0.00 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.01 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Open Doors Order N4 type: babyai-open-doors-order-n4 metrics: - type: total_reward value: 0.96 +/- 0.11 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.13 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Open Red Door type: babyai-open-red-door metrics: - type: total_reward value: 0.92 +/- 0.02 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.03 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Open Two Doors type: babyai-open-two-doors metrics: - type: total_reward value: 0.98 +/- 0.00 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.00 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Open type: babyai-open metrics: - type: total_reward value: 0.93 +/- 0.11 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.13 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pickup Above type: babyai-pickup-above metrics: - type: total_reward value: 0.92 +/- 0.06 name: Total reward - type: expert_normalized_total_reward value: 1.01 +/- 0.07 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pickup Dist type: babyai-pickup-dist metrics: - type: total_reward value: 0.88 +/- 0.13 name: Total reward - type: expert_normalized_total_reward value: 1.03 +/- 0.18 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement 
Learning dataset: name: Pickup Loc type: babyai-pickup-loc metrics: - type: total_reward value: 0.84 +/- 0.20 name: Total reward - type: expert_normalized_total_reward value: 0.91 +/- 0.24 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pickup type: babyai-pickup metrics: - type: total_reward value: 0.72 +/- 0.34 name: Total reward - type: expert_normalized_total_reward value: 0.77 +/- 0.40 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Put Next Local type: babyai-put-next-local metrics: - type: total_reward value: 0.60 +/- 0.36 name: Total reward - type: expert_normalized_total_reward value: 0.65 +/- 0.39 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Put Next S7N4 type: babyai-put-next metrics: - type: total_reward value: 0.82 +/- 0.26 name: Total reward - type: expert_normalized_total_reward value: 0.86 +/- 0.27 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Synth Loc type: babyai-synth-loc metrics: - type: total_reward value: 0.82 +/- 0.31 name: Total reward - type: expert_normalized_total_reward value: 0.85 +/- 0.38 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Synth Seq type: babyai-synth-seq metrics: - type: total_reward value: 0.57 +/- 0.44 name: Total reward - type: expert_normalized_total_reward value: 0.57 +/- 0.50 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Synth type: babyai-synth metrics: - type: total_reward value: 0.68 +/- 0.39 name: Total reward - type: expert_normalized_total_reward value: 0.69 +/- 0.47 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Unblock 
Pickup type: babyai-unblock-pickup metrics: - type: total_reward value: 0.76 +/- 0.33 name: Total reward - type: expert_normalized_total_reward value: 0.82 +/- 0.39 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Unlock Local type: babyai-unlock-local metrics: - type: total_reward value: 0.98 +/- 0.01 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.01 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Unlock Pickup type: babyai-unlock-pickup metrics: - type: total_reward value: 0.76 +/- 0.03 name: Total reward - type: expert_normalized_total_reward value: 1.01 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Unlock To Unlock type: babyai-unlock-to-unlock metrics: - type: total_reward value: 0.86 +/- 0.29 name: Total reward - type: expert_normalized_total_reward value: 0.89 +/- 0.30 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Unlock type: babyai-unlock metrics: - type: total_reward value: 0.55 +/- 0.42 name: Total reward - type: expert_normalized_total_reward value: 0.63 +/- 0.50 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Assembly type: metaworld-assembly metrics: - type: total_reward value: 238.32 +/- 32.98 name: Total reward - type: expert_normalized_total_reward value: 0.96 +/- 0.16 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Basketball type: metaworld-basketball metrics: - type: total_reward value: 1.59 +/- 0.43 name: Total reward - type: expert_normalized_total_reward value: -0.00 +/- 0.00 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: 
name: BinPicking type: metaworld-bin-picking metrics: - type: total_reward value: 374.18 +/- 168.23 name: Total reward - type: expert_normalized_total_reward value: 0.88 +/- 0.40 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Box Close type: metaworld-box-close metrics: - type: total_reward value: 510.10 +/- 117.47 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.27 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Button Press Topdown Wall type: metaworld-button-press-topdown-wall metrics: - type: total_reward value: 260.07 +/- 67.75 name: Total reward - type: expert_normalized_total_reward value: 0.49 +/- 0.14 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Button Press Topdown type: metaworld-button-press-topdown metrics: - type: total_reward value: 265.16 +/- 77.93 name: Total reward - type: expert_normalized_total_reward value: 0.51 +/- 0.17 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Button Press Wall type: metaworld-button-press-wall metrics: - type: total_reward value: 621.75 +/- 137.13 name: Total reward - type: expert_normalized_total_reward value: 0.92 +/- 0.21 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Button Press type: metaworld-button-press metrics: - type: total_reward value: 556.75 +/- 198.85 name: Total reward - type: expert_normalized_total_reward value: 0.86 +/- 0.33 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Coffee Button type: metaworld-coffee-button metrics: - type: total_reward value: 250.50 +/- 266.92 name: Total reward - type: expert_normalized_total_reward value: 0.31 +/- 0.38 name: 
Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Coffee Pull type: metaworld-coffee-pull metrics: - type: total_reward value: 55.13 +/- 96.96 name: Total reward - type: expert_normalized_total_reward value: 0.20 +/- 0.38 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Coffee Push type: metaworld-coffee-push metrics: - type: total_reward value: 269.17 +/- 237.82 name: Total reward - type: expert_normalized_total_reward value: 0.54 +/- 0.48 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Dial Turn type: metaworld-dial-turn metrics: - type: total_reward value: 738.22 +/- 168.43 name: Total reward - type: expert_normalized_total_reward value: 0.93 +/- 0.22 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Disassemble type: metaworld-disassemble metrics: - type: total_reward value: 39.14 +/- 11.85 name: Total reward - type: expert_normalized_total_reward value: -0.47 +/- 4.70 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Door Close type: metaworld-door-close metrics: - type: total_reward value: 528.17 +/- 29.90 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.06 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Door Lock type: metaworld-door-lock metrics: - type: total_reward value: 676.51 +/- 192.68 name: Total reward - type: expert_normalized_total_reward value: 0.81 +/- 0.28 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Door Open type: metaworld-door-open metrics: - type: total_reward value: 572.76 +/- 57.53 name: Total reward - type: expert_normalized_total_reward 
value: 0.98 +/- 0.11 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Door Unlock type: metaworld-door-unlock metrics: - type: total_reward value: 654.94 +/- 260.64 name: Total reward - type: expert_normalized_total_reward value: 0.79 +/- 0.37 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Drawer Close type: metaworld-drawer-close metrics: - type: total_reward value: 663.02 +/- 214.51 name: Total reward - type: expert_normalized_total_reward value: 0.73 +/- 0.29 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Drawer Open type: metaworld-drawer-open metrics: - type: total_reward value: 489.07 +/- 21.28 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.06 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Faucet Close type: metaworld-faucet-close metrics: - type: total_reward value: 361.32 +/- 72.28 name: Total reward - type: expert_normalized_total_reward value: 0.22 +/- 0.14 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Faucet Open type: metaworld-faucet-open metrics: - type: total_reward value: 637.86 +/- 134.50 name: Total reward - type: expert_normalized_total_reward value: 0.85 +/- 0.29 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Hammer type: metaworld-hammer metrics: - type: total_reward value: 691.72 +/- 25.25 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Hand Insert type: metaworld-hand-insert metrics: - type: total_reward value: 719.57 +/- 99.26 name: Total reward - 
type: expert_normalized_total_reward value: 0.97 +/- 0.13 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Handle Press Side type: metaworld-handle-press-side metrics: - type: total_reward value: 84.25 +/- 113.34 name: Total reward - type: expert_normalized_total_reward value: 0.03 +/- 0.14 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Handle Press type: metaworld-handle-press metrics: - type: total_reward value: 731.94 +/- 261.90 name: Total reward - type: expert_normalized_total_reward value: 0.84 +/- 0.34 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Handle Pull Side type: metaworld-handle-pull-side metrics: - type: total_reward value: 233.11 +/- 199.49 name: Total reward - type: expert_normalized_total_reward value: 0.60 +/- 0.52 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Handle Pull type: metaworld-handle-pull metrics: - type: total_reward value: 501.29 +/- 209.45 name: Total reward - type: expert_normalized_total_reward value: 0.74 +/- 0.32 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Lever Pull type: metaworld-lever-pull metrics: - type: total_reward value: 250.18 +/- 228.59 name: Total reward - type: expert_normalized_total_reward value: 0.34 +/- 0.41 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Peg Insert Side type: metaworld-peg-insert-side metrics: - type: total_reward value: 288.02 +/- 157.87 name: Total reward - type: expert_normalized_total_reward value: 0.91 +/- 0.50 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Peg Unplug Side type: 
metaworld-peg-unplug-side metrics: - type: total_reward value: 68.48 +/- 125.34 name: Total reward - type: expert_normalized_total_reward value: 0.14 +/- 0.28 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pick Out Of Hole type: metaworld-pick-out-of-hole metrics: - type: total_reward value: 2.08 +/- 0.05 name: Total reward - type: expert_normalized_total_reward value: 0.00 +/- 0.00 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pick Place Wall type: metaworld-pick-place-wall metrics: - type: total_reward value: 6.87 +/- 44.99 name: Total reward - type: expert_normalized_total_reward value: 0.02 +/- 0.10 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pick Place type: metaworld-pick-place metrics: - type: total_reward value: 264.18 +/- 195.69 name: Total reward - type: expert_normalized_total_reward value: 0.63 +/- 0.47 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Plate Slide Back Side type: metaworld-plate-slide-back-side metrics: - type: total_reward value: 697.54 +/- 137.79 name: Total reward - type: expert_normalized_total_reward value: 0.95 +/- 0.20 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Plate Slide Back type: metaworld-plate-slide-back metrics: - type: total_reward value: 196.80 +/- 1.73 name: Total reward - type: expert_normalized_total_reward value: 0.24 +/- 0.00 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Plate Slide Side type: metaworld-plate-slide-side metrics: - type: total_reward value: 122.61 +/- 24.52 name: Total reward - type: expert_normalized_total_reward value: 0.16 +/- 0.04 name: Expert normalized total reward - 
task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Plate Slide type: metaworld-plate-slide metrics: - type: total_reward value: 497.42 +/- 168.74 name: Total reward - type: expert_normalized_total_reward value: 0.93 +/- 0.37 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Push Back type: metaworld-push-back metrics: - type: total_reward value: 91.41 +/- 115.05 name: Total reward - type: expert_normalized_total_reward value: 1.08 +/- 1.37 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Push Wall type: metaworld-push-wall metrics: - type: total_reward value: 116.49 +/- 208.05 name: Total reward - type: expert_normalized_total_reward value: 0.15 +/- 0.28 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Push type: metaworld-push metrics: - type: total_reward value: 604.25 +/- 261.90 name: Total reward - type: expert_normalized_total_reward value: 0.80 +/- 0.35 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Reach Wall type: metaworld-reach-wall metrics: - type: total_reward value: 634.57 +/- 231.40 name: Total reward - type: expert_normalized_total_reward value: 0.81 +/- 0.38 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Reach type: metaworld-reach metrics: - type: total_reward value: 325.27 +/- 159.21 name: Total reward - type: expert_normalized_total_reward value: 0.33 +/- 0.30 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Shelf Place type: metaworld-shelf-place metrics: - type: total_reward value: 124.60 +/- 112.83 name: Total reward - type: expert_normalized_total_reward value: 0.52 +/- 0.47 name: Expert normalized total 
reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Soccer type: metaworld-soccer metrics: - type: total_reward value: 364.50 +/- 175.45 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.47 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Stick Pull type: metaworld-stick-pull metrics: - type: total_reward value: 398.64 +/- 205.60 name: Total reward - type: expert_normalized_total_reward value: 0.76 +/- 0.39 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Stick Push type: metaworld-stick-push metrics: - type: total_reward value: 158.29 +/- 264.59 name: Total reward - type: expert_normalized_total_reward value: 0.25 +/- 0.42 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Sweep Into type: metaworld-sweep-into metrics: - type: total_reward value: 775.30 +/- 119.00 name: Total reward - type: expert_normalized_total_reward value: 0.97 +/- 0.15 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Sweep type: metaworld-sweep metrics: - type: total_reward value: 15.64 +/- 9.29 name: Total reward - type: expert_normalized_total_reward value: 0.01 +/- 0.02 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Window Close type: metaworld-window-close metrics: - type: total_reward value: 423.33 +/- 203.92 name: Total reward - type: expert_normalized_total_reward value: 0.69 +/- 0.38 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Window Open type: metaworld-window-open metrics: - type: total_reward value: 593.10 +/- 54.83 name: Total reward - type: expert_normalized_total_reward value: 1.00 +/- 0.10 name: Expert 
normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Ant type: mujoco-ant metrics: - type: total_reward value: 5268.02 +/- 1495.39 name: Total reward - type: expert_normalized_total_reward value: 0.90 +/- 0.25 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Inverted Double Pendulum type: mujoco-doublependulum metrics: - type: total_reward value: 4750.14 +/- 931.20 name: Total reward - type: expert_normalized_total_reward value: 0.51 +/- 0.10 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Half Cheetah type: mujoco-halfcheetah metrics: - type: total_reward value: 6659.69 +/- 409.71 name: Total reward - type: expert_normalized_total_reward value: 0.90 +/- 0.05 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Hopper type: mujoco-hopper metrics: - type: total_reward value: 1835.93 +/- 532.21 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.29 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Humanoid type: mujoco-humanoid metrics: - type: total_reward value: 697.44 +/- 108.06 name: Total reward - type: expert_normalized_total_reward value: 0.09 +/- 0.02 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Inverted Pendulum type: mujoco-pendulum metrics: - type: total_reward value: 116.34 +/- 20.19 name: Total reward - type: expert_normalized_total_reward value: 0.23 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Pusher type: mujoco-pusher metrics: - type: total_reward value: -26.33 +/- 6.32 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.05 name: 
Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Reacher type: mujoco-reacher metrics: - type: total_reward value: -6.06 +/- 2.64 name: Total reward - type: expert_normalized_total_reward value: 0.99 +/- 0.07 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Humanoid Standup type: mujoco-standup metrics: - type: total_reward value: 118125.15 +/- 24880.28 name: Total reward - type: expert_normalized_total_reward value: 0.35 +/- 0.10 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Swimmer type: mujoco-swimmer metrics: - type: total_reward value: 93.26 +/- 3.78 name: Total reward - type: expert_normalized_total_reward value: 1.01 +/- 0.04 name: Expert normalized total reward - task: type: reinforcement-learning name: Reinforcement Learning dataset: name: Walker 2d type: mujoco-walker metrics: - type: total_reward value: 4662.43 +/- 762.67 name: Total reward - type: expert_normalized_total_reward value: 1.01 +/- 0.16 name: Expert normalized total reward
---

# Model Card for JAT

This is a multi-modal and multi-task model.

## Model Details

### Model Description

- **Developed by:** The JAT Team
- **License:** Apache 2.0

### Model Sources

- **Repository:**
- **Paper:** Coming soon
- **Demo:** Coming soon

## Training

The model was trained on the following tasks:

- Alien
- Amidar
- Assault
- Asterix
- Asteroids
- Atlantis
- Bank Heist
- Battle Zone
- Beam Rider
- Berzerk
- Bowling
- Boxing
- Breakout
- Centipede
- Chopper Command
- Crazy Climber
- Defender
- Demon Attack
- Double Dunk
- Enduro
- Fishing Derby
- Freeway
- Frostbite
- Gopher
- Gravitar
- H.E.R.O.
- Ice Hockey
- James Bond
- Kangaroo
- Krull
- Kung-Fu Master
- Montezuma's Revenge
- Ms. Pacman
- Name This Game
- Phoenix
- PitFall
- Pong
- Private Eye
- Q*Bert
- River Raid
- Road Runner
- Robotank
- Seaquest
- Skiing
- Solaris
- Space Invaders
- Star Gunner
- Surround
- Tennis
- Time Pilot
- Tutankham
- Up and Down
- Venture
- Video Pinball
- Wizard of Wor
- Yars Revenge
- Zaxxon
- Action Obj Door
- Blocked Unlock Pickup
- Boss Level No Unlock
- Boss Level
- Find Obj S5
- Go To Door
- Go To Imp Unlock
- Go To Local
- Go To Obj Door
- Go To Obj
- Go To Red Ball Grey
- Go To Red Ball No Dists
- Go To Red Ball
- Go To Red Blue Ball
- Go To Seq
- Go To
- Key Corridor
- Mini Boss Level
- Move Two Across S8N9
- One Room S8
- Open Door
- Open Doors Order N4
- Open Red Door
- Open Two Doors
- Open
- Pickup Above
- Pickup Dist
- Pickup Loc
- Pickup
- Put Next Local
- Put Next S7N4
- Synth Loc
- Synth Seq
- Synth
- Unblock Pickup
- Unlock Local
- Unlock Pickup
- Unlock To Unlock
- Unlock
- Assembly
- Basketball
- BinPicking
- Box Close
- Button Press Topdown Wall
- Button Press Topdown
- Button Press Wall
- Button Press
- Coffee Button
- Coffee Pull
- Coffee Push
- Dial Turn
- Disassemble
- Door Close
- Door Lock
- Door Open
- Door Unlock
- Drawer Close
- Drawer Open
- Faucet Close
- Faucet Open
- Hammer
- Hand Insert
- Handle Press Side
- Handle Press
- Handle Pull Side
- Handle Pull
- Lever Pull
- Peg Insert Side
- Peg Unplug Side
- Pick Out Of Hole
- Pick Place Wall
- Pick Place
- Plate Slide Back Side
- Plate Slide Back
- Plate Slide Side
- Plate Slide
- Push Back
- Push Wall
- Push
- Reach Wall
- Reach
- Shelf Place
- Soccer
- Stick Pull
- Stick Push
- Sweep Into
- Sweep
- Window Close
- Window Open
- Ant
- Inverted Double Pendulum
- Half Cheetah
- Hopper
- Humanoid
- Inverted Pendulum
- Pusher
- Reacher
- Humanoid Standup
- Swimmer
- Walker 2d

## How to Get Started with the Model

Use the code below to get started with the model.

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
```