qgallouedec HF staff committed on
Commit
f7dbbec
1 Parent(s): c737ca8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +161 -160
README.md CHANGED
@@ -171,10 +171,10 @@ model-index:
171
  type: atari
172
  metrics:
173
  - type: iqm_expert_normalized_total_reward
174
- value: 0.06 [0.06, 0.06]
175
  name: IQM expert normalized total reward
176
  - type: iqm_human_normalized_total_reward
177
- value: 0.17 [0.16, 0.17]
178
  name: IQM human normalized total reward
179
  - task:
180
  type: reinforcement-learning
@@ -214,13 +214,13 @@ model-index:
214
  type: atari-alien
215
  metrics:
216
  - type: total_reward
217
- value: 1085.90 +/- 396.36
218
  name: Total reward
219
  - type: expert_normalized_total_reward
220
- value: 0.05 +/- 0.02
221
  name: Expert normalized total reward
222
  - type: human_normalized_total_reward
223
- value: 0.12 +/- 0.06
224
  name: Human normalized total reward
225
  - task:
226
  type: reinforcement-learning
@@ -230,13 +230,13 @@ model-index:
230
  type: atari-amidar
231
  metrics:
232
  - type: total_reward
233
- value: 41.26 +/- 28.57
234
  name: Total reward
235
  - type: expert_normalized_total_reward
236
- value: 0.02 +/- 0.01
237
  name: Expert normalized total reward
238
  - type: human_normalized_total_reward
239
- value: 0.02 +/- 0.02
240
  name: Human normalized total reward
241
  - task:
242
  type: reinforcement-learning
@@ -246,13 +246,13 @@ model-index:
246
  type: atari-assault
247
  metrics:
248
  - type: total_reward
249
- value: 772.89 +/- 59.34
250
  name: Total reward
251
  - type: expert_normalized_total_reward
252
- value: 0.04 +/- 0.00
253
  name: Expert normalized total reward
254
  - type: human_normalized_total_reward
255
- value: 1.06 +/- 0.11
256
  name: Human normalized total reward
257
  - task:
258
  type: reinforcement-learning
@@ -262,13 +262,13 @@ model-index:
262
  type: atari-asterix
263
  metrics:
264
  - type: total_reward
265
- value: 778.50 +/- 428.97
266
  name: Total reward
267
  - type: expert_normalized_total_reward
268
- value: 0.16 +/- 0.12
269
  name: Expert normalized total reward
270
  - type: human_normalized_total_reward
271
- value: 0.07 +/- 0.05
272
  name: Human normalized total reward
273
  - task:
274
  type: reinforcement-learning
@@ -278,13 +278,13 @@ model-index:
278
  type: atari-asteroids
279
  metrics:
280
  - type: total_reward
281
- value: 1423.60 +/- 538.79
282
  name: Total reward
283
  - type: expert_normalized_total_reward
284
  value: 0.00 +/- 0.00
285
  name: Expert normalized total reward
286
  - type: human_normalized_total_reward
287
- value: 0.02 +/- 0.01
288
  name: Human normalized total reward
289
  - task:
290
  type: reinforcement-learning
@@ -294,13 +294,13 @@ model-index:
294
  type: atari-atlantis
295
  metrics:
296
  - type: total_reward
297
- value: 23541.00 +/- 10376.72
298
  name: Total reward
299
  - type: expert_normalized_total_reward
300
- value: 0.03 +/- 0.03
301
  name: Expert normalized total reward
302
  - type: human_normalized_total_reward
303
- value: 0.66 +/- 0.64
304
  name: Human normalized total reward
305
  - task:
306
  type: reinforcement-learning
@@ -310,13 +310,13 @@ model-index:
310
  type: atari-bankheist
311
  metrics:
312
  - type: total_reward
313
- value: 685.50 +/- 157.92
314
  name: Total reward
315
  - type: expert_normalized_total_reward
316
- value: 0.51 +/- 0.12
317
  name: Expert normalized total reward
318
  - type: human_normalized_total_reward
319
- value: 0.91 +/- 0.21
320
  name: Human normalized total reward
321
  - task:
322
  type: reinforcement-learning
@@ -326,13 +326,13 @@ model-index:
326
  type: atari-battlezone
327
  metrics:
328
  - type: total_reward
329
- value: 12950.00 +/- 4306.68
330
  name: Total reward
331
  - type: expert_normalized_total_reward
332
- value: 0.04 +/- 0.01
333
  name: Expert normalized total reward
334
  - type: human_normalized_total_reward
335
- value: 0.34 +/- 0.12
336
  name: Human normalized total reward
337
  - task:
338
  type: reinforcement-learning
@@ -342,13 +342,13 @@ model-index:
342
  type: atari-beamrider
343
  metrics:
344
  - type: total_reward
345
- value: 762.04 +/- 243.25
346
  name: Total reward
347
  - type: expert_normalized_total_reward
348
  value: 0.01 +/- 0.01
349
  name: Expert normalized total reward
350
  - type: human_normalized_total_reward
351
- value: 0.02 +/- 0.01
352
  name: Human normalized total reward
353
  - task:
354
  type: reinforcement-learning
@@ -358,13 +358,13 @@ model-index:
358
  type: atari-berzerk
359
  metrics:
360
  - type: total_reward
361
- value: 523.90 +/- 161.95
362
  name: Total reward
363
  - type: expert_normalized_total_reward
364
- value: 0.01 +/- 0.00
365
  name: Expert normalized total reward
366
  - type: human_normalized_total_reward
367
- value: 0.16 +/- 0.06
368
  name: Human normalized total reward
369
  - task:
370
  type: reinforcement-learning
@@ -374,13 +374,13 @@ model-index:
374
  type: atari-bowling
375
  metrics:
376
  - type: total_reward
377
- value: 29.99 +/- 11.49
378
  name: Total reward
379
  - type: expert_normalized_total_reward
380
  value: 1.00 +/- 0.00
381
  name: Expert normalized total reward
382
  - type: human_normalized_total_reward
383
- value: 0.05 +/- 0.08
384
  name: Human normalized total reward
385
  - task:
386
  type: reinforcement-learning
@@ -390,13 +390,13 @@ model-index:
390
  type: atari-boxing
391
  metrics:
392
  - type: total_reward
393
- value: 87.00 +/- 22.57
394
  name: Total reward
395
  - type: expert_normalized_total_reward
396
- value: 0.89 +/- 0.23
397
  name: Expert normalized total reward
398
  - type: human_normalized_total_reward
399
- value: 7.24 +/- 1.88
400
  name: Human normalized total reward
401
  - task:
402
  type: reinforcement-learning
@@ -406,13 +406,13 @@ model-index:
406
  type: atari-breakout
407
  metrics:
408
  - type: total_reward
409
- value: 9.16 +/- 5.76
410
  name: Total reward
411
  - type: expert_normalized_total_reward
412
  value: 0.01 +/- 0.01
413
  name: Expert normalized total reward
414
  - type: human_normalized_total_reward
415
- value: 0.26 +/- 0.20
416
  name: Human normalized total reward
417
  - task:
418
  type: reinforcement-learning
@@ -422,13 +422,13 @@ model-index:
422
  type: atari-centipede
423
  metrics:
424
  - type: total_reward
425
- value: 4461.72 +/- 2188.80
426
  name: Total reward
427
  - type: expert_normalized_total_reward
428
- value: 0.25 +/- 0.23
429
  name: Expert normalized total reward
430
  - type: human_normalized_total_reward
431
- value: 0.24 +/- 0.22
432
  name: Human normalized total reward
433
  - task:
434
  type: reinforcement-learning
@@ -438,13 +438,13 @@ model-index:
438
  type: atari-choppercommand
439
  metrics:
440
  - type: total_reward
441
- value: 1497.00 +/- 723.11
442
  name: Total reward
443
  - type: expert_normalized_total_reward
444
- value: 0.01 +/- 0.01
445
  name: Expert normalized total reward
446
  - type: human_normalized_total_reward
447
- value: 0.10 +/- 0.11
448
  name: Human normalized total reward
449
  - task:
450
  type: reinforcement-learning
@@ -454,13 +454,13 @@ model-index:
454
  type: atari-crazyclimber
455
  metrics:
456
  - type: total_reward
457
- value: 52850.00 +/- 31617.86
458
  name: Total reward
459
  - type: expert_normalized_total_reward
460
- value: 0.25 +/- 0.19
461
  name: Expert normalized total reward
462
  - type: human_normalized_total_reward
463
- value: 1.68 +/- 1.26
464
  name: Human normalized total reward
465
  - task:
466
  type: reinforcement-learning
@@ -470,13 +470,13 @@ model-index:
470
  type: atari-defender
471
  metrics:
472
  - type: total_reward
473
- value: 10627.50 +/- 4473.21
474
  name: Total reward
475
  - type: expert_normalized_total_reward
476
- value: 0.02 +/- 0.01
477
  name: Expert normalized total reward
478
  - type: human_normalized_total_reward
479
- value: 0.49 +/- 0.28
480
  name: Human normalized total reward
481
  - task:
482
  type: reinforcement-learning
@@ -486,13 +486,13 @@ model-index:
486
  type: atari-demonattack
487
  metrics:
488
  - type: total_reward
489
- value: 315.10 +/- 279.01
490
  name: Total reward
491
  - type: expert_normalized_total_reward
492
- value: 0.00 +/- 0.00
493
  name: Expert normalized total reward
494
  - type: human_normalized_total_reward
495
- value: 0.09 +/- 0.15
496
  name: Human normalized total reward
497
  - task:
498
  type: reinforcement-learning
@@ -502,13 +502,13 @@ model-index:
502
  type: atari-doubledunk
503
  metrics:
504
  - type: total_reward
505
- value: 0.08 +/- 11.61
506
  name: Total reward
507
  - type: expert_normalized_total_reward
508
- value: 0.47 +/- 0.29
509
  name: Expert normalized total reward
510
  - type: human_normalized_total_reward
511
- value: 0.53 +/- 0.33
512
  name: Human normalized total reward
513
  - task:
514
  type: reinforcement-learning
@@ -518,13 +518,13 @@ model-index:
518
  type: atari-enduro
519
  metrics:
520
  - type: total_reward
521
- value: 111.49 +/- 27.36
522
  name: Total reward
523
  - type: expert_normalized_total_reward
524
- value: 0.05 +/- 0.01
525
  name: Expert normalized total reward
526
  - type: human_normalized_total_reward
527
- value: 0.13 +/- 0.03
528
  name: Human normalized total reward
529
  - task:
530
  type: reinforcement-learning
@@ -534,13 +534,13 @@ model-index:
534
  type: atari-fishingderby
535
  metrics:
536
  - type: total_reward
537
- value: -55.21 +/- 19.35
538
  name: Total reward
539
  - type: expert_normalized_total_reward
540
- value: 0.37 +/- 0.20
541
  name: Expert normalized total reward
542
  - type: human_normalized_total_reward
543
- value: 0.28 +/- 0.15
544
  name: Human normalized total reward
545
  - task:
546
  type: reinforcement-learning
@@ -550,13 +550,13 @@ model-index:
550
  type: atari-freeway
551
  metrics:
552
  - type: total_reward
553
- value: 24.12 +/- 1.64
554
  name: Total reward
555
  - type: expert_normalized_total_reward
556
- value: 0.71 +/- 0.05
557
  name: Expert normalized total reward
558
  - type: human_normalized_total_reward
559
- value: 0.81 +/- 0.06
560
  name: Human normalized total reward
561
  - task:
562
  type: reinforcement-learning
@@ -566,13 +566,13 @@ model-index:
566
  type: atari-frostbite
567
  metrics:
568
  - type: total_reward
569
- value: 617.30 +/- 686.11
570
  name: Total reward
571
  - type: expert_normalized_total_reward
572
- value: 0.04 +/- 0.05
573
  name: Expert normalized total reward
574
  - type: human_normalized_total_reward
575
- value: 0.13 +/- 0.16
576
  name: Human normalized total reward
577
  - task:
578
  type: reinforcement-learning
@@ -582,13 +582,13 @@ model-index:
582
  type: atari-gopher
583
  metrics:
584
  - type: total_reward
585
- value: 2947.20 +/- 1448.32
586
  name: Total reward
587
  - type: expert_normalized_total_reward
588
- value: 0.03 +/- 0.02
589
  name: Expert normalized total reward
590
  - type: human_normalized_total_reward
591
- value: 1.25 +/- 0.67
592
  name: Human normalized total reward
593
  - task:
594
  type: reinforcement-learning
@@ -598,13 +598,13 @@ model-index:
598
  type: atari-gravitar
599
  metrics:
600
  - type: total_reward
601
- value: 1030.50 +/- 719.20
602
  name: Total reward
603
  - type: expert_normalized_total_reward
604
- value: 0.22 +/- 0.19
605
  name: Expert normalized total reward
606
  - type: human_normalized_total_reward
607
- value: 0.27 +/- 0.23
608
  name: Human normalized total reward
609
  - task:
610
  type: reinforcement-learning
@@ -614,13 +614,13 @@ model-index:
614
  type: atari-hero
615
  metrics:
616
  - type: total_reward
617
- value: 6997.95 +/- 2562.51
618
  name: Total reward
619
  - type: expert_normalized_total_reward
620
- value: 0.14 +/- 0.06
621
  name: Expert normalized total reward
622
  - type: human_normalized_total_reward
623
- value: 0.20 +/- 0.09
624
  name: Human normalized total reward
625
  - task:
626
  type: reinforcement-learning
@@ -630,13 +630,13 @@ model-index:
630
  type: atari-icehockey
631
  metrics:
632
  - type: total_reward
633
- value: -3.77 +/- 3.10
634
  name: Total reward
635
  - type: expert_normalized_total_reward
636
- value: 0.20 +/- 0.09
637
  name: Expert normalized total reward
638
  - type: human_normalized_total_reward
639
- value: 0.61 +/- 0.26
640
  name: Human normalized total reward
641
  - task:
642
  type: reinforcement-learning
@@ -646,13 +646,13 @@ model-index:
646
  type: atari-jamesbond
647
  metrics:
648
  - type: total_reward
649
- value: 187.50 +/- 72.24
650
  name: Total reward
651
  - type: expert_normalized_total_reward
652
  value: 0.01 +/- 0.00
653
  name: Expert normalized total reward
654
  - type: human_normalized_total_reward
655
- value: 0.58 +/- 0.26
656
  name: Human normalized total reward
657
  - task:
658
  type: reinforcement-learning
@@ -662,13 +662,13 @@ model-index:
662
  type: atari-kangaroo
663
  metrics:
664
  - type: total_reward
665
- value: 124.00 +/- 156.92
666
  name: Total reward
667
  - type: expert_normalized_total_reward
668
- value: 0.14 +/- 0.30
669
  name: Expert normalized total reward
670
  - type: human_normalized_total_reward
671
- value: 0.02 +/- 0.05
672
  name: Human normalized total reward
673
  - task:
674
  type: reinforcement-learning
@@ -678,13 +678,13 @@ model-index:
678
  type: atari-krull
679
  metrics:
680
  - type: total_reward
681
- value: 8933.00 +/- 1358.65
682
  name: Total reward
683
  - type: expert_normalized_total_reward
684
- value: 0.75 +/- 0.14
685
  name: Expert normalized total reward
686
  - type: human_normalized_total_reward
687
- value: 6.87 +/- 1.27
688
  name: Human normalized total reward
689
  - task:
690
  type: reinforcement-learning
@@ -694,13 +694,13 @@ model-index:
694
  type: atari-kungfumaster
695
  metrics:
696
  - type: total_reward
697
- value: 100.00 +/- 142.13
698
  name: Total reward
699
  - type: expert_normalized_total_reward
700
- value: -0.00 +/- 0.00
701
  name: Expert normalized total reward
702
  - type: human_normalized_total_reward
703
- value: -0.01 +/- 0.01
704
  name: Human normalized total reward
705
  - task:
706
  type: reinforcement-learning
@@ -726,13 +726,13 @@ model-index:
726
  type: atari-mspacman
727
  metrics:
728
  - type: total_reward
729
- value: 1516.30 +/- 376.72
730
  name: Total reward
731
  - type: expert_normalized_total_reward
732
- value: 0.18 +/- 0.06
733
  name: Expert normalized total reward
734
  - type: human_normalized_total_reward
735
- value: 0.18 +/- 0.06
736
  name: Human normalized total reward
737
  - task:
738
  type: reinforcement-learning
@@ -742,13 +742,13 @@ model-index:
742
  type: atari-namethisgame
743
  metrics:
744
  - type: total_reward
745
- value: 3798.60 +/- 1361.64
746
  name: Total reward
747
  - type: expert_normalized_total_reward
748
- value: 0.07 +/- 0.07
749
  name: Expert normalized total reward
750
  - type: human_normalized_total_reward
751
- value: 0.26 +/- 0.24
752
  name: Human normalized total reward
753
  - task:
754
  type: reinforcement-learning
@@ -758,13 +758,13 @@ model-index:
758
  type: atari-phoenix
759
  metrics:
760
  - type: total_reward
761
- value: 1267.50 +/- 1013.72
762
  name: Total reward
763
  - type: expert_normalized_total_reward
764
  value: 0.00 +/- 0.00
765
  name: Expert normalized total reward
766
  - type: human_normalized_total_reward
767
- value: 0.08 +/- 0.16
768
  name: Human normalized total reward
769
  - task:
770
  type: reinforcement-learning
@@ -774,13 +774,13 @@ model-index:
774
  type: atari-pitfall
775
  metrics:
776
  - type: total_reward
777
- value: -287.36 +/- 492.82
778
  name: Total reward
779
  - type: expert_normalized_total_reward
780
- value: -0.25 +/- 2.16
781
  name: Expert normalized total reward
782
  - type: human_normalized_total_reward
783
- value: -0.01 +/- 0.07
784
  name: Human normalized total reward
785
  - task:
786
  type: reinforcement-learning
@@ -790,13 +790,13 @@ model-index:
790
  type: atari-pong
791
  metrics:
792
  - type: total_reward
793
- value: -11.03 +/- 11.29
794
  name: Total reward
795
  - type: expert_normalized_total_reward
796
- value: 0.23 +/- 0.27
797
  name: Expert normalized total reward
798
  - type: human_normalized_total_reward
799
- value: 0.27 +/- 0.32
800
  name: Human normalized total reward
801
  - task:
802
  type: reinforcement-learning
@@ -806,10 +806,10 @@ model-index:
806
  type: atari-privateeye
807
  metrics:
808
  - type: total_reward
809
- value: 96.00 +/- 19.60
810
  name: Total reward
811
  - type: expert_normalized_total_reward
812
- value: 0.95 +/- 0.26
813
  name: Expert normalized total reward
814
  - type: human_normalized_total_reward
815
  value: 0.00 +/- 0.00
@@ -822,13 +822,13 @@ model-index:
822
  type: atari-qbert
823
  metrics:
824
  - type: total_reward
825
- value: 1701.75 +/- 1912.56
826
  name: Total reward
827
  - type: expert_normalized_total_reward
828
- value: 0.04 +/- 0.04
829
  name: Expert normalized total reward
830
  - type: human_normalized_total_reward
831
- value: 0.12 +/- 0.14
832
  name: Human normalized total reward
833
  - task:
834
  type: reinforcement-learning
@@ -838,13 +838,13 @@ model-index:
838
  type: atari-riverraid
839
  metrics:
840
  - type: total_reward
841
- value: 2793.10 +/- 693.84
842
  name: Total reward
843
  - type: expert_normalized_total_reward
844
- value: 0.11 +/- 0.05
845
  name: Expert normalized total reward
846
  - type: human_normalized_total_reward
847
- value: 0.09 +/- 0.04
848
  name: Human normalized total reward
849
  - task:
850
  type: reinforcement-learning
@@ -854,13 +854,13 @@ model-index:
854
  type: atari-roadrunner
855
  metrics:
856
  - type: total_reward
857
- value: 7699.00 +/- 3446.61
858
  name: Total reward
859
  - type: expert_normalized_total_reward
860
- value: 0.10 +/- 0.04
861
  name: Expert normalized total reward
862
  - type: human_normalized_total_reward
863
- value: 0.98 +/- 0.44
864
  name: Human normalized total reward
865
  - task:
866
  type: reinforcement-learning
@@ -870,13 +870,13 @@ model-index:
870
  type: atari-robotank
871
  metrics:
872
  - type: total_reward
873
- value: 16.36 +/- 5.24
874
  name: Total reward
875
  - type: expert_normalized_total_reward
876
- value: 0.18 +/- 0.07
877
  name: Expert normalized total reward
878
  - type: human_normalized_total_reward
879
- value: 1.46 +/- 0.54
880
  name: Human normalized total reward
881
  - task:
882
  type: reinforcement-learning
@@ -886,13 +886,13 @@ model-index:
886
  type: atari-seaquest
887
  metrics:
888
  - type: total_reward
889
- value: 515.20 +/- 141.51
890
  name: Total reward
891
  - type: expert_normalized_total_reward
892
- value: 0.18 +/- 0.06
893
  name: Expert normalized total reward
894
  - type: human_normalized_total_reward
895
- value: 0.01 +/- 0.00
896
  name: Human normalized total reward
897
  - task:
898
  type: reinforcement-learning
@@ -902,13 +902,13 @@ model-index:
902
  type: atari-skiing
903
  metrics:
904
  - type: total_reward
905
- value: -29396.08 +/- 3289.80
906
  name: Total reward
907
  - type: expert_normalized_total_reward
908
- value: -1.93 +/- 0.52
909
  name: Expert normalized total reward
910
  - type: human_normalized_total_reward
911
- value: -0.96 +/- 0.26
912
  name: Human normalized total reward
913
  - task:
914
  type: reinforcement-learning
@@ -918,13 +918,13 @@ model-index:
918
  type: atari-solaris
919
  metrics:
920
  - type: total_reward
921
- value: 988.20 +/- 487.42
922
  name: Total reward
923
  - type: expert_normalized_total_reward
924
- value: -2.11 +/- 4.15
925
  name: Expert normalized total reward
926
  - type: human_normalized_total_reward
927
- value: -0.02 +/- 0.04
928
  name: Human normalized total reward
929
  - task:
930
  type: reinforcement-learning
@@ -934,13 +934,13 @@ model-index:
934
  type: atari-spaceinvaders
935
  metrics:
936
  - type: total_reward
937
- value: 339.50 +/- 164.05
938
  name: Total reward
939
  - type: expert_normalized_total_reward
940
  value: 0.01 +/- 0.01
941
  name: Expert normalized total reward
942
  - type: human_normalized_total_reward
943
- value: 0.13 +/- 0.11
944
  name: Human normalized total reward
945
  - task:
946
  type: reinforcement-learning
@@ -950,13 +950,13 @@ model-index:
950
  type: atari-stargunner
951
  metrics:
952
  - type: total_reward
953
- value: 978.00 +/- 638.37
954
  name: Total reward
955
  - type: expert_normalized_total_reward
956
- value: 0.00 +/- 0.00
957
  name: Expert normalized total reward
958
  - type: human_normalized_total_reward
959
- value: 0.03 +/- 0.07
960
  name: Human normalized total reward
961
  - task:
962
  type: reinforcement-learning
@@ -966,13 +966,13 @@ model-index:
966
  type: atari-surround
967
  metrics:
968
  - type: total_reward
969
- value: -8.22 +/- 1.19
970
  name: Total reward
971
  - type: expert_normalized_total_reward
972
- value: 0.09 +/- 0.06
973
  name: Expert normalized total reward
974
  - type: human_normalized_total_reward
975
- value: 0.11 +/- 0.07
976
  name: Human normalized total reward
977
  - task:
978
  type: reinforcement-learning
@@ -982,13 +982,13 @@ model-index:
982
  type: atari-tennis
983
  metrics:
984
  - type: total_reward
985
- value: -22.38 +/- 2.22
986
  name: Total reward
987
  - type: expert_normalized_total_reward
988
- value: 0.04 +/- 0.06
989
  name: Expert normalized total reward
990
  - type: human_normalized_total_reward
991
- value: 0.04 +/- 0.07
992
  name: Human normalized total reward
993
  - task:
994
  type: reinforcement-learning
@@ -998,13 +998,13 @@ model-index:
998
  type: atari-timepilot
999
  metrics:
1000
  - type: total_reward
1001
- value: 9534.00 +/- 2577.76
1002
  name: Total reward
1003
  - type: expert_normalized_total_reward
1004
- value: 0.09 +/- 0.04
1005
  name: Expert normalized total reward
1006
  - type: human_normalized_total_reward
1007
- value: 3.59 +/- 1.55
1008
  name: Human normalized total reward
1009
  - task:
1010
  type: reinforcement-learning
@@ -1014,13 +1014,13 @@ model-index:
1014
  type: atari-tutankham
1015
  metrics:
1016
  - type: total_reward
1017
- value: 40.20 +/- 14.51
1018
  name: Total reward
1019
  - type: expert_normalized_total_reward
1020
- value: 0.10 +/- 0.05
1021
  name: Expert normalized total reward
1022
  - type: human_normalized_total_reward
1023
- value: 0.18 +/- 0.09
1024
  name: Human normalized total reward
1025
  - task:
1026
  type: reinforcement-learning
@@ -1030,13 +1030,13 @@ model-index:
1030
  type: atari-upndown
1031
  metrics:
1032
  - type: total_reward
1033
- value: 6072.00 +/- 2283.30
1034
  name: Total reward
1035
  - type: expert_normalized_total_reward
1036
- value: 0.01 +/- 0.01
1037
  name: Expert normalized total reward
1038
  - type: human_normalized_total_reward
1039
- value: 0.50 +/- 0.20
1040
  name: Human normalized total reward
1041
  - task:
1042
  type: reinforcement-learning
@@ -1062,13 +1062,13 @@ model-index:
1062
  type: atari-videopinball
1063
  metrics:
1064
  - type: total_reward
1065
- value: 7943.01 +/- 8351.21
1066
  name: Total reward
1067
  - type: expert_normalized_total_reward
1068
- value: 0.02 +/- 0.02
1069
  name: Expert normalized total reward
1070
  - type: human_normalized_total_reward
1071
- value: 0.45 +/- 0.47
1072
  name: Human normalized total reward
1073
  - task:
1074
  type: reinforcement-learning
@@ -1078,13 +1078,13 @@ model-index:
1078
  type: atari-wizardofwor
1079
  metrics:
1080
  - type: total_reward
1081
- value: 1306.00 +/- 1139.81
1082
  name: Total reward
1083
  - type: expert_normalized_total_reward
1084
- value: 0.02 +/- 0.02
1085
  name: Expert normalized total reward
1086
  - type: human_normalized_total_reward
1087
- value: 0.18 +/- 0.27
1088
  name: Human normalized total reward
1089
  - task:
1090
  type: reinforcement-learning
@@ -1094,13 +1094,13 @@ model-index:
1094
  type: atari-yarsrevenge
1095
  metrics:
1096
  - type: total_reward
1097
- value: 8597.41 +/- 4291.81
1098
  name: Total reward
1099
  - type: expert_normalized_total_reward
1100
- value: 0.02 +/- 0.02
1101
  name: Expert normalized total reward
1102
  - type: human_normalized_total_reward
1103
- value: 0.11 +/- 0.08
1104
  name: Human normalized total reward
1105
  - task:
1106
  type: reinforcement-learning
@@ -1110,13 +1110,13 @@ model-index:
1110
  type: atari-zaxxon
1111
  metrics:
1112
  - type: total_reward
1113
- value: 896.00 +/- 1172.68
1114
  name: Total reward
1115
  - type: expert_normalized_total_reward
1116
- value: 0.01 +/- 0.02
1117
  name: Expert normalized total reward
1118
  - type: human_normalized_total_reward
1119
- value: 0.09 +/- 0.13
1120
  name: Human normalized total reward
1121
  - task:
1122
  type: reinforcement-learning
@@ -2441,7 +2441,6 @@ This is a multi-modal and multi-task model.
2441
 
2442
  <details>
2443
  <summary>The model was trained on the following tasks:</summary>
2444
-
2445
  - Alien
2446
  - Amidar
2447
  - Assault
@@ -2599,6 +2598,7 @@ This is a multi-modal and multi-task model.
2599
  - Humanoid Standup
2600
  - Swimmer
2601
  - Walker 2d
 
2602
  </details>
2603
 
2604
  ## How to Get Started with the Model
@@ -2610,3 +2610,4 @@ from transformers import AutoModelForCausalLM
2610
 
2611
  model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
2612
  ```
 
 
171
  type: atari
172
  metrics:
173
  - type: iqm_expert_normalized_total_reward
174
+ value: 0.14 [0.14, 0.15]
175
  name: IQM expert normalized total reward
176
  - type: iqm_human_normalized_total_reward
177
+ value: 0.38 [0.37, 0.38]
178
  name: IQM human normalized total reward
179
  - task:
180
  type: reinforcement-learning
 
214
  type: atari-alien
215
  metrics:
216
  - type: total_reward
217
+ value: 1474.90 +/- 588.75
218
  name: Total reward
219
  - type: expert_normalized_total_reward
220
+ value: 0.07 +/- 0.04
221
  name: Expert normalized total reward
222
  - type: human_normalized_total_reward
223
+ value: 0.18 +/- 0.09
224
  name: Human normalized total reward
225
  - task:
226
  type: reinforcement-learning
 
230
  type: atari-amidar
231
  metrics:
232
  - type: total_reward
233
+ value: 104.89 +/- 103.52
234
  name: Total reward
235
  - type: expert_normalized_total_reward
236
+ value: 0.05 +/- 0.05
237
  name: Expert normalized total reward
238
  - type: human_normalized_total_reward
239
+ value: 0.06 +/- 0.06
240
  name: Human normalized total reward
241
  - task:
242
  type: reinforcement-learning
 
246
  type: atari-assault
247
  metrics:
248
  - type: total_reward
249
+ value: 1650.07 +/- 820.99
250
  name: Total reward
251
  - type: expert_normalized_total_reward
252
+ value: 0.09 +/- 0.05
253
  name: Expert normalized total reward
254
  - type: human_normalized_total_reward
255
+ value: 2.75 +/- 1.58
256
  name: Human normalized total reward
257
  - task:
258
  type: reinforcement-learning
 
262
  type: atari-asterix
263
  metrics:
264
  - type: total_reward
265
+ value: 800.00 +/- 584.85
266
  name: Total reward
267
  - type: expert_normalized_total_reward
268
+ value: 0.17 +/- 0.17
269
  name: Expert normalized total reward
270
  - type: human_normalized_total_reward
271
+ value: 0.07 +/- 0.07
272
  name: Human normalized total reward
273
  - task:
274
  type: reinforcement-learning
 
278
  type: atari-asteroids
279
  metrics:
280
  - type: total_reward
281
+ value: 1385.30 +/- 507.53
282
  name: Total reward
283
  - type: expert_normalized_total_reward
284
  value: 0.00 +/- 0.00
285
  name: Expert normalized total reward
286
  - type: human_normalized_total_reward
287
+ value: 0.01 +/- 0.01
288
  name: Human normalized total reward
289
  - task:
290
  type: reinforcement-learning
 
294
  type: atari-atlantis
295
  metrics:
296
  - type: total_reward
297
+ value: 66980.00 +/- 158449.73
298
  name: Total reward
299
  - type: expert_normalized_total_reward
300
+ value: 0.18 +/- 0.51
301
  name: Expert normalized total reward
302
  - type: human_normalized_total_reward
303
+ value: 3.35 +/- 9.79
304
  name: Human normalized total reward
305
  - task:
306
  type: reinforcement-learning
 
310
  type: atari-bankheist
311
  metrics:
312
  - type: total_reward
313
+ value: 948.30 +/- 199.86
314
  name: Total reward
315
  - type: expert_normalized_total_reward
316
+ value: 0.71 +/- 0.15
317
  name: Expert normalized total reward
318
  - type: human_normalized_total_reward
319
+ value: 1.26 +/- 0.27
320
  name: Human normalized total reward
321
  - task:
322
  type: reinforcement-learning
 
326
  type: atari-battlezone
327
  metrics:
328
  - type: total_reward
329
+ value: 17420.00 +/- 6071.54
330
  name: Total reward
331
  - type: expert_normalized_total_reward
332
+ value: 0.06 +/- 0.02
333
  name: Expert normalized total reward
334
  - type: human_normalized_total_reward
335
+ value: 0.47 +/- 0.16
336
  name: Human normalized total reward
337
  - task:
338
  type: reinforcement-learning
 
342
  type: atari-beamrider
343
  metrics:
344
  - type: total_reward
345
+ value: 797.32 +/- 328.31
346
  name: Total reward
347
  - type: expert_normalized_total_reward
348
  value: 0.01 +/- 0.01
349
  name: Expert normalized total reward
350
  - type: human_normalized_total_reward
351
+ value: 0.03 +/- 0.02
352
  name: Human normalized total reward
353
  - task:
354
  type: reinforcement-learning
 
358
  type: atari-berzerk
359
  metrics:
360
  - type: total_reward
361
+ value: 687.30 +/- 331.91
362
  name: Total reward
363
  - type: expert_normalized_total_reward
364
+ value: 0.01 +/- 0.01
365
  name: Expert normalized total reward
366
  - type: human_normalized_total_reward
367
+ value: 0.22 +/- 0.13
368
  name: Human normalized total reward
369
  - task:
370
  type: reinforcement-learning
 
374
  type: atari-bowling
375
  metrics:
376
  - type: total_reward
377
+ value: 22.41 +/- 5.57
378
  name: Total reward
379
  - type: expert_normalized_total_reward
380
  value: 1.00 +/- 0.00
381
  name: Expert normalized total reward
382
  - type: human_normalized_total_reward
383
+ value: -0.01 +/- 0.04
384
  name: Human normalized total reward
385
  - task:
386
  type: reinforcement-learning
 
390
  type: atari-boxing
391
  metrics:
392
  - type: total_reward
393
+ value: 90.10 +/- 23.05
394
  name: Total reward
395
  - type: expert_normalized_total_reward
396
+ value: 0.92 +/- 0.24
397
  name: Expert normalized total reward
398
  - type: human_normalized_total_reward
399
+ value: 7.50 +/- 1.92
400
  name: Human normalized total reward
401
  - task:
402
  type: reinforcement-learning
 
406
  type: atari-breakout
407
  metrics:
408
  - type: total_reward
409
+ value: 8.82 +/- 5.63
410
  name: Total reward
411
  - type: expert_normalized_total_reward
412
  value: 0.01 +/- 0.01
413
  name: Expert normalized total reward
414
  - type: human_normalized_total_reward
415
+ value: 0.25 +/- 0.20
416
  name: Human normalized total reward
417
  - task:
418
  type: reinforcement-learning
 
422
  type: atari-centipede
423
  metrics:
424
  - type: total_reward
425
+ value: 5589.92 +/- 2567.26
426
  name: Total reward
427
  - type: expert_normalized_total_reward
428
+ value: 0.37 +/- 0.27
429
  name: Expert normalized total reward
430
  - type: human_normalized_total_reward
431
+ value: 0.35 +/- 0.26
432
  name: Human normalized total reward
433
  - task:
434
  type: reinforcement-learning
 
438
  type: atari-choppercommand
439
  metrics:
440
  - type: total_reward
441
+ value: 2417.00 +/- 1489.90
442
  name: Total reward
443
  - type: expert_normalized_total_reward
444
+ value: 0.02 +/- 0.02
445
  name: Expert normalized total reward
446
  - type: human_normalized_total_reward
447
+ value: 0.24 +/- 0.23
448
  name: Human normalized total reward
449
  - task:
450
  type: reinforcement-learning
 
454
  type: atari-crazyclimber
455
  metrics:
456
  - type: total_reward
457
+ value: 97639.00 +/- 26184.68
458
  name: Total reward
459
  - type: expert_normalized_total_reward
460
+ value: 0.52 +/- 0.16
461
  name: Expert normalized total reward
462
  - type: human_normalized_total_reward
463
+ value: 3.47 +/- 1.05
464
  name: Human normalized total reward
465
  - task:
466
  type: reinforcement-learning
 
470
  type: atari-defender
471
  metrics:
472
  - type: total_reward
473
+ value: 39323.50 +/- 15202.98
474
  name: Total reward
475
  - type: expert_normalized_total_reward
476
+ value: 0.10 +/- 0.04
477
  name: Expert normalized total reward
478
  - type: human_normalized_total_reward
479
+ value: 2.30 +/- 0.96
480
  name: Human normalized total reward
481
  - task:
482
  type: reinforcement-learning
 
486
  type: atari-demonattack
487
  metrics:
488
  - type: total_reward
489
+ value: 815.30 +/- 989.67
490
  name: Total reward
491
  - type: expert_normalized_total_reward
492
+ value: 0.01 +/- 0.01
493
  name: Expert normalized total reward
494
  - type: human_normalized_total_reward
495
+ value: 0.36 +/- 0.54
496
  name: Human normalized total reward
497
  - task:
498
  type: reinforcement-learning
 
502
  type: atari-doubledunk
503
  metrics:
504
  - type: total_reward
505
+ value: 14.42 +/- 9.97
506
  name: Total reward
507
  - type: expert_normalized_total_reward
508
+ value: 0.84 +/- 0.25
509
  name: Expert normalized total reward
510
  - type: human_normalized_total_reward
511
+ value: 0.94 +/- 0.28
512
  name: Human normalized total reward
513
  - task:
514
  type: reinforcement-learning
 
518
  type: atari-enduro
519
  metrics:
520
  - type: total_reward
521
+ value: 108.52 +/- 42.73
522
  name: Total reward
523
  - type: expert_normalized_total_reward
524
+ value: 0.05 +/- 0.02
525
  name: Expert normalized total reward
526
  - type: human_normalized_total_reward
527
+ value: 0.13 +/- 0.05
528
  name: Human normalized total reward
529
  - task:
530
  type: reinforcement-learning
 
534
  type: atari-fishingderby
535
  metrics:
536
  - type: total_reward
537
+ value: -30.35 +/- 24.37
538
  name: Total reward
539
  - type: expert_normalized_total_reward
540
+ value: 0.62 +/- 0.25
541
  name: Expert normalized total reward
542
  - type: human_normalized_total_reward
543
+ value: 0.47 +/- 0.19
544
  name: Human normalized total reward
545
  - task:
546
  type: reinforcement-learning
 
550
  type: atari-freeway
551
  metrics:
552
  - type: total_reward
553
+ value: 27.49 +/- 1.63
554
  name: Total reward
555
  - type: expert_normalized_total_reward
556
+ value: 0.81 +/- 0.05
557
  name: Expert normalized total reward
558
  - type: human_normalized_total_reward
559
+ value: 0.93 +/- 0.06
560
  name: Human normalized total reward
561
  - task:
562
  type: reinforcement-learning
 
566
  type: atari-frostbite
567
  metrics:
568
  - type: total_reward
569
+ value: 2769.60 +/- 1445.61
570
  name: Total reward
571
  - type: expert_normalized_total_reward
572
+ value: 0.21 +/- 0.11
573
  name: Expert normalized total reward
574
  - type: human_normalized_total_reward
575
+ value: 0.63 +/- 0.34
576
  name: Human normalized total reward
577
  - task:
578
  type: reinforcement-learning
 
582
  type: atari-gopher
583
  metrics:
584
  - type: total_reward
585
+ value: 5340.60 +/- 2547.07
586
  name: Total reward
587
  - type: expert_normalized_total_reward
588
+ value: 0.06 +/- 0.03
589
  name: Expert normalized total reward
590
  - type: human_normalized_total_reward
591
+ value: 2.36 +/- 1.18
592
  name: Human normalized total reward
593
  - task:
594
  type: reinforcement-learning
 
598
  type: atari-gravitar
599
  metrics:
600
  - type: total_reward
601
+ value: 1269.50 +/- 902.99
602
  name: Total reward
603
  - type: expert_normalized_total_reward
604
+ value: 0.29 +/- 0.24
605
  name: Expert normalized total reward
606
  - type: human_normalized_total_reward
607
+ value: 0.34 +/- 0.28
608
  name: Human normalized total reward
609
  - task:
610
  type: reinforcement-learning
 
614
  type: atari-hero
615
  metrics:
616
  - type: total_reward
617
+ value: 11709.65 +/- 3233.53
618
  name: Total reward
619
  - type: expert_normalized_total_reward
620
+ value: 0.24 +/- 0.07
621
  name: Expert normalized total reward
622
  - type: human_normalized_total_reward
623
+ value: 0.36 +/- 0.11
624
  name: Human normalized total reward
625
  - task:
626
  type: reinforcement-learning
 
630
  type: atari-icehockey
631
  metrics:
632
  - type: total_reward
633
+ value: 7.48 +/- 5.60
634
  name: Total reward
635
  - type: expert_normalized_total_reward
636
+ value: 0.51 +/- 0.15
637
  name: Expert normalized total reward
638
  - type: human_normalized_total_reward
639
+ value: 1.54 +/- 0.46
640
  name: Human normalized total reward
641
  - task:
642
  type: reinforcement-learning
 
646
  type: atari-jamesbond
647
  metrics:
648
  - type: total_reward
649
+ value: 327.50 +/- 123.16
650
  name: Total reward
651
  - type: expert_normalized_total_reward
652
  value: 0.01 +/- 0.00
653
  name: Expert normalized total reward
654
  - type: human_normalized_total_reward
655
+ value: 1.09 +/- 0.45
656
  name: Human normalized total reward
657
  - task:
658
  type: reinforcement-learning
 
662
  type: atari-kangaroo
663
  metrics:
664
  - type: total_reward
665
+ value: 378.00 +/- 343.97
666
  name: Total reward
667
  - type: expert_normalized_total_reward
668
+ value: 0.62 +/- 0.66
669
  name: Expert normalized total reward
670
  - type: human_normalized_total_reward
671
+ value: 0.11 +/- 0.12
672
  name: Human normalized total reward
673
  - task:
674
  type: reinforcement-learning
 
678
  type: atari-krull
679
  metrics:
680
  - type: total_reward
681
+ value: 10720.50 +/- 1284.13
682
  name: Total reward
683
  - type: expert_normalized_total_reward
684
+ value: 0.93 +/- 0.13
685
  name: Expert normalized total reward
686
  - type: human_normalized_total_reward
687
+ value: 8.55 +/- 1.20
688
  name: Human normalized total reward
689
  - task:
690
  type: reinforcement-learning
 
694
  type: atari-kungfumaster
695
  metrics:
696
  - type: total_reward
697
+ value: 288.00 +/- 255.06
698
  name: Total reward
699
  - type: expert_normalized_total_reward
700
+ value: 0.00 +/- 0.01
701
  name: Expert normalized total reward
702
  - type: human_normalized_total_reward
703
+ value: 0.00 +/- 0.01
704
  name: Human normalized total reward
705
  - task:
706
  type: reinforcement-learning
 
726
  type: atari-mspacman
727
  metrics:
728
  - type: total_reward
729
+ value: 1573.10 +/- 483.96
730
  name: Total reward
731
  - type: expert_normalized_total_reward
732
+ value: 0.19 +/- 0.07
733
  name: Expert normalized total reward
734
  - type: human_normalized_total_reward
735
+ value: 0.19 +/- 0.07
736
  name: Human normalized total reward
737
  - task:
738
  type: reinforcement-learning
 
742
  type: atari-namethisgame
743
  metrics:
744
  - type: total_reward
745
+ value: 7523.30 +/- 2471.38
746
  name: Total reward
747
  - type: expert_normalized_total_reward
748
+ value: 0.25 +/- 0.12
749
  name: Expert normalized total reward
750
  - type: human_normalized_total_reward
751
+ value: 0.91 +/- 0.43
752
  name: Human normalized total reward
753
  - task:
754
  type: reinforcement-learning
 
758
  type: atari-phoenix
759
  metrics:
760
  - type: total_reward
761
+ value: 2197.90 +/- 1795.38
762
  name: Total reward
763
  - type: expert_normalized_total_reward
764
  value: 0.00 +/- 0.00
765
  name: Expert normalized total reward
766
  - type: human_normalized_total_reward
767
+ value: 0.22 +/- 0.28
768
  name: Human normalized total reward
769
  - task:
770
  type: reinforcement-learning
 
774
  type: atari-pitfall
775
  metrics:
776
  - type: total_reward
777
+ value: -6.68 +/- 19.05
778
  name: Total reward
779
  - type: expert_normalized_total_reward
780
+ value: 0.98 +/- 0.08
781
  name: Expert normalized total reward
782
  - type: human_normalized_total_reward
783
+ value: 0.03 +/- 0.00
784
  name: Human normalized total reward
785
  - task:
786
  type: reinforcement-learning
 
790
  type: atari-pong
791
  metrics:
792
  - type: total_reward
793
+ value: 13.69 +/- 13.35
794
  name: Total reward
795
  - type: expert_normalized_total_reward
796
+ value: 0.82 +/- 0.32
797
  name: Expert normalized total reward
798
  - type: human_normalized_total_reward
799
+ value: 0.97 +/- 0.38
800
  name: Human normalized total reward
801
  - task:
802
  type: reinforcement-learning
 
806
  type: atari-privateeye
807
  metrics:
808
  - type: total_reward
809
+ value: 44.00 +/- 49.64
810
  name: Total reward
811
  - type: expert_normalized_total_reward
812
+ value: 0.25 +/- 0.66
813
  name: Expert normalized total reward
814
  - type: human_normalized_total_reward
815
  value: 0.00 +/- 0.00
 
822
  type: atari-qbert
823
  metrics:
824
  - type: total_reward
825
+ value: 1951.50 +/- 2577.24
826
  name: Total reward
827
  - type: expert_normalized_total_reward
828
+ value: 0.04 +/- 0.06
829
  name: Expert normalized total reward
830
  - type: human_normalized_total_reward
831
+ value: 0.13 +/- 0.19
832
  name: Human normalized total reward
833
  - task:
834
  type: reinforcement-learning
 
838
  type: atari-riverraid
839
  metrics:
840
  - type: total_reward
841
+ value: 3758.50 +/- 1536.66
842
  name: Total reward
843
  - type: expert_normalized_total_reward
844
+ value: 0.18 +/- 0.11
845
  name: Expert normalized total reward
846
  - type: human_normalized_total_reward
847
+ value: 0.15 +/- 0.10
848
  name: Human normalized total reward
849
  - task:
850
  type: reinforcement-learning
 
854
  type: atari-roadrunner
855
  metrics:
856
  - type: total_reward
857
+ value: 6407.00 +/- 4847.36
858
  name: Total reward
859
  - type: expert_normalized_total_reward
860
+ value: 0.08 +/- 0.06
861
  name: Expert normalized total reward
862
  - type: human_normalized_total_reward
863
+ value: 0.82 +/- 0.62
864
  name: Human normalized total reward
865
  - task:
866
  type: reinforcement-learning
 
870
  type: atari-robotank
871
  metrics:
872
  - type: total_reward
873
+ value: 11.34 +/- 5.52
874
  name: Total reward
875
  - type: expert_normalized_total_reward
876
+ value: 0.12 +/- 0.07
877
  name: Expert normalized total reward
878
  - type: human_normalized_total_reward
879
+ value: 0.94 +/- 0.57
880
  name: Human normalized total reward
881
  - task:
882
  type: reinforcement-learning
 
886
  type: atari-seaquest
887
  metrics:
888
  - type: total_reward
889
+ value: 804.00 +/- 403.33
890
  name: Total reward
891
  - type: expert_normalized_total_reward
892
+ value: 0.29 +/- 0.16
893
  name: Expert normalized total reward
894
  - type: human_normalized_total_reward
895
+ value: 0.02 +/- 0.01
896
  name: Human normalized total reward
897
  - task:
898
  type: reinforcement-learning
 
902
  type: atari-skiing
903
  metrics:
904
  - type: total_reward
905
+ value: -16231.54 +/- 6060.48
906
  name: Total reward
907
  - type: expert_normalized_total_reward
908
+ value: 0.14 +/- 0.95
909
  name: Expert normalized total reward
910
  - type: human_normalized_total_reward
911
+ value: 0.07 +/- 0.47
912
  name: Human normalized total reward
913
  - task:
914
  type: reinforcement-learning
 
918
  type: atari-solaris
919
  metrics:
920
  - type: total_reward
921
+ value: 1286.60 +/- 446.70
922
  name: Total reward
923
  - type: expert_normalized_total_reward
924
+ value: 0.43 +/- 3.81
925
  name: Expert normalized total reward
926
  - type: human_normalized_total_reward
927
+ value: 0.00 +/- 0.04
928
  name: Human normalized total reward
929
  - task:
930
  type: reinforcement-learning
 
934
  type: atari-spaceinvaders
935
  metrics:
936
  - type: total_reward
937
+ value: 325.45 +/- 163.36
938
  name: Total reward
939
  - type: expert_normalized_total_reward
940
  value: 0.01 +/- 0.01
941
  name: Expert normalized total reward
942
  - type: human_normalized_total_reward
943
+ value: 0.12 +/- 0.11
944
  name: Human normalized total reward
945
  - task:
946
  type: reinforcement-learning
 
950
  type: atari-stargunner
951
  metrics:
952
  - type: total_reward
953
+ value: 4379.00 +/- 3027.22
954
  name: Total reward
955
  - type: expert_normalized_total_reward
956
+ value: 0.01 +/- 0.01
957
  name: Expert normalized total reward
958
  - type: human_normalized_total_reward
959
+ value: 0.39 +/- 0.32
960
  name: Human normalized total reward
961
  - task:
962
  type: reinforcement-learning
 
966
  type: atari-surround
967
  metrics:
968
  - type: total_reward
969
+ value: 2.67 +/- 4.74
970
  name: Total reward
971
  - type: expert_normalized_total_reward
972
+ value: 0.65 +/- 0.24
973
  name: Expert normalized total reward
974
  - type: human_normalized_total_reward
975
+ value: 0.77 +/- 0.29
976
  name: Human normalized total reward
977
  - task:
978
  type: reinforcement-learning
 
982
  type: atari-tennis
983
  metrics:
984
  - type: total_reward
985
+ value: -13.46 +/- 3.80
986
  name: Total reward
987
  - type: expert_normalized_total_reward
988
+ value: 0.30 +/- 0.11
989
  name: Expert normalized total reward
990
  - type: human_normalized_total_reward
991
+ value: 0.32 +/- 0.12
992
  name: Human normalized total reward
993
  - task:
994
  type: reinforcement-learning
 
998
  type: atari-timepilot
999
  metrics:
1000
  - type: total_reward
1001
+ value: 13028.00 +/- 5222.57
1002
  name: Total reward
1003
  - type: expert_normalized_total_reward
1004
+ value: 0.14 +/- 0.08
1005
  name: Expert normalized total reward
1006
  - type: human_normalized_total_reward
1007
+ value: 5.69 +/- 3.14
1008
  name: Human normalized total reward
1009
  - task:
1010
  type: reinforcement-learning
 
1014
  type: atari-tutankham
1015
  metrics:
1016
  - type: total_reward
1017
+ value: 85.66 +/- 61.77
1018
  name: Total reward
1019
  - type: expert_normalized_total_reward
1020
+ value: 0.27 +/- 0.22
1021
  name: Expert normalized total reward
1022
  - type: human_normalized_total_reward
1023
+ value: 0.48 +/- 0.40
1024
  name: Human normalized total reward
1025
  - task:
1026
  type: reinforcement-learning
 
1030
  type: atari-upndown
1031
  metrics:
1032
  - type: total_reward
1033
+ value: 17768.70 +/- 10321.95
1034
  name: Total reward
1035
  - type: expert_normalized_total_reward
1036
+ value: 0.04 +/- 0.02
1037
  name: Expert normalized total reward
1038
  - type: human_normalized_total_reward
1039
+ value: 1.54 +/- 0.92
1040
  name: Human normalized total reward
1041
  - task:
1042
  type: reinforcement-learning
 
1062
  type: atari-videopinball
1063
  metrics:
1064
  - type: total_reward
1065
+ value: 11917.43 +/- 8204.28
1066
  name: Total reward
1067
  - type: expert_normalized_total_reward
1068
+ value: 0.03 +/- 0.02
1069
  name: Expert normalized total reward
1070
  - type: human_normalized_total_reward
1071
+ value: 0.67 +/- 0.46
1072
  name: Human normalized total reward
1073
  - task:
1074
  type: reinforcement-learning
 
1078
  type: atari-wizardofwor
1079
  metrics:
1080
  - type: total_reward
1081
+ value: 2544.00 +/- 2902.42
1082
  name: Total reward
1083
  - type: expert_normalized_total_reward
1084
+ value: 0.04 +/- 0.06
1085
  name: Expert normalized total reward
1086
  - type: human_normalized_total_reward
1087
+ value: 0.47 +/- 0.69
1088
  name: Human normalized total reward
1089
  - task:
1090
  type: reinforcement-learning
 
1094
  type: atari-yarsrevenge
1095
  metrics:
1096
  - type: total_reward
1097
+ value: 12532.70 +/- 8062.85
1098
  name: Total reward
1099
  - type: expert_normalized_total_reward
1100
+ value: 0.04 +/- 0.03
1101
  name: Expert normalized total reward
1102
  - type: human_normalized_total_reward
1103
+ value: 0.18 +/- 0.16
1104
  name: Human normalized total reward
1105
  - task:
1106
  type: reinforcement-learning
 
1110
  type: atari-zaxxon
1111
  metrics:
1112
  - type: total_reward
1113
+ value: 6902.00 +/- 3206.09
1114
  name: Total reward
1115
  - type: expert_normalized_total_reward
1116
+ value: 0.09 +/- 0.04
1117
  name: Expert normalized total reward
1118
  - type: human_normalized_total_reward
1119
+ value: 0.75 +/- 0.35
1120
  name: Human normalized total reward
1121
  - task:
1122
  type: reinforcement-learning
 
2441
 
2442
  <details>
2443
  <summary>The model was trained on the following tasks:</summary>
 
2444
  - Alien
2445
  - Amidar
2446
  - Assault
 
2598
  - Humanoid Standup
2599
  - Swimmer
2600
  - Walker 2d
2601
+
2602
  </details>
2603
 
2604
  ## How to Get Started with the Model
 
2610
 
2611
  model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
2612
  ```
2613
+