diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..80fd5778543b4270bd14e9dfe7e1debb11ae8747 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 954.0 0.0 954.0 954.0 676.30365 0.0 676.30365 676.30365 0.14188744 0.029580075 0.18554002 0.099726446 0.23613659 1.685011386871338 +1 1088.0 0.0 1088.0 1088.0 875.66016 0.0 875.66016 875.66016 2.7156632 0.26439393 3.124498 1.9588704 0.2846375 3.354931354522705 +2 1067.0 0.0 1067.0 1067.0 933.66156 0.0 933.66156 933.66156 3.7461822 0.7800167 4.7676888 2.1093745 0.20866704 5.025254249572754 +3 1118.0 0.0 1118.0 1118.0 947.54987 0.0 947.54987 947.54987 4.13867 0.97118366 5.501315 2.3923388 0.22726646 6.718552827835083 +4 1146.0 0.0 1146.0 1146.0 875.4446 0.0 875.4446 875.4446 4.1683536 0.95543545 5.538224 2.405601 0.35046008 8.415615320205688 +5 794.0 0.0 794.0 794.0 794.0 0.0 794.0 794.0 3.7854521 0.7186139 4.9333296 2.130577 0.2753125 10.088018417358398 +6 1138.0 0.0 1138.0 1138.0 918.3662 0.0 918.3662 918.3662 3.632194 0.25992176 4.0443754 2.6205857 0.2599218 11.742835283279419 +7 1138.0 0.0 1138.0 1138.0 934.5082 0.0 934.5082 934.5082 4.052964 0.7706236 5.1679006 2.463 0.25029743 13.396860122680664 +8 816.0 0.0 816.0 816.0 804.1616 0.0 804.1616 804.1616 4.123271 0.911171 5.480109 2.1358142 0.2818742 15.051286458969116 +9 1110.0 0.0 1110.0 1110.0 914.8971 0.0 914.8971 914.8971 3.742129 0.3038148 4.1755457 2.754515 0.2608328 16.70838165283203 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdc71e479b78faf818fed6eea584e840506a8e8f --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee031b8ea45491362e43c7ce674d39ecb36e746010f6b95e465253ff86c21ef9 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..d06895fedd7cb1e1ce9d71f91002f959b21db2a1 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 997.0 0.0 997.0 997.0 1058.7911 0.0 1058.7911 1058.7911 0.21737418 0.042151038 0.28000772 0.15522087 -0.052144095 1.51759934425354 +1 1136.0 0.0 1136.0 1136.0 1197.8588 0.0 1197.8588 1197.8588 4.1114125 0.24794304 4.4734383 3.6201067 -0.034405142 3.0470921993255615 +2 1102.0 0.0 1102.0 1102.0 1221.4424 0.0 1221.4424 1221.4424 5.045687 1.1366005 6.9132295 3.5108063 -0.06643066 4.5611960887908936 +3 1096.0 0.0 1096.0 1096.0 1184.7007 0.0 1184.7007 1184.7007 5.3385153 1.1670407 7.4819694 3.6490781 -0.049916 6.074115991592407 +4 1109.0 0.0 1109.0 1109.0 1199.6577 0.0 1199.6577 1199.6577 5.2527514 1.1154491 7.4085336 3.6039195 -0.050365567 7.692310333251953 +5 1115.0 0.0 1115.0 1115.0 1207.973 0.0 1207.973 1207.973 5.2693186 1.3570057 8.087105 3.36601 -0.05167994 9.33620572090149 +6 1162.0 0.0 1162.0 1162.0 1264.7056 0.0 1264.7056 1264.7056 5.2789917 1.4095856 8.628875 3.575611 -0.057090417 10.972888946533203 +7 1114.0 0.0 1114.0 1114.0 1283.9711 0.0 1283.9711 1283.9711 5.4764786 1.4388483 8.887676 3.9530437 -0.09442891 12.60637092590332 +8 1122.0 0.0 1122.0 1122.0 1244.4823 0.0 1244.4823 1244.4823 5.741321 1.2205355 8.815638 4.464262 -0.06808375 14.243744611740112 +9 1144.0 0.0 1144.0 1144.0 1196.1349 0.0 1196.1349 1196.1349 5.5388002 1.3731841 8.808408 4.0520897 -0.028963044 15.886062622070312 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a55f3dac03689dcf134cc76cdeacf40ae5866eb --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d47a7445193fafa260f96a14587ab71d6260743127df121e945093162a55331 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..bfa257ebe3783cb6b5a00c3548b565d63bd52277 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 969.0 0.0 969.0 969.0 672.0358 0.0 672.0358 672.0358 0.13857247 0.028742015 0.18557829 0.0976936 0.23910148 1.7235760688781738 +1 1129.0 0.0 1129.0 1129.0 892.93854 0.0 892.93854 892.93854 2.7903426 0.26809582 3.1833777 2.220827 0.28893682 3.436563491821289 +2 1108.0 0.0 1108.0 1108.0 950.0444 0.0 950.0444 950.0444 3.851257 0.8142764 4.8837566 1.9756792 0.212878 5.146819591522217 +3 1140.0 0.0 1140.0 1140.0 950.0861 0.0 950.0861 950.0861 4.295231 0.9214783 5.551752 2.4944773 0.23075852 6.881531238555908 +4 1161.0 0.0 1161.0 1161.0 863.2509 0.0 863.2509 863.2509 4.3084717 0.9155701 5.5986896 2.2193334 0.35488576 8.564943075180054 +5 777.0 0.0 777.0 777.0 777.0 0.0 777.0 777.0 3.9260473 0.62447685 4.9239335 2.5548265 0.27851456 10.14668321609497 +6 1156.0 0.0 1156.0 1156.0 915.9216 0.0 915.9216 915.9216 3.6206124 0.2194028 3.9964087 2.7010636 0.26411274 11.727385520935059 +7 1157.0 0.0 1157.0 1157.0 944.37555 0.0 944.37555 944.37555 4.159795 0.72802097 5.235955 2.2842658 0.25494617 13.318596839904785 +8 815.0 0.0 815.0 815.0 801.8212 0.0 801.8212 801.8212 4.259533 0.8243809 5.52667 2.2840571 0.28649062 14.910341024398804 +9 1140.0 0.0 1140.0 1140.0 923.73157 0.0 923.73157 923.73157 3.7145355 0.22608782 4.069248 2.8610425 0.26535982 16.709343433380127 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba2b777a9c6260a6148a57703c2c7893d825da1c --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcbb21a53a76ab81a8916d88fa3128417631d9f9330c82f2352d414e6ac97e7f +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..140d197544452904a0144209c69c99dea77cf1d0 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1026.0 0.0 1026.0 1026.0 1044.981 0.0 1044.981 1044.981 0.21341561 0.04293082 0.2790111 0.15456104 -0.016195491 1.649519443511963 +1 1196.0 0.0 1196.0 1196.0 1193.6396 0.0 1193.6396 1193.6396 4.2593827 0.41461113 4.723946 3.4081898 0.0013123602 3.3138091564178467 +2 1146.0 0.0 1146.0 1146.0 1203.194 0.0 1203.194 1203.194 5.139553 1.3488352 6.81776 2.957762 -0.031880304 4.954023122787476 +3 1124.0 0.0 1124.0 1124.0 1151.5206 0.0 1151.5206 1151.5206 5.3119226 1.6662182 7.496709 2.5901408 -0.01538305 6.505433797836304 +4 1176.0 0.0 1176.0 1176.0 1200.1287 0.0 1200.1287 1200.1287 5.2056518 1.5982985 7.3847084 2.3569124 -0.013717562 8.029346942901611 +5 1158.0 0.0 1158.0 1158.0 1188.6652 0.0 1188.6652 1188.6652 5.3105674 1.782296 7.800515 2.0591264 -0.017036721 9.547352075576782 +6 1187.0 0.0 1187.0 1187.0 1227.7499 0.0 1227.7499 1227.7499 5.2800403 1.8861856 8.190782 1.8425303 -0.02265191 11.06665301322937 +7 1157.0 0.0 1157.0 1157.0 1268.9706 0.0 1268.9706 1268.9706 5.4762006 1.8380868 8.36121 2.0251794 -0.062205777 12.584624767303467 +8 1174.0 0.0 1174.0 1174.0 1242.0238 0.0 1242.0238 1242.0238 5.776742 1.6408564 8.433153 2.505 -0.03781183 14.096492290496826 +9 1184.0 0.0 1184.0 1184.0 1174.2599 0.0 1174.2599 1174.2599 5.616263 1.8906851 8.666612 1.8428457 0.005410582 15.658498525619507 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1fc4e1fb1ec7eec52023ddf40e5f1ae521fcbb7 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc160d55365e856595a2f2d13738797d85fa62ce83f5579768bc7e1aa434201 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..99359b147e1ae96cb41b5f8b802b91701e4b4167 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 969.0 0.0 969.0 969.0 652.86145 0.0 652.86145 652.86145 0.14414199 0.031151053 0.18614006 0.09822309 0.28277177 1.6294469833374023 +1 1103.0 0.0 1103.0 1103.0 882.0578 0.0 882.0578 882.0578 2.451144 0.35985994 2.9899967 1.7108487 0.33526963 3.2637274265289307 +2 1135.0 0.0 1135.0 1135.0 960.75586 0.0 960.75586 960.75586 3.6939754 0.73008066 4.6222386 2.3335814 0.25737727 4.864926338195801 +3 1138.0 0.0 1138.0 1138.0 954.2068 0.0 954.2068 954.2068 4.070017 0.94288146 5.3400016 2.5666225 0.2743185 6.495613098144531 +4 1135.0 0.0 1135.0 1135.0 841.90515 0.0 841.90515 841.90515 4.1153016 0.90182793 5.342021 2.5547903 0.3960741 8.126822471618652 +5 812.0 0.0 812.0 812.0 812.0 0.0 812.0 812.0 3.6437478 0.4509163 4.3484116 2.5868795 0.32354915 9.755660057067871 +6 1144.0 0.0 1144.0 1144.0 933.24384 0.0 933.24384 933.24384 3.6029906 0.23639365 4.059921 2.741676 0.30993444 11.380826950073242 +7 1144.0 0.0 1144.0 1144.0 938.77954 0.0 938.77954 938.77954 4.1037803 0.57644045 4.8851786 2.8273032 0.30047005 13.012800931930542 +8 817.0 0.0 817.0 817.0 817.0 0.0 817.0 817.0 4.09069 0.78461653 5.227884 2.5871317 0.3324399 14.543687582015991 +9 816.0 0.0 816.0 816.0 816.0 0.0 816.0 816.0 3.6571302 0.2472473 4.1313453 2.8621936 0.31087673 16.17698884010315 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e9855b81e4b350ce54632a22c883ae5bc3f4b77 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276f415b593d9585472db5629736fe862372ca3b3326c1ab5f03d31e042ee11c +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e8bb46da1dcd68b98ab59d01ebb41ddb52642ba --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1009.0 0.0 1009.0 1009.0 1022.16797 0.0 1022.16797 1022.16797 0.2132047 0.042787187 0.27887252 0.15446824 -0.011216506 1.5374846458435059 +1 1158.0 0.0 1158.0 1158.0 1146.3192 0.0 1146.3192 1146.3192 4.287194 0.3480199 4.6768413 3.5652573 0.00649295 3.1237385272979736 +2 1136.0 0.0 1136.0 1136.0 1183.0306 0.0 1183.0306 1183.0306 5.070743 1.3248519 6.7394876 2.9550483 -0.026142046 4.7451255321502686 +3 1154.0 0.0 1154.0 1154.0 1172.1407 0.0 1172.1407 1172.1407 5.27241 1.6528273 7.426351 2.589364 -0.010078713 6.40146017074585 +4 1128.0 0.0 1128.0 1128.0 1143.442 0.0 1143.442 1143.442 5.263782 1.6090158 7.3939953 2.4137821 -0.008579209 8.014779806137085 +5 1171.0 0.0 1171.0 1171.0 1192.0204 0.0 1192.0204 1192.0204 5.1072497 1.8183484 7.648181 1.964186 -0.01167798 9.631031513214111 +6 1189.0 0.0 1189.0 1189.0 1220.4288 0.0 1220.4288 1220.4288 5.245264 1.9371377 8.151563 1.7184973 -0.017470613 11.250556945800781 +7 1159.0 0.0 1159.0 1159.0 1263.9722 0.0 1263.9722 1263.9722 5.4867663 1.8630611 8.297822 2.0054038 -0.058318004 12.860278844833374 +8 1161.0 0.0 1161.0 1161.0 1219.4999 0.0 1219.4999 1219.4999 5.7067165 1.6905651 8.332491 2.4044118 -0.0325177 14.476622104644775 +9 1200.0 0.0 1200.0 1200.0 1180.4674 0.0 1180.4674 1180.4674 5.5160165 1.9323374 8.593139 1.734514 0.010852054 16.08645796775818 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..548d98abb7c1270ba39e8ed1acdb561e24291a70 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e20cf11982939a62c82a77802a678650ee8f8eb9545ff91a3c40a586ff8804 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..c69832e9835837a6faaae7346c91f7449b071f16 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 954.0 0.0 954.0 954.0 668.0948 0.0 668.0948 668.0948 0.14391032 0.030783739 0.18578668 0.097890265 0.25527233 1.7034809589385986 +1 1059.0 0.0 1059.0 1059.0 880.30005 0.0 880.30005 880.30005 2.6123362 0.26732633 3.019003 1.8907772 0.3054697 3.3405532836914062 +2 1099.0 0.0 1099.0 1099.0 944.048 0.0 944.048 944.048 3.5920753 0.74030995 4.5635943 2.1490664 0.22787021 4.905843019485474 +3 1116.0 0.0 1116.0 1116.0 927.9279 0.0 927.9279 927.9279 4.026224 0.9658623 5.3856153 2.3828697 0.24681357 6.419819593429565 +4 1102.0 0.0 1102.0 1102.0 832.1054 0.0 832.1054 832.1054 3.9524424 0.9923887 5.4511447 2.1054041 0.36770397 7.9319517612457275 +5 808.0 0.0 808.0 808.0 808.0 0.0 808.0 808.0 3.6329916 0.5241222 4.4886074 2.2946682 0.29519513 9.443203449249268 +6 1140.0 0.0 1140.0 1140.0 912.6025 0.0 912.6025 912.6025 3.5414517 0.28236043 4.003264 2.5154777 0.28073776 10.959861755371094 +7 1139.0 0.0 1139.0 1139.0 933.8369 0.0 933.8369 933.8369 3.949693 0.88118356 5.2524185 2.1059723 0.27137956 12.471558094024658 +8 842.0 0.0 842.0 842.0 828.6844 0.0 828.6844 828.6844 3.9791253 0.941225 5.4319673 2.109551 0.30262908 14.01619577407837 +9 1111.0 0.0 1111.0 1111.0 914.0822 0.0 914.0822 914.0822 3.6446168 0.32956383 4.1361604 2.7111197 0.2813115 15.666466236114502 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9879c8f004381068a61adff69cb5bc39470a77a --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ac088e50ab9a0aa60513e1098c44ddc85dc9f6953a69e27738670a10fded3f +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..81be826e5ec6d99e7faa73b3a5f990f9b4b56dbe --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 972.0 0.0 972.0 972.0 984.6938 0.0 984.6938 984.6938 0.211579 0.04046322 0.27884093 0.1544471 -0.010082543 1.7365944385528564 +1 1127.0 0.0 1127.0 1127.0 1113.5631 0.0 1113.5631 1113.5631 4.159586 0.27912626 4.5003414 3.480257 0.00707902 3.4840445518493652 +2 1089.0 0.0 1089.0 1089.0 1137.8744 0.0 1137.8744 1137.8744 4.9767046 1.1194432 6.4365654 2.9823587 -0.025791213 5.0699238777160645 +3 1109.0 0.0 1109.0 1109.0 1126.9594 0.0 1126.9594 1126.9594 5.1694717 1.4040157 7.0854287 2.5595465 -0.009457365 6.654399871826172 +4 1105.0 0.0 1105.0 1105.0 1118.8917 0.0 1118.8917 1118.8917 5.1303077 1.4057866 7.1245637 2.3683422 -0.007311538 8.233429431915283 +5 1115.0 0.0 1115.0 1115.0 1135.9646 0.0 1135.9646 1135.9646 5.002605 1.5961031 7.3556356 2.4214263 -0.011033729 9.815142631530762 +6 1140.0 0.0 1140.0 1140.0 1170.7115 0.0 1170.7115 1170.7115 5.112533 1.5856723 7.6747427 1.8765056 -0.016163468 11.389655351638794 +7 1098.0 0.0 1098.0 1098.0 1205.5194 0.0 1205.5194 1205.5194 5.3550334 1.5047942 7.8033247 3.0084562 -0.056589186 12.96997880935669 +8 1127.0 0.0 1127.0 1127.0 1189.6857 0.0 1189.6857 1189.6857 5.56936 1.5296624 8.209306 2.3203254 -0.033009395 14.545027256011963 +9 1159.0 0.0 1159.0 1159.0 1137.631 0.0 1137.631 1137.631 5.387512 1.7438824 8.512893 1.7574779 0.011246681 16.142975091934204 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..1968cc4e850284df20a235ae85b53caf3930eeed --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f4b1e857f88b9a5aff3340d84094d6435150d93ceba03e1d41ff780b684706 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..22482f4f6364d5768e26cf22b3368d71e50e1a10 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 973.0 0.0 973.0 973.0 694.662 0.0 694.662 694.662 0.14365676 0.0307239 0.18567976 0.09778939 0.24697244 1.558793306350708 +1 1078.0 0.0 1078.0 1078.0 882.59326 0.0 882.59326 882.59326 2.6239333 0.31518146 3.0829072 1.8954208 0.2983309 3.0810515880584717 +2 1106.0 0.0 1106.0 1106.0 966.9103 0.0 966.9103 966.9103 3.759089 0.7501848 4.6976714 2.3135533 0.2204276 4.57916784286499 +3 1130.0 0.0 1130.0 1130.0 968.87177 0.0 968.87177 968.87177 4.1473646 1.0389292 5.550422 2.4351237 0.23870844 6.222944021224976 +4 1146.0 0.0 1146.0 1146.0 883.5273 0.0 883.5273 883.5273 4.1733065 1.047617 5.5759683 2.2192788 0.36103547 7.8501503467559814 +5 813.0 0.0 813.0 813.0 813.0 0.0 813.0 813.0 3.8126295 0.8231941 4.9781203 2.0010927 0.28780985 9.477843999862671 +6 1153.0 0.0 1153.0 1153.0 939.56793 0.0 939.56793 939.56793 3.5396867 0.2822938 3.9829445 2.458968 0.27328056 11.106434106826782 +7 1143.0 0.0 1143.0 1143.0 952.993 0.0 952.993 952.993 4.109153 0.8529048 5.189638 2.2652974 0.26389876 12.731141567230225 +8 839.0 0.0 839.0 839.0 826.59515 0.0 826.59515 826.59515 4.0551176 1.0523028 5.4699583 1.8837241 0.29536194 14.35535717010498 +9 1130.0 0.0 1130.0 1130.0 939.2314 0.0 939.2314 939.2314 3.6228032 0.3550596 4.07415 2.5550227 0.27409273 15.982172012329102 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..04d54a81b8b65d5891dc33f4a628a0d2ba325561 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27323761f43d78e37be93432af6308a6536a92c7796b5f53158774d6bf0d4c2 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..edd8b341db9b031a71e028b27809ed6328bc4df0 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1005.0 0.0 1005.0 1005.0 1050.3073 0.0 1050.3073 1050.3073 0.2083331 0.038922932 0.27951896 0.15489888 -0.034480557 1.6062653064727783 +1 1132.0 0.0 1132.0 1132.0 1167.6956 0.0 1167.6956 1167.6956 4.417243 0.30045772 4.809263 3.5751004 -0.01786621 3.31565260887146 +2 1109.0 0.0 1109.0 1109.0 1210.0961 0.0 1210.0961 1210.0961 5.286 1.2887203 7.020565 2.9970827 -0.050751507 5.019345283508301 +3 1140.0 0.0 1140.0 1140.0 1205.8854 0.0 1205.8854 1205.8854 5.5683193 1.4816289 7.6672544 2.7909489 -0.032942563 6.717926263809204 +4 1121.0 0.0 1121.0 1121.0 1185.4083 0.0 1185.4083 1185.4083 5.550827 1.4996761 7.7361946 3.0177293 -0.032203898 8.419236421585083 +5 1136.0 0.0 1136.0 1136.0 1205.4613 0.0 1205.4613 1205.4613 5.4719586 1.6848688 8.039 2.107475 -0.034764916 10.121132850646973 +6 1156.0 0.0 1156.0 1156.0 1236.1248 0.0 1236.1248 1236.1248 5.559444 1.7833451 8.552049 2.8100243 -0.040083125 11.819284439086914 +7 1129.0 0.0 1129.0 1129.0 1289.9814 0.0 1289.9814 1289.9814 5.7034388 1.7226404 8.609366 3.206553 -0.0804901 13.520227432250977 +8 1145.0 0.0 1145.0 1145.0 1250.1304 0.0 1250.1304 1250.1304 6.012895 1.5530797 8.673737 2.972351 -0.05259113 15.213274955749512 +9 1161.0 0.0 1161.0 1161.0 1186.224 0.0 1186.224 1186.224 5.8791814 1.8160787 8.90383 3.2829866 -0.012612 16.91295027732849 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e97b1daf0474e8a3ee49d1ddef3c3fde4807161 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb67346217210b13452f837c729700c9ae7eb5ed25e144c36579a09311c518ab +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0a81d2836f08d7c650ba15c485320b95fa4962f --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 967.0 0.0 967.0 967.0 650.47815 0.0 650.47815 650.47815 0.14131632 0.030033182 0.18593313 0.09802827 0.2666568 1.6628952026367188 +1 1091.0 0.0 1091.0 1091.0 876.4549 0.0 876.4549 876.4549 2.5565624 0.32225707 3.0280733 1.7767868 0.31831637 3.3128859996795654 +2 1104.0 0.0 1104.0 1104.0 939.44507 0.0 939.44507 939.44507 3.850918 0.59140176 4.597783 2.7538157 0.24057762 4.967300176620483 +3 1132.0 0.0 1132.0 1132.0 943.1971 0.0 943.1971 943.1971 4.152701 0.9084487 5.3756094 2.494341 0.25828075 6.525930643081665 +4 1134.0 0.0 1134.0 1134.0 821.749 0.0 821.749 821.749 4.3288636 0.74346656 5.3766456 2.7144861 0.38079417 8.047613859176636 +5 796.0 0.0 796.0 796.0 796.0 0.0 796.0 796.0 3.7517662 0.4084951 4.4196243 2.5840096 0.30703163 9.564523458480835 +6 1162.0 0.0 1162.0 1162.0 905.5834 0.0 905.5834 905.5834 3.5776546 0.23350081 4.0045986 2.6295357 0.29304737 11.076406478881836 +7 1163.0 0.0 1163.0 1163.0 927.96704 0.0 927.96704 927.96704 4.136982 0.6175513 5.0085883 2.5484107 0.2838559 12.595866680145264 +8 794.0 0.0 794.0 794.0 793.68445 0.0 793.68445 793.68445 4.147046 0.7952608 5.3124604 2.2684364 0.31556553 14.116122245788574 +9 1135.0 0.0 1135.0 1135.0 911.79504 0.0 911.79504 911.79504 3.6932654 0.24959518 4.196717 2.832329 0.29407793 15.635130643844604 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f4ec68f2a79365af6888b72d9bc548046b878ec --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5de8d6527fdf635d9b770f2fa05e9dd9bb33def4e5e8e92c8df308b0cb854f +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..acd41875266f293afe3f6b39bd86d22e844fbc41 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 994.0 0.0 994.0 994.0 1021.0768 0.0 1021.0768 1021.0768 0.21250007 0.04066559 0.27916646 0.15466475 -0.021783084 1.668320894241333 +1 1109.0 0.0 1109.0 1109.0 1117.7433 0.0 1117.7433 1117.7433 4.2616544 0.31179178 4.665305 3.5361261 -0.0046065897 3.335503578186035 +2 1108.0 0.0 1108.0 1108.0 1176.506 0.0 1176.506 1176.506 4.9562883 1.2602054 6.7487607 2.8961759 -0.03611198 4.906333923339844 +3 1123.0 0.0 1123.0 1123.0 1161.7898 0.0 1161.7898 1161.7898 5.270969 1.5578464 7.5988173 2.6020186 -0.020426288 6.446594476699829 +4 1123.0 0.0 1123.0 1123.0 1158.9955 0.0 1158.9955 1158.9955 5.23163 1.5479801 7.62154 2.4043288 -0.0190451 7.98651647567749 +5 1119.0 0.0 1119.0 1119.0 1158.3104 0.0 1158.3104 1158.3104 5.1757507 1.7599212 8.004074 1.9698908 -0.022010192 9.531043529510498 +6 1128.0 0.0 1128.0 1128.0 1174.9309 0.0 1174.9309 1174.9309 5.2521195 1.8197953 8.52076 1.9909644 -0.027720481 11.089900732040405 +7 1122.0 0.0 1122.0 1122.0 1233.4878 0.0 1233.4878 1233.4878 5.360936 1.6240821 8.3361025 2.3635793 -0.06869224 12.642500877380371 +8 1141.0 0.0 1141.0 1141.0 1213.4513 0.0 1213.4513 1213.4513 5.664172 1.639796 8.690855 2.6192145 -0.040611833 14.194446086883545 +9 1136.0 0.0 1136.0 1136.0 1135.4099 0.0 1135.4099 1135.4099 5.582672 1.9019033 8.972779 2.0850525 0.00032451004 15.862299680709839 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..f17f69b469c5f10762ea6a09e25694d505db5815 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c545b3f2162f5bfd9d95e91d7aed169e0bd5f7b22bcd0ea4e9181c001501806a +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..863775821295263612e9011117843e0bd07e24a7 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 960.0 0.0 960.0 960.0 696.962 0.0 696.962 696.962 0.14461029 0.030458668 0.18551809 0.09763673 0.23443723 1.4871890544891357 +1 1067.0 0.0 1067.0 1067.0 876.4156 0.0 876.4156 876.4156 2.6846 0.26483467 3.0909948 1.9939945 0.28403038 2.9716782569885254 +2 1063.0 0.0 1063.0 1063.0 943.0397 0.0 943.0397 943.0397 3.7068694 0.7814084 4.7681146 2.210012 0.2071846 4.4772045612335205 +3 1096.0 0.0 1096.0 1096.0 940.43274 0.0 940.43274 940.43274 4.0503674 0.9625744 5.473732 2.595045 0.2261152 5.95758581161499 +4 1122.0 0.0 1122.0 1122.0 876.138 0.0 876.138 876.138 4.124876 0.95054567 5.554649 2.518483 0.34923637 7.440656900405884 +5 810.0 0.0 810.0 810.0 810.0 0.0 810.0 810.0 3.7733152 0.71175516 4.9754953 2.42703 0.27406585 8.979296445846558 +6 1143.0 0.0 1143.0 1143.0 943.0028 0.0 943.0028 943.0028 3.5873153 0.21049257 3.976318 2.8371928 0.25940034 10.609726667404175 +7 1124.0 0.0 1124.0 1124.0 940.73334 0.0 940.73334 940.73334 4.059508 0.76205575 5.230571 2.6240344 0.2500227 12.174387693405151 +8 832.0 0.0 832.0 832.0 820.45306 0.0 820.45306 820.45306 4.0821867 0.92500216 5.5930085 2.4797463 0.28163868 13.762090682983398 +9 1107.0 0.0 1107.0 1107.0 927.1604 0.0 927.1604 927.1604 3.6596467 0.27138594 4.0661035 3.148018 0.26025942 15.34944224357605 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac632823c66e11f4c1919d53d437c3a120c305a --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e566593534bcc07af3a3c019cece2285a71715bba92eb4a81edd25c0c848479 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..85fbe4ba61ccd9238cfd631aa8098eca7d342293 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 961.0 0.0 961.0 961.0 658.6351 0.0 658.6351 658.6351 0.13925515 0.028934551 0.18562427 0.09773703 0.24266842 1.7062528133392334 +1 1115.0 0.0 1115.0 1115.0 882.6471 0.0 882.6471 882.6471 2.7469413 0.30195028 3.1831293 1.9095322 0.2922673 3.428800106048584 +2 1103.0 0.0 1103.0 1103.0 950.87756 0.0 950.87756 950.87756 3.941822 0.7039695 4.8088365 2.424278 0.21577637 4.998257398605347 +3 1138.0 0.0 1138.0 1138.0 947.5591 0.0 947.5591 947.5591 4.3396482 0.880968 5.4976707 2.4547722 0.23395738 6.566149711608887 +4 1151.0 0.0 1151.0 1151.0 851.09656 0.0 851.09656 851.09656 4.399286 0.82830334 5.5150275 2.2771075 0.35745353 8.141887426376343 +5 783.0 0.0 783.0 783.0 783.0 0.0 783.0 783.0 3.9612834 0.5851283 4.832699 2.4762652 0.28221935 9.71167802810669 +6 1158.0 0.0 1158.0 1158.0 925.7624 0.0 925.7624 925.7624 3.6690085 0.22637503 4.0836244 3.107776 0.2675549 11.303403377532959 +7 1158.0 0.0 1158.0 1158.0 941.23206 0.0 941.23206 941.23206 4.2673216 0.62286407 5.1033444 2.5053515 0.25805783 12.89110279083252 +8 808.0 0.0 808.0 808.0 795.8295 0.0 795.8295 795.8295 4.275804 0.81354845 5.431799 2.1283097 0.28976882 14.46428370475769 +9 1141.0 0.0 1141.0 1141.0 922.4609 0.0 922.4609 922.4609 3.73936 0.25211564 4.0857506 2.9073336 0.26814616 16.137130975723267 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..4691d8a14aef3e8509b8f7f6d9e06958acda52c6 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0df1af3e041777cfb0384b23866782c277c9e4ad6afb414cd24f84470374495 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3d32c9f8f861100d425f1b93ed3e8b11dff3a48 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 980.0 0.0 980.0 980.0 718.56384 0.0 718.56384 718.56384 0.14357658 0.030604603 0.18548897 0.09760922 0.2321811 1.6380438804626465 +1 1089.0 0.0 1089.0 1089.0 891.0919 0.0 891.0919 891.0919 2.8142233 0.22972487 3.1516528 2.1108203 0.2803231 3.2650067806243896 +2 1075.0 0.0 1075.0 1075.0 952.34393 0.0 952.34393 952.34393 3.6632435 0.7975244 4.717457 2.075314 0.20476793 4.8938682079315186 +3 1099.0 0.0 1099.0 1099.0 943.2908 0.0 943.2908 943.2908 4.1468287 0.94457436 5.4132915 2.502675 0.22339925 6.521366357803345 +4 1141.0 0.0 1141.0 1141.0 853.15894 0.0 853.15894 853.15894 4.149177 0.97772545 5.471329 2.2829554 0.34596258 8.145384073257446 +5 808.0 0.0 808.0 808.0 808.0 0.0 808.0 808.0 3.7084212 0.7831058 4.877923 1.9607472 0.27125865 9.769689559936523 +6 1142.0 0.0 1142.0 1142.0 936.8659 0.0 936.8659 936.8659 3.569337 0.26803124 4.0268626 2.5658014 0.25577822 11.398471355438232 +7 1145.0 0.0 1145.0 1145.0 956.43195 0.0 956.43195 956.43195 4.0238905 0.82438606 5.1553283 2.225504 0.24617216 13.037954568862915 +8 842.0 0.0 842.0 842.0 830.6262 0.0 830.6262 830.6262 4.0586476 1.010051 5.524897 2.057889 0.27741665 14.670298337936401 +9 1123.0 0.0 1123.0 1123.0 932.80743 0.0 932.80743 932.80743 3.6569283 0.32923123 4.0800157 2.7342982 0.2566702 16.301510334014893 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b21de6debb2de07049207f4e11525a26aeac482 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee182722fb4ba177d9bd74986bf940318019cb0d36a83afb50d12ab7b17d09ac +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..9a0eab9d80edad17b4b362449b9485c46ae13221 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 969.0 0.0 969.0 969.0 672.0358 0.0 672.0358 672.0358 0.13857247 0.028742015 0.18557829 0.0976936 0.23910148 1.7088229656219482 +1 1129.0 0.0 1129.0 1129.0 892.93854 0.0 892.93854 892.93854 2.7903426 0.26809582 3.1833777 2.220827 0.28893682 3.2944161891937256 +2 1108.0 0.0 1108.0 1108.0 950.0444 0.0 950.0444 950.0444 3.851257 0.8142764 4.8837566 1.9756792 0.212878 4.854521751403809 +3 1140.0 0.0 1140.0 1140.0 950.0861 0.0 950.0861 950.0861 4.295231 0.9214783 5.551752 2.4944773 0.23075852 6.406561613082886 +4 1161.0 0.0 1161.0 1161.0 863.2509 0.0 863.2509 863.2509 4.3084717 0.9155701 5.5986896 2.2193334 0.35488576 7.961761951446533 +5 777.0 0.0 777.0 777.0 777.0 0.0 777.0 777.0 3.9260473 0.62447685 4.9239335 2.5548265 0.27851456 9.513084650039673 +6 1156.0 0.0 1156.0 1156.0 915.9216 0.0 915.9216 915.9216 3.6206124 0.2194028 3.9964087 2.7010636 0.26411274 11.063723087310791 +7 1157.0 0.0 1157.0 1157.0 944.37555 0.0 944.37555 944.37555 4.159795 0.72802097 5.235955 2.2842658 0.25494617 12.639371395111084 +8 815.0 0.0 815.0 815.0 801.8212 0.0 801.8212 801.8212 4.259533 0.8243809 5.52667 2.2840571 0.28649062 14.626024007797241 +9 1140.0 0.0 1140.0 1140.0 923.73157 0.0 923.73157 923.73157 3.7145355 0.22608782 4.069248 2.8610425 0.26535982 16.209736824035645 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba2b777a9c6260a6148a57703c2c7893d825da1c --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcbb21a53a76ab81a8916d88fa3128417631d9f9330c82f2352d414e6ac97e7f +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c8df66385f3bdd0933b3bba6cb4e06b3e116cb1 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 981.0 0.0 981.0 981.0 729.52344 0.0 729.52344 729.52344 0.14621699 0.03135319 0.18559822 0.09771243 0.24064764 1.5561282634735107 +1 1088.0 0.0 1088.0 1088.0 886.0866 0.0 886.0866 886.0866 2.7020202 0.25672847 3.0952837 2.0296285 0.2913613 3.012709379196167 +2 1079.0 0.0 1079.0 1079.0 961.9102 0.0 961.9102 961.9102 3.5723522 0.8149151 4.6430964 2.1418855 0.21484354 4.471331357955933 +3 1157.0 0.0 1157.0 1157.0 982.6906 0.0 982.6906 982.6906 3.9439235 1.0480071 5.469137 2.4121728 0.23241222 5.962704181671143 +4 1146.0 0.0 1146.0 1146.0 898.5622 0.0 898.5622 898.5622 3.9444327 1.1628145 5.648532 2.0789957 0.3570521 7.541250944137573 +5 1151.0 0.0 1151.0 1151.0 953.1763 0.0 953.1763 953.1763 3.631081 0.8536078 4.9947143 2.1580963 0.28060052 9.022983312606812 +6 1159.0 0.0 1159.0 1159.0 976.00684 0.0 976.00684 976.00684 3.9881725 0.8392917 5.407794 2.6539266 0.26636583 10.534041166305542 +7 866.0 0.0 866.0 866.0 850.5669 0.0 850.5669 850.5669 4.0766387 0.7861288 5.3801 2.8064446 0.25722128 12.10569953918457 +8 1173.0 0.0 1173.0 1173.0 932.3998 0.0 932.3998 932.3998 3.7177083 0.37252915 4.268213 2.9544246 0.288836 13.681437015533447 +9 1133.0 0.0 1133.0 1133.0 922.1462 0.0 922.1462 922.1462 3.8427944 0.96484137 5.5094194 2.4508233 0.26758152 15.255890130996704 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..65014a2066a2b597885c148a5d41f0d8d54d177b --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e46393eb8368dce9768fa12ea544185077b5312ab57cd04f2c48f759befa2bf +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..37322acc277e2366b8d1f048d60876d991e2b7c0 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 950.0 0.0 950.0 950.0 624.57465 0.0 624.57465 624.57465 0.13728271 0.02764003 0.17739289 0.09782085 0.24955931 1.7624175548553467 +1 1104.0 0.0 1104.0 1104.0 864.0229 0.0 864.0229 864.0229 2.6868756 0.32664865 3.1714265 1.9677058 0.2999712 3.4865493774414062 +2 1085.0 0.0 1085.0 1085.0 929.5329 0.0 929.5329 929.5329 3.9385984 0.63623685 4.771843 2.6993408 0.22337249 5.236672639846802 +3 1126.0 0.0 1126.0 1126.0 934.0974 0.0 934.0974 934.0974 4.3167896 0.8250715 5.47447 2.8538456 0.24108426 6.991329669952393 +4 1130.0 0.0 1130.0 1130.0 836.7963 0.0 836.7963 836.7963 4.40971 0.7646041 5.5344744 2.9460492 0.36513525 8.742123365402222 +5 765.0 0.0 765.0 765.0 765.0 0.0 765.0 765.0 3.9137387 0.5343742 4.810089 2.7637703 0.28925234 10.372583150863647 +6 1151.0 0.0 1151.0 1151.0 898.08936 0.0 898.08936 898.08936 3.6634803 0.21009932 4.045622 3.1236024 0.27490366 11.979020118713379 +7 1144.0 0.0 1144.0 1144.0 915.7341 0.0 915.7341 915.7341 4.243589 0.55887103 5.113475 3.0631652 0.26573482 13.585446834564209 +8 794.0 0.0 794.0 794.0 780.3157 0.0 780.3157 780.3157 4.308112 0.69411063 5.4061522 2.8615723 0.2974857 15.204237461090088 +9 1127.0 0.0 1127.0 1127.0 900.92773 0.0 900.92773 900.92773 3.7712364 0.19661315 4.111161 3.2220929 0.27603468 16.81417679786682 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a821edf6d50af165b80019edb64cd2b391667af --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd17a108c633c829455980f2b8d8d04308a8f15568fa3abff7a0dcc4d8f3f41c +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3440da75a608922774950c0764ee390a196e9e3 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 932.0 0.0 932.0 932.0 673.4061 0.0 673.4061 673.4061 0.14281626 0.029394038 0.18531814 0.09950956 0.2189619 1.588291883468628 +1 1064.0 0.0 1064.0 1064.0 878.9449 0.0 878.9449 878.9449 2.746518 0.27065545 3.1781914 2.001384 0.26819548 3.144702911376953 +2 1050.0 0.0 1050.0 1050.0 933.098 0.0 933.098 933.098 3.8264673 0.8128083 4.934488 2.1933682 0.19258934 4.684319734573364 +3 1099.0 0.0 1099.0 1099.0 946.7618 0.0 946.7618 946.7618 4.156977 1.0257306 5.663101 2.3980792 0.21085584 6.228911638259888 +4 1102.0 0.0 1102.0 1102.0 855.78674 0.0 855.78674 855.78674 4.1780324 0.99315935 5.6630216 2.2131586 0.33498406 7.905541896820068 +5 796.0 0.0 796.0 796.0 796.0 0.0 796.0 796.0 3.81028 0.6577964 4.8907957 2.2253366 0.25831014 9.569766521453857 +6 1121.0 0.0 1121.0 1121.0 925.8105 0.0 925.8105 925.8105 3.6280527 0.26288044 4.0413876 2.5877035 0.24337813 11.241984367370605 +7 1116.0 0.0 1116.0 1116.0 937.9681 0.0 937.9681 937.9681 4.1028805 0.8392124 5.3190503 2.358667 0.23425166 12.913923025131226 +8 821.0 0.0 821.0 821.0 809.8258 0.0 809.8258 809.8258 4.125764 0.96294147 5.5976806 2.0679636 0.26604947 14.586566686630249 +9 1100.0 0.0 1100.0 1100.0 932.2601 0.0 932.2601 932.2601 3.701502 0.31825775 4.122156 2.7013092 0.24487671 16.259058713912964 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..84cfd31ff564d2861eb0795148a11c74fdb8db07 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cab35feb3669f89b26c482435801781138130fc2557bac1f43a7950dafe5df +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..fbfdc0fe0f313d385843847c9c0f1463ef4fc383 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 940.0 0.0 940.0 940.0 669.8921 0.0 669.8921 669.8921 0.14199741 0.029582325 0.18546939 0.09965742 0.2306644 1.784482479095459 +1 1092.0 0.0 1092.0 1092.0 882.0882 0.0 882.0882 882.0882 2.7080455 0.2675291 3.1196795 1.9506763 0.28063086 3.430050849914551 +2 1071.0 0.0 1071.0 1071.0 940.26385 0.0 940.26385 940.26385 3.7942593 0.8024549 4.8537703 2.1586196 0.20523748 5.077784061431885 +3 1124.0 0.0 1124.0 1124.0 952.1106 0.0 952.1106 952.1106 4.184324 0.967879 5.5870686 2.6013608 0.2226535 6.689285755157471 +4 1136.0 0.0 1136.0 1136.0 858.6209 0.0 858.6209 858.6209 4.235004 0.9107053 5.6129923 2.5917037 0.34759206 8.244816780090332 +5 901.0 0.0 901.0 901.0 836.47034 0.0 836.47034 836.47034 3.791346 0.60990816 4.830906 2.7762403 0.26999888 9.76483941078186 +6 1150.0 0.0 1150.0 1150.0 940.9599 0.0 940.9599 940.9599 3.8311212 0.453859 4.666991 3.001936 0.25555032 11.28182077407837 +7 1111.0 0.0 1111.0 1111.0 920.2439 0.0 920.2439 920.2439 4.2120776 0.6020741 5.2347803 3.0140378 0.24677418 12.80098581314087 +8 1140.0 0.0 1140.0 1140.0 889.25366 0.0 889.25366 889.25366 4.1656322 0.75375086 5.4501276 2.799804 0.27829763 14.311757564544678 +9 1142.0 0.0 1142.0 1142.0 910.2665 0.0 910.2665 910.2665 4.04608 0.83105916 5.6033006 2.7701273 0.2574818 15.82450270652771 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..79e108d9bf9431ef61d488111c2edd07f82abe15 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8371dd2b4692345ef28a34a7b779aa599accd2dbe56117e1e0d279bfe06c8c7c +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4de2c607c603dfabfdf67a88a2363cc35830eef --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 969.0 0.0 969.0 969.0 1001.09015 0.0 1001.09015 1001.09015 0.20906036 0.038462576 0.2792403 0.15471393 -0.024440244 1.6964023113250732 +1 1125.0 0.0 1125.0 1125.0 1139.8951 0.0 1139.8951 1139.8951 4.2410755 0.24023664 4.5594068 3.5332797 -0.007454872 3.381429433822632 +2 1097.0 0.0 1097.0 1097.0 1176.4084 0.0 1176.4084 1176.4084 5.1085844 1.1353095 6.619614 2.9527528 -0.039744064 4.945310115814209 +3 1104.0 0.0 1104.0 1104.0 1147.9817 0.0 1147.9817 1147.9817 5.3691225 1.3366119 7.227293 2.9625316 -0.022235587 6.484909296035767 +4 1099.0 0.0 1099.0 1099.0 1141.8478 0.0 1141.8478 1141.8478 5.269952 1.3693477 7.2467556 2.3442254 -0.021424204 8.022419214248657 +5 1112.0 0.0 1112.0 1112.0 1158.6998 0.0 1158.6998 1158.6998 5.2014823 1.5150845 7.48344 2.5505195 -0.023361266 9.561407089233398 +6 1123.0 0.0 1123.0 1123.0 1182.7004 0.0 1182.7004 1182.7004 5.251117 1.5420007 7.8062267 2.6511471 -0.02986592 11.2586088180542 +7 1112.0 0.0 1112.0 1112.0 1251.8206 0.0 1251.8206 1251.8206 5.431638 1.488084 7.9090085 3.0246744 -0.06991106 12.877774238586426 +8 1124.0 0.0 1124.0 1124.0 1214.782 0.0 1214.782 1214.782 5.7987356 1.4505512 8.356214 2.579909 -0.045413613 14.45970892906189 +9 1144.0 0.0 1144.0 1144.0 1148.8652 0.0 1148.8652 1148.8652 5.646733 1.6764182 8.686464 2.0320463 -0.0024324954 16.09728503227234 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..70efb0e967a532bf29e894266b358e1c0b5f83ff --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428b7eacd157f3261b0f972db8233281cd5700f662c8c02f93a880c15efcd4b9 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..50b0dab3fecd93f85e1f4b87253699e7ebdfb174 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 950.0 0.0 950.0 950.0 639.19446 0.0 639.19446 639.19446 0.1403101 0.028773542 0.18571159 0.09781944 0.24944286 1.5979557037353516 +1 1061.0 0.0 1061.0 1061.0 851.10547 0.0 851.10547 851.10547 2.6332822 0.28733024 3.0850658 1.8468843 0.2989954 3.1984665393829346 +2 1041.0 0.0 1041.0 1041.0 906.8874 0.0 906.8874 906.8874 3.7900455 0.6247883 4.6364756 2.216647 0.22277898 4.814585447311401 +3 1063.0 0.0 1063.0 1063.0 902.42664 0.0 902.42664 902.42664 4.1203237 0.81065744 5.2966404 2.5061533 0.2407397 6.411687135696411 +4 1093.0 0.0 1093.0 1093.0 840.9646 0.0 840.9646 840.9646 4.2007217 0.7897101 5.3665195 2.396462 0.36421353 8.001651048660278 +5 1066.0 0.0 1066.0 1066.0 882.3243 0.0 882.3243 882.3243 3.8185031 0.57044506 4.7039504 2.1971014 0.28879836 9.591800451278687 +6 1119.0 0.0 1119.0 1119.0 904.7552 0.0 904.7552 904.7552 4.008738 0.62801003 5.0433335 2.7193644 0.27396995 11.177881479263306 +7 1074.0 0.0 1074.0 1074.0 903.6017 0.0 903.6017 903.6017 4.1922016 0.6174168 5.1540985 2.51493 0.2645931 12.791511535644531 +8 805.0 0.0 805.0 805.0 791.37726 0.0 791.37726 791.37726 4.151276 0.74073076 5.3729877 2.5267441 0.29615307 14.38655400276184 +9 1095.0 0.0 1095.0 1095.0 889.4276 0.0 889.4276 889.4276 3.7443585 0.24806657 4.0929894 3.0685394 0.27519703 15.978416442871094 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..4626b0a633e64a502d2654a6f3b09da73ee40439 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4894c49ec707aa482e0a1b1d2f5b2d9205e22874dd1c8cb106067b0f50ad303 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..4410ef842bab0613cebb104b2929a3b9178fbe9a --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 983.0 0.0 983.0 983.0 963.8302 0.0 963.8302 963.8302 0.2172839 0.043745913 0.27808142 0.15393394 0.017115943 1.5709655284881592 +1 1133.0 0.0 1133.0 1133.0 1076.4127 0.0 1076.4127 1076.4127 3.9829204 0.31597856 4.3780594 3.4114668 0.03344433 3.134471893310547 +2 1097.0 0.0 1097.0 1097.0 1098.3168 0.0 1098.3168 1098.3168 4.489793 1.222612 6.248055 2.7485442 -0.0007916242 4.699682712554932 +3 1099.0 0.0 1099.0 1099.0 1072.9763 0.0 1072.9763 1072.9763 4.6844273 1.6150017 7.1341743 2.3744173 0.015335552 6.2621729373931885 +4 1113.0 0.0 1113.0 1113.0 1081.1981 0.0 1081.1981 1081.1981 4.6028852 1.4964246 6.9791613 2.3124 0.018795505 7.830862283706665 +5 1104.0 0.0 1104.0 1104.0 1082.379 0.0 1082.379 1082.379 4.6057973 1.7765934 7.522626 1.9443967 0.01318337 9.400492191314697 +6 1108.0 0.0 1108.0 1108.0 1092.6937 0.0 1092.6937 1092.6937 4.606546 1.7701966 7.803634 1.9644201 0.009009138 10.857977628707886 +7 1099.0 0.0 1099.0 1099.0 1155.3097 0.0 1155.3097 1155.3097 4.7559743 1.6822646 7.786914 2.1779563 -0.033122525 12.301966190338135 +8 1129.0 0.0 1129.0 1129.0 1142.5061 0.0 1142.5061 1142.5061 5.1789975 1.6256504 8.2600975 2.6240883 -0.0079640895 13.742249727249146 +9 1134.0 0.0 1134.0 1134.0 1071.4701 0.0 1071.4701 1071.4701 4.9249816 1.8191441 8.612975 2.1423302 0.036782153 15.187167882919312 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..27c7179459a4bb3405f1ac2d098032c4b63acc1d --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663a73d3678b452bb16f3426169570ec1d187e828f8f812c2315f317dfcebbdb +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08ed65c5be8eadfbea9825061c5c3a3c34c908a4 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770168b3aaef05c84af851d3036d265dcee4f037db63d5c1823616342f1a4a47 +size 13916 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..034925bd92108ca1f8e5e6c8fd4cdbbde9e28523 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 957.0 0.0 957.0 957.0 637.5761 0.0 637.5761 637.5761 0.14007838 0.02888074 0.18580332 0.09790597 0.25656536 1.7595953941345215 +1 1108.0 0.0 1108.0 1108.0 880.7083 0.0 880.7083 880.7083 2.6069727 0.33599967 3.109939 1.9480541 0.30798298 3.469228744506836 +2 1101.0 0.0 1101.0 1101.0 936.56476 0.0 936.56476 936.56476 3.852493 0.7128252 4.7690225 2.378513 0.23062448 5.168588638305664 +3 1121.0 0.0 1121.0 1121.0 932.48444 0.0 932.48444 932.48444 4.2708974 0.922139 5.50832 2.313063 0.24804662 6.891006946563721 +4 1124.0 0.0 1124.0 1124.0 820.5739 0.0 820.5739 820.5739 4.273599 0.87016684 5.4727254 2.1037965 0.37230223 8.590726852416992 +5 775.0 0.0 775.0 775.0 775.0 0.0 775.0 775.0 3.7054665 0.47596452 4.4272575 2.1272068 0.29698217 10.284037828445435 +6 1144.0 0.0 1144.0 1144.0 899.1074 0.0 899.1074 899.1074 3.634008 0.2414347 4.058069 3.0538502 0.28245968 11.982211828231812 +7 1135.0 0.0 1135.0 1135.0 921.3262 0.0 921.3262 921.3262 4.0719156 0.69286823 5.0270724 2.2652032 0.2732399 13.68367338180542 +8 808.0 0.0 808.0 808.0 795.1824 0.0 795.1824 795.1824 4.2100472 0.8272979 5.360536 2.1090379 0.30518922 15.321784973144531 +9 1126.0 0.0 1126.0 1126.0 910.6747 0.0 910.6747 910.6747 3.7158313 0.26879352 4.0875983 2.8631577 0.28369576 16.95006275177002 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..65fa53ed03befa109cdb18919d1e41d142cb9e5f --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5588c37cda70d86cb673d5f37640d6f6e5794dd29372f69421d1c67155ef4883 +size 26198 diff --git a/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..524b60f4a267a0804a16a931a7070c2b6ef619c9 --- /dev/null +++ b/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfbase_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c0e5ddaa6502219c5a3b188956e89c1de4d56482f37e86b0bd5ee366b89ae +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e12215514b9d7bb83edcaf9091a9c7e5831eb4a --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 977.0 0.0 977.0 977.0 681.14825 0.0 681.14825 681.14825 0.14125657 0.030075919 0.1857557 0.09786102 0.25286496 1.6822750568389893 +1 1128.0 0.0 1128.0 1128.0 894.6829 0.0 894.6829 894.6829 2.7149596 0.29948303 3.1592789 1.9263338 0.30419475 3.3571808338165283 +2 1116.0 0.0 1116.0 1116.0 957.7189 0.0 957.7189 957.7189 3.8602312 0.75536144 4.8007755 2.2551553 0.22741437 5.033766508102417 +3 1158.0 0.0 1158.0 1158.0 962.6391 0.0 962.6391 962.6391 4.180794 0.9927978 5.5322514 2.4096556 0.24450675 6.709108114242554 +4 1157.0 0.0 1157.0 1157.0 864.36993 0.0 864.36993 864.36993 4.1868577 1.0023938 5.5783696 2.1511772 0.3694824 8.382591962814331 +5 796.0 0.0 796.0 796.0 796.0 0.0 796.0 796.0 3.8282166 0.71641314 4.926389 2.1449556 0.29307356 10.060532808303833 +6 1166.0 0.0 1166.0 1166.0 930.0075 0.0 930.0075 930.0075 3.6419904 0.27943805 4.1194944 2.986239 0.27895072 11.738348960876465 +7 1119.0 0.0 1119.0 1119.0 940.40344 0.0 940.40344 940.40344 4.076298 0.7874754 5.1683044 2.234679 0.2697829 13.289022207260132 +8 1169.0 0.0 1169.0 1169.0 864.43774 0.0 864.43774 864.43774 4.184391 0.86006975 5.4116416 2.2809181 0.30154675 14.829118967056274 +9 802.0 0.0 802.0 802.0 802.0 0.0 802.0 802.0 3.7758183 1.038865 5.4796367 1.8195307 0.2802678 16.366939067840576 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..b58821bdfcecd116df485c7e2cfd3a4599a300e9 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d267d869869e115c5388e22422e7c7bfd93e62a21bd3b19edb685d041af96906 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=0_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..259461d249de8d7ca407da5e28244b4ce5f97e3e --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1082.0 0.0 1082.0 1082.0 1035.284 0.0 1035.284 1035.284 0.21332677 0.042681575 0.27745777 0.15350708 0.03935606 1.587843656539917 +1 1188.0 0.0 1188.0 1188.0 1087.477 0.0 1087.477 1087.477 4.1148796 0.3782839 4.5764194 3.3703353 0.056032598 3.138770818710327 +2 1152.0 0.0 1152.0 1152.0 1111.499 0.0 1111.499 1111.499 4.565781 1.3735179 6.394923 2.5409548 0.022894569 4.765903949737549 +3 1167.0 0.0 1167.0 1167.0 1101.4982 0.0 1101.4982 1101.4982 4.7711945 1.8498933 7.2478876 2.0209146 0.037667014 6.3952600955963135 +4 1192.0 0.0 1192.0 1192.0 1120.3912 0.0 1120.3912 1120.3912 4.7372594 1.8945714 7.32146 1.6095517 0.04197453 8.020176887512207 +5 1208.0 0.0 1208.0 1208.0 1145.479 0.0 1145.479 1145.479 4.6693854 2.135284 7.6188526 1.2080014 0.03475307 9.646531343460083 +6 1218.0 0.0 1218.0 1218.0 1159.1083 0.0 1159.1083 1159.1083 4.8053956 2.1934674 7.98847 1.7792265 0.03273636 11.272494077682495 +7 1176.0 0.0 1176.0 1176.0 1196.4404 0.0 1196.4404 1196.4404 5.0116796 2.106347 8.1041565 2.0993812 -0.011368334 12.898009300231934 +8 1204.0 0.0 1204.0 1204.0 1183.1124 0.0 1183.1124 1183.1124 5.183908 1.91985 8.087294 1.7273027 0.011604801 14.52620244026184 +9 1223.0 0.0 1223.0 1223.0 1117.2764 0.0 1117.2764 1117.2764 5.0448747 2.1910853 8.393274 2.1129317 0.058734708 16.151551485061646 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e6a54c90d21f490b4a5a13021378d24867b01d --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb4d8e3ae81adf84ae817ccf643264e19de5f6959f7c240781075596869a054 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..b249b52432a0cce7437d38616e6ad06f9ddc317a --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 948.0 0.0 948.0 948.0 658.5179 0.0 658.5179 658.5179 0.14155939 0.029809806 0.1856612 0.097771876 0.24553174 1.6726858615875244 +1 1093.0 0.0 1093.0 1093.0 881.8171 0.0 881.8171 881.8171 2.696423 0.2564375 3.089314 1.9304972 0.29453686 3.273401975631714 +2 1081.0 0.0 1081.0 1081.0 941.5524 0.0 941.5524 941.5524 3.7122257 0.81048304 4.7932453 2.0113137 0.21788768 4.7944111824035645 +3 1124.0 0.0 1124.0 1124.0 944.4275 0.0 944.4275 944.4275 4.16745 0.9578715 5.5328064 2.5059927 0.23690286 6.326678991317749 +4 1110.0 0.0 1110.0 1110.0 833.4575 0.0 833.4575 833.4575 4.1853604 1.0250765 5.6999307 2.1599512 0.3586801 7.857991695404053 +5 789.0 0.0 789.0 789.0 789.0 0.0 789.0 789.0 3.773337 0.5643806 4.697978 2.6678905 0.2848006 9.387629985809326 +6 1136.0 0.0 1136.0 1136.0 902.92694 0.0 902.92694 902.92694 3.6101334 0.24939266 4.007382 2.7141464 0.2700727 10.911539554595947 +7 1134.0 0.0 1134.0 1134.0 926.2728 0.0 926.2728 926.2728 4.003089 0.8431198 5.301027 2.1779304 0.26063666 12.434954643249512 +8 823.0 0.0 823.0 823.0 808.9843 0.0 808.9843 808.9843 4.0866475 0.882637 5.5110044 2.2613177 0.29199904 13.983832120895386 +9 1114.0 0.0 1114.0 1114.0 909.1823 0.0 909.1823 909.1823 3.6949892 0.28949437 4.152338 2.839722 0.27056465 15.505768060684204 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..27929711c3b324cfe2fa2e99abc5893bad18aa96 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64124b3d14cc39e50fe8c3711c201f6c235b6cf556da57e4a4d9285387769425 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=10_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..107ad69975d20b518a6fd28158b2f592d0daec17 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1048.0 0.0 1048.0 1048.0 1085.4106 0.0 1085.4106 1085.4106 0.2107489 0.0411604 0.27939534 0.15481697 -0.030024543 1.650672435760498 +1 1215.0 0.0 1215.0 1215.0 1241.3922 0.0 1241.3922 1241.3922 4.541141 0.3235134 4.899224 3.732144 -0.013905287 3.2908718585968018 +2 1185.0 0.0 1185.0 1185.0 1276.5332 0.0 1276.5332 1276.5332 5.353744 1.2917172 6.9288425 2.851285 -0.048226 4.934066534042358 +3 1181.0 0.0 1181.0 1181.0 1234.8168 0.0 1234.8168 1234.8168 5.6467967 1.7388566 7.788922 2.194901 -0.028534904 6.462401628494263 +4 1199.0 0.0 1199.0 1199.0 1249.04 0.0 1249.04 1249.04 5.5004454 1.7417974 7.7336297 1.8380308 -0.027180806 7.96668004989624 +5 1205.0 0.0 1205.0 1205.0 1261.615 0.0 1261.615 1261.615 5.52574 2.0578403 8.181607 1.12072 -0.029796556 9.480697393417358 +6 1202.0 0.0 1202.0 1202.0 1264.665 0.0 1264.665 1264.665 5.5424414 2.0994341 8.465796 1.9190642 -0.035706982 10.994497537612915 +7 1208.0 0.0 1208.0 1208.0 1337.3612 0.0 1337.3612 1337.3612 5.642287 1.8819869 8.360355 2.3567677 -0.07636431 12.525895595550537 +8 1236.0 0.0 1236.0 1236.0 1320.0471 0.0 1320.0471 1320.0471 6.069215 1.871396 8.789551 1.4295402 -0.04987976 14.032705068588257 +9 1227.0 0.0 1227.0 1227.0 1240.7715 0.0 1240.7715 1240.7715 5.9087396 2.192454 9.186419 0.52707267 -0.008361429 15.544472455978394 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..de8b03fb9444ba83f9a6520e296bcab811c1dd86 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e78319d549932a3a1ed55d318da7b099097d06fb2bef2d6ca47df10551b429f +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..e9b9123590499badd3a5a8540030fe4f47ef4c82 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 935.0 0.0 935.0 935.0 605.7615 0.0 605.7615 605.7615 0.14038551 0.028902015 0.18591297 0.09800927 0.26508737 1.7502467632293701 +1 1044.0 0.0 1044.0 1044.0 828.767 0.0 828.767 828.767 2.5460749 0.29592517 2.996389 1.9485313 0.31698585 3.4995484352111816 +2 1041.0 0.0 1041.0 1041.0 893.5657 0.0 893.5657 893.5657 3.7233825 0.57421255 4.4654884 2.1606305 0.24012049 5.2065465450286865 +3 1060.0 0.0 1060.0 1060.0 881.05493 0.0 881.05493 881.05493 4.072811 0.76183707 5.071249 2.2406101 0.25710523 6.955436944961548 +4 1066.0 0.0 1066.0 1066.0 771.7008 0.0 771.7008 771.7008 4.1261754 0.7123883 5.0313234 2.210163 0.38121688 8.703739166259766 +5 776.0 0.0 776.0 776.0 775.08484 0.0 775.08484 775.08484 3.590725 0.35868666 4.0109677 2.450011 0.3050449 10.452457904815674 +6 1110.0 0.0 1110.0 1110.0 803.2867 0.0 803.2867 803.2867 3.6173835 0.26239672 3.9849043 2.8673182 0.2915523 12.189692497253418 +7 1084.0 0.0 1084.0 1084.0 873.3112 0.0 873.3112 873.3112 3.6455116 0.9584207 4.8560557 0.9971664 0.28280345 13.930885076522827 +8 774.0 0.0 774.0 774.0 774.0 0.0 774.0 774.0 4.0240088 0.7191543 4.861592 1.6242115 0.31476772 15.68473482131958 +9 781.0 0.0 781.0 781.0 781.0 0.0 781.0 781.0 3.692902 0.27560806 4.0751433 2.2934146 0.2933088 17.287315845489502 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..83b1aae03d2f943f37fa2b580a9617436ccf39d4 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2b4ce71e1f6ef1eeadfc67a3dba727968913fd65066bd32e29fd46ad82a740 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=11_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..01bfe2a7fcfd3d1dd7e2af05acf5c6531f39d7a1 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1019.0 0.0 1019.0 1019.0 1063.5458 0.0 1063.5458 1063.5458 0.21143244 0.04098034 0.27955574 0.15492325 -0.03580831 1.6662917137145996 +1 1194.0 0.0 1194.0 1194.0 1228.2252 0.0 1228.2252 1228.2252 4.5082383 0.34734222 4.900729 3.6632855 -0.018032506 3.3145816326141357 +2 1152.0 0.0 1152.0 1152.0 1247.1022 0.0 1247.1022 1247.1022 5.369632 1.2056292 6.860003 3.0766697 -0.050106138 4.972046613693237 +3 1150.0 0.0 1150.0 1150.0 1213.1136 0.0 1213.1136 1213.1136 5.672562 1.7246567 7.830571 2.4803286 -0.033234924 6.622156381607056 +4 1163.0 0.0 1163.0 1163.0 1226.4644 0.0 1226.4644 1226.4644 5.577432 1.7112551 7.79611 2.677406 -0.03340231 8.270950078964233 +5 1193.0 0.0 1193.0 1193.0 1259.0138 0.0 1259.0138 1259.0138 5.5487595 1.8858368 8.073771 1.8357501 -0.034762636 9.921444177627563 +6 1192.0 0.0 1192.0 1192.0 1270.418 0.0 1270.418 1270.418 5.652203 2.062521 8.581036 1.5649617 -0.04129462 11.566673517227173 +7 1182.0 0.0 1182.0 1182.0 1332.3088 0.0 1332.3088 1332.3088 5.7681465 2.0150342 8.726028 1.6905832 -0.079110265 13.216618061065674 +8 1187.0 0.0 1187.0 1187.0 1289.5142 0.0 1289.5142 1289.5142 6.1559324 1.7957879 8.816768 2.2983112 -0.053983316 14.86531114578247 +9 1191.0 0.0 1191.0 1191.0 1215.9827 0.0 1215.9827 1215.9827 5.9930377 2.0905585 9.140098 2.8133779 -0.013148516 16.51480793952942 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a48aeb1c552903a3a88736bb221492c0d30fdc --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f4224df278bc3f9f5d4a2391b346f38b2ece44ec97b7eca57ff3f447e5335a0 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..cbf520ab4bc87fdecd1b75992827cba91de72c08 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 950.0 0.0 950.0 950.0 624.57465 0.0 624.57465 624.57465 0.13728271 0.02764003 0.17739289 0.09782085 0.24955931 1.7624449729919434 +1 1104.0 0.0 1104.0 1104.0 864.0229 0.0 864.0229 864.0229 2.6868756 0.32664865 3.1714265 1.9677058 0.2999712 3.5128984451293945 +2 1085.0 0.0 1085.0 1085.0 929.5329 0.0 929.5329 929.5329 3.9385984 0.63623685 4.771843 2.6993408 0.22337249 5.188628196716309 +3 1126.0 0.0 1126.0 1126.0 934.0974 0.0 934.0974 934.0974 4.3167896 0.8250715 5.47447 2.8538456 0.24108426 6.8010337352752686 +4 1130.0 0.0 1130.0 1130.0 836.7963 0.0 836.7963 836.7963 4.40971 0.7646041 5.5344744 2.9460492 0.36513525 8.411404609680176 +5 765.0 0.0 765.0 765.0 765.0 0.0 765.0 765.0 3.9137387 0.5343742 4.810089 2.7637703 0.28925234 10.017868995666504 +6 1151.0 0.0 1151.0 1151.0 898.08936 0.0 898.08936 898.08936 3.6634803 0.21009932 4.045622 3.1236024 0.27490366 11.643146991729736 +7 1144.0 0.0 1144.0 1144.0 915.7341 0.0 915.7341 915.7341 4.243589 0.55887103 5.113475 3.0631652 0.26573482 13.259457111358643 +8 794.0 0.0 794.0 794.0 780.3157 0.0 780.3157 780.3157 4.308112 0.69411063 5.4061522 2.8615723 0.2974857 14.883937358856201 +9 1127.0 0.0 1127.0 1127.0 900.92773 0.0 900.92773 900.92773 3.7712364 0.19661315 4.111161 3.2220929 0.27603468 16.566587924957275 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a821edf6d50af165b80019edb64cd2b391667af --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd17a108c633c829455980f2b8d8d04308a8f15568fa3abff7a0dcc4d8f3f41c +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=12_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..faaa3f5e540c7205654fd4c3e3e6d41d332bde73 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1003.0 0.0 1003.0 1003.0 1043.1426 0.0 1043.1426 1043.1426 0.21594217 0.042398494 0.27950937 0.15489255 -0.034135014 1.5860681533813477 +1 1155.0 0.0 1155.0 1155.0 1186.5161 0.0 1186.5161 1186.5161 4.465547 0.33728796 4.852349 3.754195 -0.017528564 3.0716073513031006 +2 1156.0 0.0 1156.0 1156.0 1244.9674 0.0 1244.9674 1244.9674 5.093469 1.2698755 6.7328954 2.8538485 -0.049730897 4.557770729064941 +3 1147.0 0.0 1147.0 1147.0 1204.1937 0.0 1204.1937 1204.1937 5.4516683 1.8942302 7.952863 2.0523844 -0.031791523 6.041445970535278 +4 1145.0 0.0 1145.0 1145.0 1201.6532 0.0 1201.6532 1201.6532 5.3379397 1.8195059 7.8367734 1.8476216 -0.03147462 7.528115510940552 +5 1178.0 0.0 1178.0 1178.0 1236.5403 0.0 1236.5403 1236.5403 5.2476077 2.1112318 8.191514 1.1591213 -0.03252244 9.100481033325195 +6 1177.0 0.0 1177.0 1177.0 1248.1078 0.0 1248.1078 1248.1078 5.372261 2.2133186 8.645925 1.0085344 -0.039526567 10.665386438369751 +7 1186.0 0.0 1186.0 1186.0 1328.1876 0.0 1328.1876 1328.1876 5.52536 2.0993588 8.741831 1.2949618 -0.07899319 12.227421760559082 +8 1196.0 0.0 1196.0 1196.0 1289.8966 0.0 1289.8966 1289.8966 5.960823 2.0188942 9.069354 1.6995264 -0.052164927 13.741860151290894 +9 1181.0 0.0 1181.0 1181.0 1201.8701 0.0 1201.8701 1201.8701 5.747636 2.3010254 9.393 1.0643499 -0.011594921 15.389137029647827 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a7de6b07b102900e2ec73109bd554b7fe3e8fa1 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc8e6301c8ea309c38c8d135717da4679f3f7e9c7e25458bf26a1dec22cac59 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1822741bbb6ebeaa712f9f64a6d77e1f6bc7fae --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 978.0 0.0 978.0 978.0 694.13196 0.0 694.13196 694.13196 0.14089103 0.029968431 0.1856104 0.097723894 0.24158974 1.667717695236206 +1 1129.0 0.0 1129.0 1129.0 899.98645 0.0 899.98645 899.98645 2.741443 0.29088217 3.1473382 2.1953435 0.29248303 3.332165002822876 +2 1126.0 0.0 1126.0 1126.0 976.36066 0.0 976.36066 976.36066 3.8658674 0.7852431 4.8158026 2.219316 0.21530825 4.997639179229736 +3 1173.0 0.0 1173.0 1173.0 986.60223 0.0 986.60223 986.60223 4.241513 0.99284047 5.5840836 2.5905364 0.23358028 6.680014610290527 +4 1163.0 0.0 1163.0 1163.0 879.6238 0.0 879.6238 879.6238 4.3242736 0.9492962 5.656163 2.5725725 0.35734707 8.35954213142395 +5 1184.0 0.0 1184.0 1184.0 885.4465 0.0 885.4465 885.4465 3.8405068 0.735304 5.0082216 2.6149082 0.2816541 10.012380599975586 +6 1181.0 0.0 1181.0 1181.0 903.13306 0.0 903.13306 903.13306 3.8490727 1.109193 5.6395054 1.9591005 0.26769447 11.67524528503418 +7 1176.0 0.0 1176.0 1176.0 914.7805 0.0 914.7805 914.7805 3.9679482 1.0371895 5.641413 2.2172775 0.2586333 13.330604314804077 +8 1187.0 0.0 1187.0 1187.0 901.39844 0.0 901.39844 901.39844 4.028779 1.096547 5.862717 2.2223608 0.2902458 14.986984491348267 +9 1177.0 0.0 1177.0 1177.0 926.12634 0.0 926.12634 926.12634 4.069972 0.90560937 5.7804103 2.585876 0.26860142 16.649048328399658 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..07c0e73c8cb675cd24527a18f68ab2a393887b1e --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec286c13dbcec61c0ea74e3a6629f0787be99a2ca3263f5723dde5d957fd6a8 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=13_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b4e915addffd5c2625b5480c72c5a9a7cbd5c66 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1027.0 0.0 1027.0 1027.0 1084.0773 0.0 1084.0773 1084.0773 0.21099402 0.041077666 0.2798408 0.15511125 -0.04610443 1.6661317348480225 +1 1199.0 0.0 1199.0 1199.0 1252.5496 0.0 1252.5496 1252.5496 4.5829835 0.3539944 4.9678726 3.704935 -0.028214186 3.2276337146759033 +2 1169.0 0.0 1169.0 1169.0 1285.0754 0.0 1285.0754 1285.0754 5.513109 1.1924782 6.9289646 3.125702 -0.061253384 4.806156396865845 +3 1154.0 0.0 1154.0 1154.0 1235.3364 0.0 1235.3364 1235.3364 5.8127947 1.6822051 7.872531 2.8536654 -0.042830765 6.417284727096558 +4 1176.0 0.0 1176.0 1176.0 1258.6648 0.0 1258.6648 1258.6648 5.700894 1.677385 7.8394837 2.7955356 -0.043507203 8.027845621109009 +5 1182.0 0.0 1182.0 1182.0 1265.0676 0.0 1265.0676 1265.0676 5.7065306 1.8290913 8.117079 2.5561724 -0.043719217 9.679668426513672 +6 1208.0 0.0 1208.0 1208.0 1305.9209 0.0 1305.9209 1305.9209 5.751818 2.0026183 8.605963 1.6118636 -0.051563814 11.330337285995483 +7 1183.0 0.0 1183.0 1183.0 1351.0063 0.0 1351.0063 1351.0063 5.911577 1.9770893 8.821912 1.6982894 -0.08842492 12.981420278549194 +8 1193.0 0.0 1193.0 1193.0 1310.7039 0.0 1310.7039 1310.7039 6.2618723 1.7550119 8.909666 2.3279812 -0.061982363 14.638008832931519 +9 1205.0 0.0 1205.0 1205.0 1247.3988 0.0 1247.3988 1247.3988 6.1170497 2.0504994 9.279831 1.6802287 -0.022314996 16.181657791137695 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..6aaf73e5190f35fb1f9ac025c67c8de73b3cc84e --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9b297d60129102bb064517eb1414fffdf32a9e5fe5cf1db7ff5bde76d686d0 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7437525bf23bc46c3ab1aeeff37dc007fdd70f0 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 950.0 0.0 950.0 950.0 624.57465 0.0 624.57465 624.57465 0.13728271 0.02764003 0.17739289 0.09782085 0.24955931 1.6869378089904785 +1 1104.0 0.0 1104.0 1104.0 864.0229 0.0 864.0229 864.0229 2.6868756 0.32664865 3.1714265 1.9677058 0.2999712 3.2955362796783447 +2 1085.0 0.0 1085.0 1085.0 929.5329 0.0 929.5329 929.5329 3.9385984 0.63623685 4.771843 2.6993408 0.22337249 4.9003612995147705 +3 1126.0 0.0 1126.0 1126.0 934.0974 0.0 934.0974 934.0974 4.3167896 0.8250715 5.47447 2.8538456 0.24108426 6.5054357051849365 +4 1130.0 0.0 1130.0 1130.0 836.7963 0.0 836.7963 836.7963 4.40971 0.7646041 5.5344744 2.9460492 0.36513525 8.119073152542114 +5 765.0 0.0 765.0 765.0 765.0 0.0 765.0 765.0 3.9137387 0.5343742 4.810089 2.7637703 0.28925234 9.725674152374268 +6 1151.0 0.0 1151.0 1151.0 898.08936 0.0 898.08936 898.08936 3.6634803 0.21009932 4.045622 3.1236024 0.27490366 11.329328536987305 +7 1144.0 0.0 1144.0 1144.0 915.7341 0.0 915.7341 915.7341 4.243589 0.55887103 5.113475 3.0631652 0.26573482 12.949270248413086 +8 794.0 0.0 794.0 794.0 780.3157 0.0 780.3157 780.3157 4.308112 0.69411063 5.4061522 2.8615723 0.2974857 14.558382272720337 +9 1127.0 0.0 1127.0 1127.0 900.92773 0.0 900.92773 900.92773 3.7712364 0.19661315 4.111161 3.2220929 0.27603468 16.164623022079468 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a821edf6d50af165b80019edb64cd2b391667af --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd17a108c633c829455980f2b8d8d04308a8f15568fa3abff7a0dcc4d8f3f41c +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=14_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ab40f1573f9b5f8854b5733bc76bf7ef7fba824 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1030.0 0.0 1030.0 1030.0 1137.3251 0.0 1137.3251 1137.3251 0.20913549 0.03902866 0.28081062 0.1557429 -0.08130698 1.586937427520752 +1 1200.0 0.0 1200.0 1200.0 1330.3545 0.0 1330.3545 1330.3545 4.8480806 0.2516734 5.139223 4.055459 -0.06520948 3.1679751873016357 +2 1123.0 0.0 1123.0 1123.0 1309.135 0.0 1309.135 1309.135 5.8624463 1.1777483 7.303607 3.297463 -0.09948419 4.743704557418823 +3 1175.0 0.0 1175.0 1175.0 1327.3838 0.0 1327.3838 1327.3838 5.990474 1.3598869 7.684102 3.2590377 -0.07619175 6.362450122833252 +4 1173.0 0.0 1173.0 1173.0 1329.3563 0.0 1329.3563 1329.3563 6.079537 1.5414082 8.044513 3.1041317 -0.07817824 7.967848539352417 +5 1201.0 0.0 1201.0 1201.0 1352.6914 0.0 1352.6914 1352.6914 6.0930877 1.7518407 8.432325 2.2556744 -0.07592161 9.554846048355103 +6 1218.0 0.0 1218.0 1218.0 1388.1177 0.0 1388.1177 1388.1177 6.172089 1.8419036 8.864119 2.819778 -0.08510144 11.24284839630127 +7 1189.0 0.0 1189.0 1189.0 1431.6259 0.0 1431.6259 1431.6259 6.348912 1.8506902 9.140154 2.3904648 -0.12131205 12.951537132263184 +8 1208.0 0.0 1208.0 1208.0 1391.6902 0.0 1391.6902 1391.6902 6.63923 1.7147804 9.2778015 3.7234092 -0.091844186 14.654871702194214 +9 1228.0 0.0 1228.0 1228.0 1340.7366 0.0 1340.7366 1340.7366 6.5105176 1.959498 9.614809 3.2689524 -0.056368664 16.357202291488647 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..63bc9682c799aa735b17afb412ff8f162c7f36e5 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4f9a590a8a0af8f871dba1eb85019db82d8dbe652214767b904019b51adff3 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec1a85963447d44ff80c00a1bfe9a946c05fdba2 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 969.0 0.0 969.0 969.0 669.20917 0.0 669.20917 669.20917 0.13904078 0.028865237 0.18562022 0.09773319 0.24235302 1.7199516296386719 +1 1130.0 0.0 1130.0 1130.0 887.78723 0.0 887.78723 887.78723 2.7671995 0.28547907 3.184353 1.9507256 0.2925277 3.3381383419036865 +2 1118.0 0.0 1118.0 1118.0 951.0581 0.0 951.0581 951.0581 3.9097512 0.70932066 4.793439 2.1914303 0.21624511 5.029691219329834 +3 1157.0 0.0 1157.0 1157.0 957.7288 0.0 957.7288 957.7288 4.279783 0.94340456 5.550804 2.284496 0.23388565 6.658996343612671 +4 1152.0 0.0 1152.0 1152.0 861.43414 0.0 861.43414 861.43414 4.306346 0.9446844 5.619209 1.9737432 0.35828084 8.37315821647644 +5 1148.0 0.0 1148.0 1148.0 919.54297 0.0 919.54297 919.54297 3.899985 0.67648387 4.901318 2.0185654 0.28204674 10.084834814071655 +6 1164.0 0.0 1164.0 1164.0 943.5246 0.0 943.5246 943.5246 4.1694913 0.7608039 5.354018 2.6060317 0.26756722 11.798571586608887 +7 891.0 0.0 891.0 891.0 844.24133 0.0 844.24133 844.24133 4.311433 0.66882163 5.348313 2.682955 0.25833306 13.514434099197388 +8 1172.0 0.0 1172.0 1172.0 884.9013 0.0 884.9013 884.9013 3.919056 0.49348724 4.74516 2.6103327 0.28999883 15.236512899398804 +9 837.0 0.0 837.0 837.0 816.8488 0.0 816.8488 816.8488 3.9945443 0.8040839 5.4597526 2.2637527 0.26868874 16.960970401763916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..1deeedc7a7de9456bdb2086e25b0ddb5e6a8015a --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:798247e0e19d8dae952d678fbfa21e0ecdb7b49491fee3bf988edec28c836d13 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=15_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e946ac5e3d6a64791fb447c49a461ce7c59afbf --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 935.0 0.0 935.0 935.0 662.0837 0.0 662.0837 662.0837 0.14571893 0.030283028 0.18562889 0.09774135 0.24302423 1.4826123714447021 +1 1030.0 0.0 1030.0 1030.0 857.9033 0.0 857.9033 857.9033 2.5776827 0.26584893 3.0229726 1.9001278 0.29367954 2.9774558544158936 +2 1010.0 0.0 1010.0 1010.0 907.9197 0.0 907.9197 907.9197 3.6649864 0.7163195 4.684132 2.2927494 0.21627174 4.491091251373291 +3 1054.0 0.0 1054.0 1054.0 919.021 0.0 919.021 919.021 3.962142 0.9382023 5.3809967 2.4664743 0.23474634 5.972697496414185 +4 1079.0 0.0 1079.0 1079.0 860.48816 0.0 860.48816 860.48816 4.0107226 0.9971693 5.5005336 2.196803 0.3570457 7.5748984813690186 +5 805.0 0.0 805.0 805.0 805.0 0.0 805.0 805.0 3.6988375 0.7486413 4.8880663 2.1098676 0.28356746 9.183146238327026 +6 1113.0 0.0 1113.0 1113.0 904.7485 0.0 904.7485 904.7485 3.5519357 0.2900082 4.0138164 2.5081236 0.26836613 10.786293268203735 +7 1031.0 0.0 1031.0 1031.0 916.6925 0.0 916.6925 916.6925 3.8977764 0.9033248 5.1946673 2.0145643 0.25920027 12.383512735366821 +8 1109.0 0.0 1109.0 1109.0 868.9623 0.0 868.9623 868.9623 3.9286816 0.9895018 5.4316096 2.0316062 0.2906031 13.983851194381714 +9 1093.0 0.0 1093.0 1093.0 888.6846 0.0 888.6846 888.6846 3.7273953 1.0201336 5.386931 1.8693169 0.2695452 15.58064317703247 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6857c67b453e004fc607dabec143a60803decab --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd43a1a41b6539871c15c55e23f0b5dd990f1fbc398fab5832429b6643e513e +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=1_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..a018cc5f85949a06b0448bf6382f910751d3dca5 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 967.0 0.0 967.0 967.0 681.23083 0.0 681.23083 681.23083 0.14513128 0.030651107 0.1858117 0.09791386 0.25721765 1.6183242797851562 +1 1032.0 0.0 1032.0 1032.0 856.3895 0.0 856.3895 856.3895 2.549102 0.28227776 3.014049 1.8733692 0.308088 3.219247817993164 +2 1058.0 0.0 1058.0 1058.0 929.4955 0.0 929.4955 929.4955 3.617406 0.59256893 4.445417 2.3754058 0.2315394 4.838155508041382 +3 1080.0 0.0 1080.0 1080.0 928.6793 0.0 928.6793 928.6793 3.954721 0.8896443 5.2977047 2.477169 0.24847372 6.450444459915161 +4 1077.0 0.0 1077.0 1077.0 853.82513 0.0 853.82513 853.82513 4.0107565 0.91179895 5.3925138 2.316854 0.37320232 8.065232753753662 +5 1078.0 0.0 1078.0 1078.0 910.8682 0.0 910.8682 910.8682 3.6947095 0.6975001 4.810051 2.1988597 0.2973874 9.677787780761719 +6 1112.0 0.0 1112.0 1112.0 900.9434 0.0 900.9434 900.9434 3.9460297 0.7438988 5.2278175 2.8149796 0.28253835 11.205829620361328 +7 1099.0 0.0 1099.0 1099.0 923.10284 0.0 923.10284 923.10284 3.9556818 0.7557464 5.1855397 2.430726 0.2731321 12.697757720947266 +8 838.0 0.0 838.0 838.0 822.4568 0.0 822.4568 822.4568 4.0299525 0.8913838 5.529956 2.290533 0.30477676 14.183594942092896 +9 1093.0 0.0 1093.0 1093.0 909.78595 0.0 909.78595 909.78595 3.6767828 0.30593503 4.097323 2.7796 0.2840527 15.66967487335205 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..2643b7eb9ddb6c66c31cfc2e2a30e15ea9b29423 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d031b709e99d78c32d680d7c53e7297d282251a8cfac6583f5fc2ffaf66435ac +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=2_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..349c407260f5c5f8ea6cbac44fc2068b99653348 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 951.0 0.0 951.0 951.0 714.8112 0.0 714.8112 714.8112 0.14458045 0.030198773 0.18522829 0.09942171 0.21201845 1.6103086471557617 +1 1074.0 0.0 1074.0 1074.0 882.2123 0.0 882.2123 882.2123 2.8365648 0.24206483 3.215212 2.1366177 0.26093617 3.2136573791503906 +2 1058.0 0.0 1058.0 1058.0 953.34875 0.0 953.34875 953.34875 3.7015505 0.8543171 4.8442416 2.1315057 0.1858817 4.790390729904175 +3 1123.0 0.0 1123.0 1123.0 980.2328 0.0 980.2328 980.2328 4.0784883 1.1142279 5.6875935 2.3582704 0.20395298 6.267019271850586 +4 1127.0 0.0 1127.0 1127.0 904.5539 0.0 904.5539 904.5539 4.1262307 1.1288725 5.7584734 2.1115181 0.32809198 7.7320897579193115 +5 1126.0 0.0 1126.0 1126.0 952.8888 0.0 952.8888 952.8888 3.8608317 0.89599997 5.2043552 2.1130133 0.25125045 9.19661283493042 +6 1141.0 0.0 1141.0 1141.0 946.5422 0.0 946.5422 946.5422 4.112151 0.9747658 5.693345 2.4195487 0.23627885 10.685249328613281 +7 1135.0 0.0 1135.0 1135.0 957.8667 0.0 957.8667 957.8667 4.076431 0.98180056 5.6353683 2.489035 0.22709386 12.266231298446655 +8 1124.0 0.0 1124.0 1124.0 933.7898 0.0 933.7898 933.7898 4.1648035 0.989854 5.7646937 2.456407 0.2587901 13.762157440185547 +9 1116.0 0.0 1116.0 1116.0 956.66504 0.0 956.66504 956.66504 4.0106835 0.93314314 5.680212 2.527186 0.2378131 15.323771238327026 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a9f7d661e4cabf709c65f181a48a53c23486815 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3fc29d4878ff8e0a132276387eda5f16f1451c5a9082de2eb40cafa7a873a93 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=3_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..6de6fe68079c6e62c5144626bc15ca32a94cee24 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1006.0 0.0 1006.0 1006.0 756.34686 0.0 756.34686 756.34686 0.14540903 0.031647217 0.1855669 0.097682826 0.23821831 1.6859495639801025 +1 1124.0 0.0 1124.0 1124.0 909.0425 0.0 909.0425 909.0425 2.8151662 0.2456107 3.1673958 2.137112 0.2885341 3.2906064987182617 +2 1132.0 0.0 1132.0 1132.0 991.88995 0.0 991.88995 991.88995 3.7007174 0.8399541 4.723731 2.2550092 0.21325825 4.876057386398315 +3 1178.0 0.0 1178.0 1178.0 976.2804 0.0 976.2804 976.2804 4.110649 1.0758297 5.548087 2.512912 0.22948825 6.461254358291626 +4 921.0 0.0 921.0 921.0 859.3034 0.0 859.3034 859.3034 3.9637122 1.1989617 5.6634984 2.0012105 0.354577 8.04603910446167 +5 1039.0 0.0 1039.0 1039.0 936.5794 0.0 936.5794 936.5794 3.703376 0.4792368 4.4081 2.5530481 0.2783147 9.558783769607544 +6 1068.0 0.0 1068.0 1068.0 967.2909 0.0 967.2909 967.2909 3.944641 0.69472694 5.132665 2.8043728 0.263636 11.168614864349365 +7 1184.0 0.0 1184.0 1184.0 963.2333 0.0 963.2333 963.2333 4.094749 0.6749972 5.2223716 3.0081646 0.25404707 12.754243612289429 +8 834.0 0.0 834.0 834.0 832.859 0.0 832.859 832.859 4.0525427 0.960053 5.719518 2.464125 0.2852476 14.340235948562622 +9 1166.0 0.0 1166.0 1166.0 966.3456 0.0 966.3456 966.3456 3.627185 0.22991484 4.007544 2.9822817 0.26479355 15.93325662612915 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..e93799ed6e1afb8fcc5b1905c2404796e3546dd5 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3162846eb2a4185a5fc1d6b657709b87b55b4e1adb3e17d319e4c02b20c676d0 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=4_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b757c2af6d1653f7cd4826ed26f0e851e1d0a75 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 999.0 0.0 999.0 999.0 715.2502 0.0 715.2502 715.2502 0.14356264 0.030791294 0.18575901 0.097864136 0.25312197 1.6426138877868652 +1 1134.0 0.0 1134.0 1134.0 913.51917 0.0 913.51917 913.51917 2.6839502 0.28988963 3.1024365 2.1912537 0.30369258 3.2731857299804688 +2 1124.0 0.0 1124.0 1124.0 978.2824 0.0 978.2824 978.2824 3.752766 0.83621264 4.780865 2.2111764 0.22522163 4.808213949203491 +3 1125.0 0.0 1125.0 1125.0 975.3555 0.0 975.3555 975.3555 4.132603 1.0759854 5.524093 2.3533044 0.24411848 6.313268661499023 +4 1133.0 0.0 1133.0 1133.0 909.33936 0.0 909.33936 909.33936 4.0953627 1.0673128 5.5015545 2.1759217 0.36605728 7.812548875808716 +5 1185.0 0.0 1185.0 1185.0 885.18243 0.0 885.18243 885.18243 3.7400084 0.78300786 4.838747 2.1659982 0.2939386 9.314082860946655 +6 851.0 0.0 851.0 851.0 850.4429 0.0 850.4429 850.4429 3.6899981 1.2052151 5.4930468 1.6773052 0.27858305 10.83012056350708 +7 1172.0 0.0 1172.0 1172.0 970.5073 0.0 970.5073 970.5073 3.6852882 0.27936965 4.0537972 3.1777422 0.2686571 12.329495906829834 +8 1167.0 0.0 1167.0 1167.0 941.3284 0.0 941.3284 941.3284 4.126623 0.86895597 5.341943 2.407453 0.30049485 13.830678224563599 +9 1149.0 0.0 1149.0 1149.0 937.4416 0.0 937.4416 937.4416 4.102587 0.783718 5.2457995 2.7311733 0.27873322 15.327075481414795 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5903f571dcb03189217f75b4bd44f3c7f86722e --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ada3e167eb36668accb68c2f1fe528610a5227ddb45b526d69eb87838d713a +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=5_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..926ebd17b0b426351d5d76e99fcb627ae2506c4d --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 950.0 0.0 950.0 950.0 639.19446 0.0 639.19446 639.19446 0.1403101 0.028773542 0.18571159 0.09781944 0.24944286 1.5948634147644043 +1 1061.0 0.0 1061.0 1061.0 851.10547 0.0 851.10547 851.10547 2.6332822 0.28733024 3.0850658 1.8468843 0.2989954 3.2812540531158447 +2 1041.0 0.0 1041.0 1041.0 906.8874 0.0 906.8874 906.8874 3.7900455 0.6247883 4.6364756 2.216647 0.22277898 4.974666357040405 +3 1063.0 0.0 1063.0 1063.0 902.42664 0.0 902.42664 902.42664 4.1203237 0.81065744 5.2966404 2.5061533 0.2407397 6.658747434616089 +4 1093.0 0.0 1093.0 1093.0 840.9646 0.0 840.9646 840.9646 4.2007217 0.7897101 5.3665195 2.396462 0.36421353 8.355271816253662 +5 1066.0 0.0 1066.0 1066.0 882.3243 0.0 882.3243 882.3243 3.8185031 0.57044506 4.7039504 2.1971014 0.28879836 10.036536455154419 +6 1119.0 0.0 1119.0 1119.0 904.7552 0.0 904.7552 904.7552 4.008738 0.62801003 5.0433335 2.7193644 0.27396995 11.721006870269775 +7 1074.0 0.0 1074.0 1074.0 903.6017 0.0 903.6017 903.6017 4.1922016 0.6174168 5.1540985 2.51493 0.2645931 13.409032583236694 +8 805.0 0.0 805.0 805.0 791.37726 0.0 791.37726 791.37726 4.151276 0.74073076 5.3729877 2.5267441 0.29615307 15.0681893825531 +9 1095.0 0.0 1095.0 1095.0 889.4276 0.0 889.4276 889.4276 3.7443585 0.24806657 4.0929894 3.0685394 0.27519703 16.775659561157227 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..4626b0a633e64a502d2654a6f3b09da73ee40439 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4894c49ec707aa482e0a1b1d2f5b2d9205e22874dd1c8cb106067b0f50ad303 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=6_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..03875b31c34b3ba88cca948bf815939dbdc27547 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 961.0 0.0 961.0 961.0 672.80273 0.0 672.80273 672.80273 0.14228402 0.029810969 0.18562327 0.09773606 0.24259013 1.6813135147094727 +1 1070.0 0.0 1070.0 1070.0 865.4652 0.0 865.4652 865.4652 2.6969306 0.2687202 3.106968 1.9529791 0.29136008 3.367244005203247 +2 1043.0 0.0 1043.0 1043.0 923.9946 0.0 923.9946 923.9946 3.7815807 0.65467995 4.6253176 2.300645 0.21519959 4.906596422195435 +3 1061.0 0.0 1061.0 1061.0 917.9733 0.0 917.9733 917.9733 4.1338196 0.835981 5.270089 2.5112152 0.23370375 6.4491143226623535 +4 1079.0 0.0 1079.0 1079.0 858.1947 0.0 858.1947 858.1947 4.2125835 0.8333583 5.348319 2.3878515 0.35671312 7.98772406578064 +5 1079.0 0.0 1079.0 1079.0 902.5666 0.0 902.5666 902.5666 3.8713121 0.58241147 4.692998 2.287545 0.2818424 9.528369188308716 +6 1130.0 0.0 1130.0 1130.0 936.6758 0.0 936.6758 936.6758 4.0084295 0.6779001 5.056368 2.6824515 0.26665384 11.075465202331543 +7 1121.0 0.0 1121.0 1121.0 928.7351 0.0 928.7351 928.7351 4.2476254 0.6488751 5.169139 2.62055 0.2570382 12.617008209228516 +8 1137.0 0.0 1137.0 1137.0 883.9627 0.0 883.9627 883.9627 4.1591363 0.85517967 5.421999 2.4114182 0.2885263 14.16002631187439 +9 1118.0 0.0 1118.0 1118.0 898.7405 0.0 898.7405 898.7405 3.9529326 0.8295392 5.281395 2.2905223 0.2673902 15.736875295639038 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..69c4ab8f3ca50f1d6afb7d54cb54d316617e10ea --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d857b9113697e595913ffa57105c1be021a244840ea258c783669ad2fae60ffe +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=7_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..952fa54afe24ddb202024728413437b75ae3a16a --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1034.0 0.0 1034.0 1034.0 1079.9553 0.0 1079.9553 1079.9553 0.21605834 0.042365503 0.2796381 0.15497766 -0.038780898 1.6459784507751465 +1 1168.0 0.0 1168.0 1168.0 1205.9836 0.0 1205.9836 1205.9836 4.4630384 0.31057099 4.838979 3.8222902 -0.021125838 3.1876041889190674 +2 1144.0 0.0 1144.0 1144.0 1240.7693 0.0 1240.7693 1240.7693 5.171804 1.1441867 6.7337685 3.3595479 -0.05391091 4.710508108139038 +3 1133.0 0.0 1133.0 1133.0 1200.8767 0.0 1200.8767 1200.8767 5.5057535 1.6697459 7.808464 2.9652863 -0.037729874 6.2439258098602295 +4 1142.0 0.0 1142.0 1142.0 1208.809 0.0 1208.809 1208.809 5.3703012 1.6442893 7.7458825 2.7436423 -0.037115887 7.759350299835205 +5 1160.0 0.0 1160.0 1160.0 1231.2292 0.0 1231.2292 1231.2292 5.320814 1.9105731 8.124485 2.3501947 -0.03957261 9.277325630187988 +6 1202.0 0.0 1202.0 1202.0 1281.9568 0.0 1281.9568 1281.9568 5.4751816 1.9902446 8.565249 2.364463 -0.04444532 10.887712955474854 +7 1147.0 0.0 1147.0 1147.0 1294.9371 0.0 1294.9371 1294.9371 5.6184745 1.998229 8.893472 2.496355 -0.08218762 12.549166202545166 +8 1170.0 0.0 1170.0 1170.0 1270.8387 0.0 1270.8387 1270.8387 5.9251842 1.7787681 8.885064 2.9820418 -0.05605237 14.208756446838379 +9 1186.0 0.0 1186.0 1186.0 1215.1256 0.0 1215.1256 1215.1256 5.695678 1.9910988 9.058374 2.991399 -0.016180381 15.860268354415894 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..703c1de01c040e7fe30bf8445ca93713a1337cbc --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6aa8bd7a59edcc45d7c4bf6a4eb830ab42c04388f843218735ed51e369fbae +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..2fbeec5a803143ad20f08d4019d208108b0e4292 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 977.0 0.0 977.0 977.0 683.73364 0.0 683.73364 683.73364 0.14078824 0.029987799 0.18571347 0.09782122 0.24958813 1.6149437427520752 +1 1122.0 0.0 1122.0 1122.0 898.36395 0.0 898.36395 898.36395 2.7371242 0.286233 3.1588078 1.9286686 0.30058575 3.161893129348755 +2 1104.0 0.0 1104.0 1104.0 957.6627 0.0 957.6627 957.6627 3.78458 0.8458599 4.873567 2.0521176 0.22409979 4.802204847335815 +3 1135.0 0.0 1135.0 1135.0 957.87366 0.0 957.87366 957.87366 4.216182 0.9596212 5.5277905 2.5358 0.24131618 6.445584774017334 +4 1153.0 0.0 1153.0 1153.0 864.5506 0.0 864.5506 864.5506 4.2319617 0.9638845 5.585052 2.3941586 0.36605257 8.089558124542236 +5 1138.0 0.0 1138.0 1138.0 916.838 0.0 916.838 916.838 3.8831625 0.6942631 4.920284 2.668192 0.28947893 9.742228031158447 +6 1145.0 0.0 1145.0 1145.0 905.30005 0.0 905.30005 905.30005 4.094736 0.7900061 5.348906 2.6736426 0.27551678 11.31412410736084 +7 855.0 0.0 855.0 855.0 826.2363 0.0 826.2363 826.2363 4.050936 0.7764123 5.3433127 2.648725 0.2663269 12.902616024017334 +8 1167.0 0.0 1167.0 1167.0 930.9453 0.0 930.9453 930.9453 3.7333295 0.36951882 4.3909554 2.8912907 0.29804882 14.568324565887451 +9 1092.0 0.0 1092.0 1092.0 919.21265 0.0 919.21265 919.21265 4.119758 0.67275053 5.310509 3.0019815 0.2769032 16.198491096496582 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..13531a6747ab0c4fc72fec3f0b7d16139e73d24e --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae0aa615adb5f2b257aa771845bbcca8926fdf861704600c88778a0c27adcc0 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=8_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..edc76ce7482707ea874f38210ce64c5fea6064cf --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 1032.0 0.0 1032.0 1032.0 1117.9751 0.0 1117.9751 1117.9751 0.21190602 0.041175347 0.28045586 0.15551326 -0.06839697 1.630922555923462 +1 1193.0 0.0 1193.0 1193.0 1292.8083 0.0 1292.8083 1292.8083 4.7634425 0.30209118 5.093944 4.0120144 -0.05255805 3.3065507411956787 +2 1145.0 0.0 1145.0 1145.0 1303.9799 0.0 1303.9799 1303.9799 5.671107 1.27375 7.2192593 3.2862499 -0.08720781 4.96855092048645 +3 1169.0 0.0 1169.0 1169.0 1291.3181 0.0 1291.3181 1291.3181 5.916274 1.5830145 7.889352 3.0657117 -0.06437777 6.525848388671875 +4 1169.0 0.0 1169.0 1169.0 1293.9768 0.0 1293.9768 1293.9768 5.8849673 1.6598476 8.037814 2.8002791 -0.065778 8.04737663269043 +5 1200.0 0.0 1200.0 1200.0 1322.1978 0.0 1322.1978 1322.1978 5.910547 1.8717549 8.416701 2.5331178 -0.06438224 9.57322382926941 +6 1213.0 0.0 1213.0 1213.0 1350.6469 0.0 1350.6469 1350.6469 6.046622 1.9628046 8.876022 2.542838 -0.072483465 11.091062068939209 +7 1183.0 0.0 1183.0 1183.0 1390.1986 0.0 1390.1986 1390.1986 6.1685896 1.9510728 9.097026 2.7358925 -0.10905148 12.614652395248413 +8 1206.0 0.0 1206.0 1206.0 1358.6039 0.0 1358.6039 1358.6039 6.488514 1.8134547 9.2469425 3.2008305 -0.08031809 14.133455276489258 +9 1226.0 0.0 1226.0 1226.0 1309.8999 0.0 1309.8999 1309.8999 6.3048472 2.055835 9.521513 3.0394502 -0.044158325 15.65134048461914 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..21f356bf9b789ad1a1864ce3205582b44dab7d97 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8666cd66c919d91e541dfd620ffb9620bfb373be2b891adb53c75c6c75a0fecf +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d47a887d534b2570c155ef0980d4a95ba5d4db3b --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s13/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc1964e1da16fa4042e1b8ee3e5bfa2bcc50bf376c632125ad82eaac00c5ab3 +size 13916 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f5edccabf4abca5f52e405a97bf91c4813f7991 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/progress.txt @@ -0,0 +1,11 @@ +Epoch AverageEpActualRet StdEpActualRet MaxEpActualRet MinEpActualRet AverageEpLambAdjRet StdEpLambAdjRet MaxEpLambAdjRet MinEpLambAdjRet AverageVVals StdVVals MaxVVals MinVVals Lamb Time +0 919.0 0.0 919.0 919.0 613.3221 0.0 613.3221 613.3221 0.14548475 0.03071772 0.18602628 0.09811598 0.27390492 1.6083781719207764 +1 1018.0 0.0 1018.0 1018.0 852.70416 0.0 852.70416 852.70416 2.3951375 0.3030792 2.8950012 1.6980454 0.32538527 3.2239272594451904 +2 1060.0 0.0 1060.0 1060.0 910.68384 0.0 910.68384 910.68384 3.6128173 0.635904 4.471431 2.3625295 0.24721181 4.822070360183716 +3 1099.0 0.0 1099.0 1099.0 917.91504 0.0 917.91504 917.91504 3.9292216 0.9183984 5.2811766 2.37148 0.26552042 6.429299831390381 +4 1074.0 0.0 1074.0 1074.0 852.43695 0.0 852.43695 852.43695 3.9310315 1.005079 5.448887 2.0264468 0.3833267 8.034744024276733 +5 1087.0 0.0 1087.0 1087.0 906.3388 0.0 906.3388 906.3388 3.7039654 0.7187894 4.7902145 2.0956912 0.31474012 9.637821912765503 +6 1121.0 0.0 1121.0 1121.0 896.8919 0.0 896.8919 896.8919 3.9213076 0.8141364 5.2739983 2.387634 0.30001137 11.250580310821533 +7 1111.0 0.0 1111.0 1111.0 909.1509 0.0 909.1509 909.1509 4.0072107 0.8491331 5.352795 2.3562946 0.29084903 12.85816764831543 +8 809.0 0.0 809.0 809.0 804.8208 0.0 804.8208 804.8208 3.966655 0.878105 5.4089427 2.2424142 0.32148242 14.467840194702148 +9 1082.0 0.0 1082.0 1082.0 888.71747 0.0 888.71747 888.71747 3.6598501 0.31349728 4.15523 2.757966 0.3010629 16.07224440574646 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..069cb1abacff786a08011b2ff3b2f08708bd8593 --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/pyt_save/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5be4dc0be636b62e819076cf81b33a38e3be81ad297b77a9c03368089606119 +size 26198 diff --git a/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..71b09833e98af5f8dbfffe8aa9b62843a4a2ef0c --- /dev/null +++ b/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0/reward_1707780024.5255785_rfhuman_cotTrue_task_index=9_stage=0_iteration=0_n21b7.0dcounterexamplersample_randomp0_s42/vars.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0b3e44997516dc2dd223775ffcd4f30448697446f2cf2cfb38a45a2bc05f6 +size 13916