|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.34989503149055284, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 196.71429443359375, |
|
"epoch": 0.0006997900629811056, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 4.999993958495186e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 187.6428680419922, |
|
"epoch": 0.0013995801259622112, |
|
"grad_norm": 1.9764715433120728, |
|
"kl": 0.0, |
|
"learning_rate": 4.999975834009943e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 219.6428680419922, |
|
"epoch": 0.002099370188943317, |
|
"grad_norm": 1.4515221118927002, |
|
"kl": 0.0005440683453343809, |
|
"learning_rate": 4.99994562663187e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 186.00001525878906, |
|
"epoch": 0.0027991602519244225, |
|
"grad_norm": 1.2500755786895752, |
|
"kl": 0.00046860199654474854, |
|
"learning_rate": 4.999903336506967e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 174.85714721679688, |
|
"epoch": 0.0034989503149055285, |
|
"grad_norm": 1.2094088792800903, |
|
"kl": 0.0005466189468279481, |
|
"learning_rate": 4.999848963839629e-07, |
|
"loss": 0.0, |
|
"reward": 0.0714285746216774, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.0714285746216774, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 192.6428680419922, |
|
"epoch": 0.004198740377886634, |
|
"grad_norm": 0.00022282492136582732, |
|
"kl": 0.0005208642687648535, |
|
"learning_rate": 4.999782508892652e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 150.35714721679688, |
|
"epoch": 0.00489853044086774, |
|
"grad_norm": 1.9125372171401978, |
|
"kl": 0.0005498763057403266, |
|
"learning_rate": 4.999703971987225e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 186.07144165039062, |
|
"epoch": 0.005598320503848845, |
|
"grad_norm": 0.9886441826820374, |
|
"kl": 0.0004702982259914279, |
|
"learning_rate": 4.999613353502932e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 188.07144165039062, |
|
"epoch": 0.006298110566829951, |
|
"grad_norm": 1.7563767433166504, |
|
"kl": 0.0005437198560684919, |
|
"learning_rate": 4.999510653877754e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 139.7857208251953, |
|
"epoch": 0.006997900629811057, |
|
"grad_norm": 1.6960475444793701, |
|
"kl": 0.0006080380990169942, |
|
"learning_rate": 4.999395873608056e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 212.57144165039062, |
|
"epoch": 0.007697690692792162, |
|
"grad_norm": 0.7041858434677124, |
|
"kl": 0.0005200192099437118, |
|
"learning_rate": 4.999269013248595e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 191.07144165039062, |
|
"epoch": 0.008397480755773267, |
|
"grad_norm": 2.3034281730651855, |
|
"kl": 0.0005117644323036075, |
|
"learning_rate": 4.999130073412514e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 155.42857360839844, |
|
"epoch": 0.009097270818754374, |
|
"grad_norm": 1.0909160375595093, |
|
"kl": 0.0005560470744967461, |
|
"learning_rate": 4.998979054771338e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 139.07144165039062, |
|
"epoch": 0.00979706088173548, |
|
"grad_norm": 1.8755857944488525, |
|
"kl": 0.0004631040501408279, |
|
"learning_rate": 4.998815958054968e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 224.50001525878906, |
|
"epoch": 0.010496850944716585, |
|
"grad_norm": 0.8215782642364502, |
|
"kl": 0.0006062331376597285, |
|
"learning_rate": 4.998640784051687e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 206.21429443359375, |
|
"epoch": 0.01119664100769769, |
|
"grad_norm": 1.3835495710372925, |
|
"kl": 0.0005705293151549995, |
|
"learning_rate": 4.998453533608144e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 175.00001525878906, |
|
"epoch": 0.011896431070678797, |
|
"grad_norm": 0.9386060237884521, |
|
"kl": 0.0005826915730722249, |
|
"learning_rate": 4.998254207629361e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 165.71429443359375, |
|
"epoch": 0.012596221133659902, |
|
"grad_norm": 1.5630214214324951, |
|
"kl": 0.0006076901918277144, |
|
"learning_rate": 4.998042807078718e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 178.35714721679688, |
|
"epoch": 0.013296011196641007, |
|
"grad_norm": 2.002047061920166, |
|
"kl": 0.0007087856065481901, |
|
"learning_rate": 4.997819332977961e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 195.1428680419922, |
|
"epoch": 0.013995801259622114, |
|
"grad_norm": 1.0907176733016968, |
|
"kl": 0.0005539478152059019, |
|
"learning_rate": 4.997583786407182e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 181.85714721679688, |
|
"epoch": 0.01469559132260322, |
|
"grad_norm": 1.4313894510269165, |
|
"kl": 0.000553421676158905, |
|
"learning_rate": 4.997336168504828e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 159.0, |
|
"epoch": 0.015395381385584325, |
|
"grad_norm": 2.166677474975586, |
|
"kl": 0.000688056752551347, |
|
"learning_rate": 4.997076480467687e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 196.71429443359375, |
|
"epoch": 0.01609517144856543, |
|
"grad_norm": 0.73772794008255, |
|
"kl": 0.0005970090860500932, |
|
"learning_rate": 4.996804723550882e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 183.00001525878906, |
|
"epoch": 0.016794961511546535, |
|
"grad_norm": 0.8837577700614929, |
|
"kl": 0.0007048293482512236, |
|
"learning_rate": 4.996520899067872e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 153.21429443359375, |
|
"epoch": 0.01749475157452764, |
|
"grad_norm": 0.8414170742034912, |
|
"kl": 0.0006337231025099754, |
|
"learning_rate": 4.996225008390438e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 196.7857208251953, |
|
"epoch": 0.01819454163750875, |
|
"grad_norm": 0.9930105805397034, |
|
"kl": 0.0007302867597900331, |
|
"learning_rate": 4.995917052948678e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 204.7857208251953, |
|
"epoch": 0.018894331700489854, |
|
"grad_norm": 1.2448654174804688, |
|
"kl": 0.0005582471494562924, |
|
"learning_rate": 4.995597034231005e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 188.85714721679688, |
|
"epoch": 0.01959412176347096, |
|
"grad_norm": 1.0431102514266968, |
|
"kl": 0.0006140515906736255, |
|
"learning_rate": 4.995264953784135e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 196.2857208251953, |
|
"epoch": 0.020293911826452064, |
|
"grad_norm": 0.6548807621002197, |
|
"kl": 0.0007696707616560161, |
|
"learning_rate": 4.994920813213081e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 174.85714721679688, |
|
"epoch": 0.02099370188943317, |
|
"grad_norm": 1.2723424434661865, |
|
"kl": 0.0008509230683557689, |
|
"learning_rate": 4.994564614181142e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 192.35714721679688, |
|
"epoch": 0.021693491952414275, |
|
"grad_norm": 1.4635212421417236, |
|
"kl": 0.0008967228350229561, |
|
"learning_rate": 4.994196358409902e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 163.57144165039062, |
|
"epoch": 0.02239328201539538, |
|
"grad_norm": 1.6527413129806519, |
|
"kl": 0.0010716207325458527, |
|
"learning_rate": 4.993816047679218e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 189.1428680419922, |
|
"epoch": 0.02309307207837649, |
|
"grad_norm": 1.802273154258728, |
|
"kl": 0.0008337192703038454, |
|
"learning_rate": 4.993423683827206e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 160.21429443359375, |
|
"epoch": 0.023792862141357594, |
|
"grad_norm": 1.957972526550293, |
|
"kl": 0.0009387803147546947, |
|
"learning_rate": 4.993019268750243e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 187.7857208251953, |
|
"epoch": 0.0244926522043387, |
|
"grad_norm": 1.3548457622528076, |
|
"kl": 0.0009079152951017022, |
|
"learning_rate": 4.992602804402949e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 176.00001525878906, |
|
"epoch": 0.025192442267319804, |
|
"grad_norm": 1.9381297826766968, |
|
"kl": 0.0008999091223813593, |
|
"learning_rate": 4.99217429279818e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 162.7857208251953, |
|
"epoch": 0.02589223233030091, |
|
"grad_norm": 0.9859722852706909, |
|
"kl": 0.0013975003967061639, |
|
"learning_rate": 4.991733736007021e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 173.6428680419922, |
|
"epoch": 0.026592022393282014, |
|
"grad_norm": 1.988441824913025, |
|
"kl": 0.0015703426906839013, |
|
"learning_rate": 4.991281136158773e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 141.35714721679688, |
|
"epoch": 0.02729181245626312, |
|
"grad_norm": 2.0865073204040527, |
|
"kl": 0.001419402426108718, |
|
"learning_rate": 4.990816495440942e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 165.0, |
|
"epoch": 0.02799160251924423, |
|
"grad_norm": 1.4797226190567017, |
|
"kl": 0.0013270487543195486, |
|
"learning_rate": 4.990339816099233e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 188.1428680419922, |
|
"epoch": 0.028691392582225334, |
|
"grad_norm": 0.6184691786766052, |
|
"kl": 0.0011400326620787382, |
|
"learning_rate": 4.989851100437532e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 209.00001525878906, |
|
"epoch": 0.02939118264520644, |
|
"grad_norm": 1.2628567218780518, |
|
"kl": 0.0012015528045594692, |
|
"learning_rate": 4.989350350817904e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 189.92857360839844, |
|
"epoch": 0.030090972708187544, |
|
"grad_norm": 0.8222217559814453, |
|
"kl": 0.0016094425227493048, |
|
"learning_rate": 4.988837569660572e-07, |
|
"loss": 0.0, |
|
"reward": 0.0714285746216774, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.0714285746216774, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 193.92857360839844, |
|
"epoch": 0.03079076277116865, |
|
"grad_norm": 0.8283395767211914, |
|
"kl": 0.0015535946004092693, |
|
"learning_rate": 4.988312759443914e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 200.92857360839844, |
|
"epoch": 0.031490552834149754, |
|
"grad_norm": 1.7838681936264038, |
|
"kl": 0.0012420819839462638, |
|
"learning_rate": 4.987775922704442e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 159.35714721679688, |
|
"epoch": 0.03219034289713086, |
|
"grad_norm": 1.0043693780899048, |
|
"kl": 0.002186823170632124, |
|
"learning_rate": 4.987227062036801e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 186.2857208251953, |
|
"epoch": 0.032890132960111965, |
|
"grad_norm": 1.891964077949524, |
|
"kl": 0.002127930987626314, |
|
"learning_rate": 4.986666180093742e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 177.2857208251953, |
|
"epoch": 0.03358992302309307, |
|
"grad_norm": 2.1152210235595703, |
|
"kl": 0.0016283347504213452, |
|
"learning_rate": 4.986093279586126e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 189.57144165039062, |
|
"epoch": 0.034289713086074175, |
|
"grad_norm": 0.0008509167237207294, |
|
"kl": 0.0014085659058764577, |
|
"learning_rate": 4.985508363282896e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 188.07144165039062, |
|
"epoch": 0.03498950314905528, |
|
"grad_norm": 1.111471176147461, |
|
"kl": 0.001829526387155056, |
|
"learning_rate": 4.984911434011071e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 198.6428680419922, |
|
"epoch": 0.03568929321203639, |
|
"grad_norm": 1.341188669204712, |
|
"kl": 0.0019986648112535477, |
|
"learning_rate": 4.984302494655734e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 149.7857208251953, |
|
"epoch": 0.0363890832750175, |
|
"grad_norm": 2.1039445400238037, |
|
"kl": 0.004148606210947037, |
|
"learning_rate": 4.98368154816001e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 174.71429443359375, |
|
"epoch": 0.0370888733379986, |
|
"grad_norm": 0.9583792686462402, |
|
"kl": 0.0040195719338953495, |
|
"learning_rate": 4.983048597525064e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 167.0, |
|
"epoch": 0.03778866340097971, |
|
"grad_norm": 1.5592998266220093, |
|
"kl": 0.0030570521485060453, |
|
"learning_rate": 4.982403645810072e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 165.1428680419922, |
|
"epoch": 0.03848845346396081, |
|
"grad_norm": 0.9802365303039551, |
|
"kl": 0.003059495473280549, |
|
"learning_rate": 4.981746696132217e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 146.92857360839844, |
|
"epoch": 0.03918824352694192, |
|
"grad_norm": 1.78388249874115, |
|
"kl": 0.0026406769175082445, |
|
"learning_rate": 4.981077751666674e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 167.21429443359375, |
|
"epoch": 0.03988803358992302, |
|
"grad_norm": 1.3540505170822144, |
|
"kl": 0.0034094061702489853, |
|
"learning_rate": 4.980396815646585e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 198.71429443359375, |
|
"epoch": 0.04058782365290413, |
|
"grad_norm": 2.1004791259765625, |
|
"kl": 0.0027298128698021173, |
|
"learning_rate": 4.979703891363054e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 196.00001525878906, |
|
"epoch": 0.041287613715885234, |
|
"grad_norm": 1.7114323377609253, |
|
"kl": 0.0020506808068603277, |
|
"learning_rate": 4.978998982165125e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 212.71429443359375, |
|
"epoch": 0.04198740377886634, |
|
"grad_norm": 2.0191173553466797, |
|
"kl": 0.003132865997031331, |
|
"learning_rate": 4.978282091459768e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 181.6428680419922, |
|
"epoch": 0.042687193841847444, |
|
"grad_norm": 1.0076220035552979, |
|
"kl": 0.0017425131518393755, |
|
"learning_rate": 4.97755322271186e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 167.0, |
|
"epoch": 0.04338698390482855, |
|
"grad_norm": 2.260714292526245, |
|
"kl": 0.0029773979913443327, |
|
"learning_rate": 4.976812379444175e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 148.71429443359375, |
|
"epoch": 0.044086773967809655, |
|
"grad_norm": 2.721731185913086, |
|
"kl": 0.0040284739807248116, |
|
"learning_rate": 4.976059565237359e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 193.6428680419922, |
|
"epoch": 0.04478656403079076, |
|
"grad_norm": 1.34214448928833, |
|
"kl": 0.0031287071760743856, |
|
"learning_rate": 4.975294783729916e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 190.1428680419922, |
|
"epoch": 0.04548635409377187, |
|
"grad_norm": 0.9148805141448975, |
|
"kl": 0.004262570291757584, |
|
"learning_rate": 4.97451803861819e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 175.71429443359375, |
|
"epoch": 0.04618614415675298, |
|
"grad_norm": 1.7284623384475708, |
|
"kl": 0.00435349065810442, |
|
"learning_rate": 4.97372933365635e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 198.1428680419922, |
|
"epoch": 0.04688593421973408, |
|
"grad_norm": 1.8161473274230957, |
|
"kl": 0.004268465097993612, |
|
"learning_rate": 4.972928672656367e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 187.50001525878906, |
|
"epoch": 0.04758572428271519, |
|
"grad_norm": 1.525362491607666, |
|
"kl": 0.004342732485383749, |
|
"learning_rate": 4.972116059488e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 179.35714721679688, |
|
"epoch": 0.04828551434569629, |
|
"grad_norm": 2.111542224884033, |
|
"kl": 0.004933161195367575, |
|
"learning_rate": 4.971291498078771e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 192.07144165039062, |
|
"epoch": 0.0489853044086774, |
|
"grad_norm": 1.236236810684204, |
|
"kl": 0.003895305097103119, |
|
"learning_rate": 4.970454992413958e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 200.71429443359375, |
|
"epoch": 0.0496850944716585, |
|
"grad_norm": 1.3246337175369263, |
|
"kl": 0.0037796935066580772, |
|
"learning_rate": 4.969606546536559e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 164.21429443359375, |
|
"epoch": 0.05038488453463961, |
|
"grad_norm": 1.7727972269058228, |
|
"kl": 0.004398949909955263, |
|
"learning_rate": 4.968746164547288e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 157.0, |
|
"epoch": 0.05108467459762071, |
|
"grad_norm": 2.276618480682373, |
|
"kl": 0.005382773466408253, |
|
"learning_rate": 4.967873850604548e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 186.1428680419922, |
|
"epoch": 0.05178446466060182, |
|
"grad_norm": 2.389155387878418, |
|
"kl": 0.0061807953752577305, |
|
"learning_rate": 4.966989608924407e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 176.07144165039062, |
|
"epoch": 0.052484254723582924, |
|
"grad_norm": 1.4952914714813232, |
|
"kl": 0.004904923029243946, |
|
"learning_rate": 4.966093443780587e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 179.2857208251953, |
|
"epoch": 0.05318404478656403, |
|
"grad_norm": 1.4025474786758423, |
|
"kl": 0.004885567817837, |
|
"learning_rate": 4.965185359504436e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 191.2857208251953, |
|
"epoch": 0.053883834849545134, |
|
"grad_norm": 2.3730008602142334, |
|
"kl": 0.0020737627055495977, |
|
"learning_rate": 4.964265360484913e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.6060914993286133, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 175.57144165039062, |
|
"epoch": 0.05458362491252624, |
|
"grad_norm": 1.5513525009155273, |
|
"kl": 0.004426781553775072, |
|
"learning_rate": 4.963333451168557e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 202.57144165039062, |
|
"epoch": 0.055283414975507345, |
|
"grad_norm": 1.254483938217163, |
|
"kl": 0.003231517504900694, |
|
"learning_rate": 4.962389636059478e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 149.71429443359375, |
|
"epoch": 0.05598320503848846, |
|
"grad_norm": 1.6224321126937866, |
|
"kl": 0.005682069342583418, |
|
"learning_rate": 4.961433919719327e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 168.71429443359375, |
|
"epoch": 0.05668299510146956, |
|
"grad_norm": 2.155405044555664, |
|
"kl": 0.0056697772815823555, |
|
"learning_rate": 4.960466306767274e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 181.57144165039062, |
|
"epoch": 0.05738278516445067, |
|
"grad_norm": 2.1720130443573, |
|
"kl": 0.004250777419656515, |
|
"learning_rate": 4.959486801879991e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 182.21429443359375, |
|
"epoch": 0.05808257522743177, |
|
"grad_norm": 2.637775182723999, |
|
"kl": 0.005601090379059315, |
|
"learning_rate": 4.958495409791626e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 83 |
|
}, |
|
{ |
|
"completion_length": 208.42857360839844, |
|
"epoch": 0.05878236529041288, |
|
"grad_norm": 1.8412342071533203, |
|
"kl": 0.003481087274849415, |
|
"learning_rate": 4.957492135293776e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 84 |
|
}, |
|
{ |
|
"completion_length": 171.71429443359375, |
|
"epoch": 0.05948215535339398, |
|
"grad_norm": 1.6271586418151855, |
|
"kl": 0.004620662424713373, |
|
"learning_rate": 4.956476983235474e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 163.21429443359375, |
|
"epoch": 0.06018194541637509, |
|
"grad_norm": 1.3987679481506348, |
|
"kl": 0.006008537020534277, |
|
"learning_rate": 4.955449958523155e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 86 |
|
}, |
|
{ |
|
"completion_length": 175.85714721679688, |
|
"epoch": 0.06088173547935619, |
|
"grad_norm": 1.1900994777679443, |
|
"kl": 0.007817798294126987, |
|
"learning_rate": 4.95441106612064e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 87 |
|
}, |
|
{ |
|
"completion_length": 173.6428680419922, |
|
"epoch": 0.0615815255423373, |
|
"grad_norm": 1.6161788702011108, |
|
"kl": 0.005173846147954464, |
|
"learning_rate": 4.953360311049107e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 88 |
|
}, |
|
{ |
|
"completion_length": 182.21429443359375, |
|
"epoch": 0.0622813156053184, |
|
"grad_norm": 1.1579365730285645, |
|
"kl": 0.004471412859857082, |
|
"learning_rate": 4.95229769838707e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 89 |
|
}, |
|
{ |
|
"completion_length": 179.2857208251953, |
|
"epoch": 0.06298110566829951, |
|
"grad_norm": 1.5062532424926758, |
|
"kl": 0.006312033161520958, |
|
"learning_rate": 4.951223233270353e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 173.2857208251953, |
|
"epoch": 0.06368089573128062, |
|
"grad_norm": 2.0784623622894287, |
|
"kl": 0.00826327409595251, |
|
"learning_rate": 4.950136920892063e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 91 |
|
}, |
|
{ |
|
"completion_length": 167.1428680419922, |
|
"epoch": 0.06438068579426172, |
|
"grad_norm": 0.5718992948532104, |
|
"kl": 0.0075133126229047775, |
|
"learning_rate": 4.949038766502571e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 92 |
|
}, |
|
{ |
|
"completion_length": 168.35714721679688, |
|
"epoch": 0.06508047585724283, |
|
"grad_norm": 0.0029800846241414547, |
|
"kl": 0.008064369671046734, |
|
"learning_rate": 4.947928775409481e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 93 |
|
}, |
|
{ |
|
"completion_length": 209.35714721679688, |
|
"epoch": 0.06578026592022393, |
|
"grad_norm": 1.2102426290512085, |
|
"kl": 0.008969871327280998, |
|
"learning_rate": 4.946806952977605e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 94 |
|
}, |
|
{ |
|
"completion_length": 180.85714721679688, |
|
"epoch": 0.06648005598320504, |
|
"grad_norm": 1.4967552423477173, |
|
"kl": 0.006019816268235445, |
|
"learning_rate": 4.94567330462894e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 177.1428680419922, |
|
"epoch": 0.06717984604618614, |
|
"grad_norm": 2.391160726547241, |
|
"kl": 0.005439083557575941, |
|
"learning_rate": 4.944527835842641e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 96 |
|
}, |
|
{ |
|
"completion_length": 218.57144165039062, |
|
"epoch": 0.06787963610916725, |
|
"grad_norm": 0.8848944902420044, |
|
"kl": 0.005665027070790529, |
|
"learning_rate": 4.94337055215499e-07, |
|
"loss": 0.0, |
|
"reward": 0.2142857313156128, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.2142857313156128, |
|
"step": 97 |
|
}, |
|
{ |
|
"completion_length": 188.6428680419922, |
|
"epoch": 0.06857942617214835, |
|
"grad_norm": 1.6207823753356934, |
|
"kl": 0.006600015331059694, |
|
"learning_rate": 4.942201459159377e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 98 |
|
}, |
|
{ |
|
"completion_length": 215.1428680419922, |
|
"epoch": 0.06927921623512946, |
|
"grad_norm": 2.0508172512054443, |
|
"kl": 0.004012170247733593, |
|
"learning_rate": 4.941020562506264e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 99 |
|
}, |
|
{ |
|
"completion_length": 178.71429443359375, |
|
"epoch": 0.06997900629811056, |
|
"grad_norm": 1.683928370475769, |
|
"kl": 0.00840698555111885, |
|
"learning_rate": 4.939827867903168e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 184.7857208251953, |
|
"epoch": 0.07067879636109167, |
|
"grad_norm": 1.2873668670654297, |
|
"kl": 0.012355304323136806, |
|
"learning_rate": 4.938623381114623e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 101 |
|
}, |
|
{ |
|
"completion_length": 176.1428680419922, |
|
"epoch": 0.07137858642407278, |
|
"grad_norm": 0.003792037256062031, |
|
"kl": 0.012887879274785519, |
|
"learning_rate": 4.937407107962162e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 102 |
|
}, |
|
{ |
|
"completion_length": 197.42857360839844, |
|
"epoch": 0.07207837648705388, |
|
"grad_norm": 1.305524468421936, |
|
"kl": 0.0113840876147151, |
|
"learning_rate": 4.936179054324278e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 103 |
|
}, |
|
{ |
|
"completion_length": 123.71429443359375, |
|
"epoch": 0.072778166550035, |
|
"grad_norm": 0.7037422060966492, |
|
"kl": 0.016403552144765854, |
|
"learning_rate": 4.934939226136406e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 104 |
|
}, |
|
{ |
|
"completion_length": 211.50001525878906, |
|
"epoch": 0.0734779566130161, |
|
"grad_norm": 0.5676343441009521, |
|
"kl": 0.004641632083803415, |
|
"learning_rate": 4.933687629390889e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 183.7857208251953, |
|
"epoch": 0.0741777466759972, |
|
"grad_norm": 1.5358415842056274, |
|
"kl": 0.011431436985731125, |
|
"learning_rate": 4.932424270136948e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 106 |
|
}, |
|
{ |
|
"completion_length": 174.2857208251953, |
|
"epoch": 0.0748775367389783, |
|
"grad_norm": 1.5432114601135254, |
|
"kl": 0.018761329352855682, |
|
"learning_rate": 4.931149154480656e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 107 |
|
}, |
|
{ |
|
"completion_length": 183.1428680419922, |
|
"epoch": 0.07557732680195942, |
|
"grad_norm": 0.9925021529197693, |
|
"kl": 0.007842729799449444, |
|
"learning_rate": 4.929862288584908e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 108 |
|
}, |
|
{ |
|
"completion_length": 201.92857360839844, |
|
"epoch": 0.07627711686494051, |
|
"grad_norm": 1.465742826461792, |
|
"kl": 0.010015713982284069, |
|
"learning_rate": 4.928563678669387e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 109 |
|
}, |
|
{ |
|
"completion_length": 204.50001525878906, |
|
"epoch": 0.07697690692792163, |
|
"grad_norm": 1.558184027671814, |
|
"kl": 0.008194749243557453, |
|
"learning_rate": 4.927253331010543e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 213.7857208251953, |
|
"epoch": 0.07767669699090272, |
|
"grad_norm": 0.7771201133728027, |
|
"kl": 0.011434059590101242, |
|
"learning_rate": 4.92593125194155e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 111 |
|
}, |
|
{ |
|
"completion_length": 163.1428680419922, |
|
"epoch": 0.07837648705388384, |
|
"grad_norm": 2.267927885055542, |
|
"kl": 0.021138539537787437, |
|
"learning_rate": 4.924597447852287e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 112 |
|
}, |
|
{ |
|
"completion_length": 215.92857360839844, |
|
"epoch": 0.07907627711686493, |
|
"grad_norm": 1.186661720275879, |
|
"kl": 0.011579162441194057, |
|
"learning_rate": 4.9232519251893e-07, |
|
"loss": 0.0, |
|
"reward": 0.2857142984867096, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.2857142984867096, |
|
"step": 113 |
|
}, |
|
{ |
|
"completion_length": 213.85714721679688, |
|
"epoch": 0.07977606717984605, |
|
"grad_norm": 1.2522072792053223, |
|
"kl": 0.01113107893615961, |
|
"learning_rate": 4.921894690455776e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 114 |
|
}, |
|
{ |
|
"completion_length": 200.2857208251953, |
|
"epoch": 0.08047585724282715, |
|
"grad_norm": 0.6697101593017578, |
|
"kl": 0.010488255880773067, |
|
"learning_rate": 4.920525750211507e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 165.1428680419922, |
|
"epoch": 0.08117564730580826, |
|
"grad_norm": 1.1102581024169922, |
|
"kl": 0.01489555835723877, |
|
"learning_rate": 4.919145111072858e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 116 |
|
}, |
|
{ |
|
"completion_length": 204.85714721679688, |
|
"epoch": 0.08187543736878937, |
|
"grad_norm": 1.8439916372299194, |
|
"kl": 0.013555423356592655, |
|
"learning_rate": 4.917752779712743e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 117 |
|
}, |
|
{ |
|
"completion_length": 190.50001525878906, |
|
"epoch": 0.08257522743177047, |
|
"grad_norm": 1.460869550704956, |
|
"kl": 0.022136248648166656, |
|
"learning_rate": 4.91634876286058e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 118 |
|
}, |
|
{ |
|
"completion_length": 166.2857208251953, |
|
"epoch": 0.08327501749475158, |
|
"grad_norm": 2.0857505798339844, |
|
"kl": 0.024110617116093636, |
|
"learning_rate": 4.914933067302271e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 119 |
|
}, |
|
{ |
|
"completion_length": 191.57144165039062, |
|
"epoch": 0.08397480755773268, |
|
"grad_norm": 0.8973117470741272, |
|
"kl": 0.011640184558928013, |
|
"learning_rate": 4.91350569988016e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 167.5, |
|
"epoch": 0.08467459762071379, |
|
"grad_norm": 2.02925705909729, |
|
"kl": 0.02297617308795452, |
|
"learning_rate": 4.912066667493005e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.5050762891769409, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 121 |
|
}, |
|
{ |
|
"completion_length": 226.4285888671875, |
|
"epoch": 0.08537438768369489, |
|
"grad_norm": 0.7617030739784241, |
|
"kl": 0.006312475074082613, |
|
"learning_rate": 4.910615977095942e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 122 |
|
}, |
|
{ |
|
"completion_length": 163.57144165039062, |
|
"epoch": 0.086074177746676, |
|
"grad_norm": 0.9568012356758118, |
|
"kl": 0.026632316410541534, |
|
"learning_rate": 4.909153635700454e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 123 |
|
}, |
|
{ |
|
"completion_length": 193.50001525878906, |
|
"epoch": 0.0867739678096571, |
|
"grad_norm": 1.9485886096954346, |
|
"kl": 0.020928634330630302, |
|
"learning_rate": 4.907679650374336e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.1428571492433548, |
|
"step": 124 |
|
}, |
|
{ |
|
"completion_length": 218.57144165039062, |
|
"epoch": 0.08747375787263821, |
|
"grad_norm": 1.3334918022155762, |
|
"kl": 0.011499721556901932, |
|
"learning_rate": 4.906194028241659e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 180.57144165039062, |
|
"epoch": 0.08817354793561931, |
|
"grad_norm": 2.280923843383789, |
|
"kl": 0.016467615962028503, |
|
"learning_rate": 4.904696776482737e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 126 |
|
}, |
|
{ |
|
"completion_length": 221.21429443359375, |
|
"epoch": 0.08887333799860042, |
|
"grad_norm": 1.539788842201233, |
|
"kl": 0.007166714873164892, |
|
"learning_rate": 4.903187902334094e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 127 |
|
}, |
|
{ |
|
"completion_length": 195.1428680419922, |
|
"epoch": 0.08957312806158152, |
|
"grad_norm": 1.1616483926773071, |
|
"kl": 0.031080516055226326, |
|
"learning_rate": 4.901667413088425e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 128 |
|
}, |
|
{ |
|
"completion_length": 186.35714721679688, |
|
"epoch": 0.09027291812456263, |
|
"grad_norm": 2.2133805751800537, |
|
"kl": 0.01747089996933937, |
|
"learning_rate": 4.900135316094566e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 129 |
|
}, |
|
{ |
|
"completion_length": 191.6428680419922, |
|
"epoch": 0.09097270818754374, |
|
"grad_norm": 1.893270492553711, |
|
"kl": 0.02181846648454666, |
|
"learning_rate": 4.898591618757453e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 156.0, |
|
"epoch": 0.09167249825052484, |
|
"grad_norm": 1.7143210172653198, |
|
"kl": 0.04193011671304703, |
|
"learning_rate": 4.897036328538091e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 131 |
|
}, |
|
{ |
|
"completion_length": 142.2857208251953, |
|
"epoch": 0.09237228831350595, |
|
"grad_norm": 0.0073611014522612095, |
|
"kl": 0.0435887835919857, |
|
"learning_rate": 4.895469452953513e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 132 |
|
}, |
|
{ |
|
"completion_length": 160.35714721679688, |
|
"epoch": 0.09307207837648705, |
|
"grad_norm": 2.0896999835968018, |
|
"kl": 0.03058517538011074, |
|
"learning_rate": 4.893890999576749e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 133 |
|
}, |
|
{ |
|
"completion_length": 220.00001525878906, |
|
"epoch": 0.09377186843946816, |
|
"grad_norm": 1.3652797937393188, |
|
"kl": 0.024748124182224274, |
|
"learning_rate": 4.892300976036786e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 134 |
|
}, |
|
{ |
|
"completion_length": 191.1428680419922, |
|
"epoch": 0.09447165850244926, |
|
"grad_norm": 1.4164782762527466, |
|
"kl": 0.03566557914018631, |
|
"learning_rate": 4.890699390018533e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 166.42857360839844, |
|
"epoch": 0.09517144856543037, |
|
"grad_norm": 1.777478814125061, |
|
"kl": 0.03295962139964104, |
|
"learning_rate": 4.889086249262779e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 136 |
|
}, |
|
{ |
|
"completion_length": 171.00001525878906, |
|
"epoch": 0.09587123862841147, |
|
"grad_norm": 1.9500823020935059, |
|
"kl": 0.04803250730037689, |
|
"learning_rate": 4.887461561566165e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 137 |
|
}, |
|
{ |
|
"completion_length": 176.42857360839844, |
|
"epoch": 0.09657102869139259, |
|
"grad_norm": 1.932502031326294, |
|
"kl": 0.028310805559158325, |
|
"learning_rate": 4.885825334781136e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 138 |
|
}, |
|
{ |
|
"completion_length": 186.85714721679688, |
|
"epoch": 0.09727081875437368, |
|
"grad_norm": 1.4214417934417725, |
|
"kl": 0.02100592665374279, |
|
"learning_rate": 4.884177576815911e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 139 |
|
}, |
|
{ |
|
"completion_length": 200.57144165039062, |
|
"epoch": 0.0979706088173548, |
|
"grad_norm": 1.6438137292861938, |
|
"kl": 0.01962038315832615, |
|
"learning_rate": 4.882518295634438e-07, |
|
"loss": 0.0, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 181.35714721679688, |
|
"epoch": 0.0986703988803359, |
|
"grad_norm": 1.0898855924606323, |
|
"kl": 0.03357081487774849, |
|
"learning_rate": 4.880847499256363e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 141 |
|
}, |
|
{ |
|
"completion_length": 198.7857208251953, |
|
"epoch": 0.099370188943317, |
|
"grad_norm": 0.8553615212440491, |
|
"kl": 0.020783616229891777, |
|
"learning_rate": 4.879165195756985e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 142 |
|
}, |
|
{ |
|
"completion_length": 163.1428680419922, |
|
"epoch": 0.1000699790062981, |
|
"grad_norm": 2.111443042755127, |
|
"kl": 0.04269000142812729, |
|
"learning_rate": 4.87747139326722e-07, |
|
"loss": 0.0, |
|
"reward": 0.3571428656578064, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.3571428656578064, |
|
"step": 143 |
|
}, |
|
{ |
|
"completion_length": 166.2857208251953, |
|
"epoch": 0.10076976906927922, |
|
"grad_norm": 1.4155843257904053, |
|
"kl": 0.04325467720627785, |
|
"learning_rate": 4.87576609997356e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 144 |
|
}, |
|
{ |
|
"completion_length": 183.07144165039062, |
|
"epoch": 0.10146955913226033, |
|
"grad_norm": 1.320349931716919, |
|
"kl": 0.04014229029417038, |
|
"learning_rate": 4.874049324118036e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 173.35714721679688, |
|
"epoch": 0.10216934919524143, |
|
"grad_norm": 2.3059661388397217, |
|
"kl": 0.0478234700858593, |
|
"learning_rate": 4.872321073998174e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 146 |
|
}, |
|
{ |
|
"completion_length": 179.1428680419922, |
|
"epoch": 0.10286913925822254, |
|
"grad_norm": 1.71186363697052, |
|
"kl": 0.030521634966135025, |
|
"learning_rate": 4.870581357966961e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 147 |
|
}, |
|
{ |
|
"completion_length": 156.1428680419922, |
|
"epoch": 0.10356892932120364, |
|
"grad_norm": 1.8213680982589722, |
|
"kl": 0.029109062626957893, |
|
"learning_rate": 4.868830184432799e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 148 |
|
}, |
|
{ |
|
"completion_length": 183.07144165039062, |
|
"epoch": 0.10426871938418475, |
|
"grad_norm": 2.07926344871521, |
|
"kl": 0.031393494457006454, |
|
"learning_rate": 4.867067561859467e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 149 |
|
}, |
|
{ |
|
"completion_length": 160.2857208251953, |
|
"epoch": 0.10496850944716585, |
|
"grad_norm": 1.52970290184021, |
|
"kl": 0.05355866253376007, |
|
"learning_rate": 4.865293498766077e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 201.85714721679688, |
|
"epoch": 0.10566829951014696, |
|
"grad_norm": 0.8261334300041199, |
|
"kl": 0.026441525667905807, |
|
"learning_rate": 4.86350800372704e-07, |
|
"loss": 0.0, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 151 |
|
}, |
|
{ |
|
"completion_length": 164.7857208251953, |
|
"epoch": 0.10636808957312806, |
|
"grad_norm": 1.560228705406189, |
|
"kl": 0.03294193744659424, |
|
"learning_rate": 4.861711085372015e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 152 |
|
}, |
|
{ |
|
"completion_length": 221.1428680419922, |
|
"epoch": 0.10706787963610917, |
|
"grad_norm": 1.467367172241211, |
|
"kl": 0.02558579109609127, |
|
"learning_rate": 4.859902752385879e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 153 |
|
}, |
|
{ |
|
"completion_length": 196.21429443359375, |
|
"epoch": 0.10776766969909027, |
|
"grad_norm": 0.007789432071149349, |
|
"kl": 0.04791080579161644, |
|
"learning_rate": 4.858083013508669e-07, |
|
"loss": 0.0, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 154 |
|
}, |
|
{ |
|
"completion_length": 167.85714721679688, |
|
"epoch": 0.10846745976207138, |
|
"grad_norm": 1.4503986835479736, |
|
"kl": 0.0463978573679924, |
|
"learning_rate": 4.856251877535556e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 168.1428680419922, |
|
"epoch": 0.10916724982505248, |
|
"grad_norm": 1.077260971069336, |
|
"kl": 0.04970019310712814, |
|
"learning_rate": 4.854409353316796e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 156 |
|
}, |
|
{ |
|
"completion_length": 202.92857360839844, |
|
"epoch": 0.10986703988803359, |
|
"grad_norm": 1.7790058851242065, |
|
"kl": 0.03502151742577553, |
|
"learning_rate": 4.852555449757679e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 157 |
|
}, |
|
{ |
|
"completion_length": 146.07144165039062, |
|
"epoch": 0.11056682995101469, |
|
"grad_norm": 3.6491763591766357, |
|
"kl": 0.08282416313886642, |
|
"learning_rate": 4.850690175818503e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.6060914993286133, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 158 |
|
}, |
|
{ |
|
"completion_length": 175.50001525878906, |
|
"epoch": 0.1112666200139958, |
|
"grad_norm": 1.9410632848739624, |
|
"kl": 0.04635734483599663, |
|
"learning_rate": 4.848813540514516e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 159 |
|
}, |
|
{ |
|
"completion_length": 193.07144165039062, |
|
"epoch": 0.11196641007697691, |
|
"grad_norm": 1.0959712266921997, |
|
"kl": 0.03846450522542, |
|
"learning_rate": 4.846925552915878e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 160 |
|
}, |
|
{ |
|
"completion_length": 194.1428680419922, |
|
"epoch": 0.11266620013995801, |
|
"grad_norm": 1.617163896560669, |
|
"kl": 0.04549946263432503, |
|
"learning_rate": 4.845026222147619e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 161 |
|
}, |
|
{ |
|
"completion_length": 221.9285888671875, |
|
"epoch": 0.11336599020293912, |
|
"grad_norm": 0.004491452127695084, |
|
"kl": 0.03957492858171463, |
|
"learning_rate": 4.843115557389591e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 162 |
|
}, |
|
{ |
|
"completion_length": 192.42857360839844, |
|
"epoch": 0.11406578026592022, |
|
"grad_norm": 0.7011159658432007, |
|
"kl": 0.04245857894420624, |
|
"learning_rate": 4.841193567876428e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 163 |
|
}, |
|
{ |
|
"completion_length": 136.0, |
|
"epoch": 0.11476557032890133, |
|
"grad_norm": 1.3152964115142822, |
|
"kl": 0.06837064027786255, |
|
"learning_rate": 4.839260262897494e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 164 |
|
}, |
|
{ |
|
"completion_length": 160.92857360839844, |
|
"epoch": 0.11546536039188243, |
|
"grad_norm": 1.6138628721237183, |
|
"kl": 0.04630711302161217, |
|
"learning_rate": 4.83731565179685e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 165 |
|
}, |
|
{ |
|
"completion_length": 204.35714721679688, |
|
"epoch": 0.11616515045486354, |
|
"grad_norm": 2.033738136291504, |
|
"kl": 0.041600391268730164, |
|
"learning_rate": 4.835359743973194e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 166 |
|
}, |
|
{ |
|
"completion_length": 206.00001525878906, |
|
"epoch": 0.11686494051784464, |
|
"grad_norm": 1.2452963590621948, |
|
"kl": 0.03693629428744316, |
|
"learning_rate": 4.833392548879829e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 167 |
|
}, |
|
{ |
|
"completion_length": 160.5, |
|
"epoch": 0.11756473058082575, |
|
"grad_norm": 2.5279667377471924, |
|
"kl": 0.07246419787406921, |
|
"learning_rate": 4.831414076024611e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 168 |
|
}, |
|
{ |
|
"completion_length": 226.4285888671875, |
|
"epoch": 0.11826452064380685, |
|
"grad_norm": 0.5257318019866943, |
|
"kl": 0.009776223450899124, |
|
"learning_rate": 4.829424334969902e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 169 |
|
}, |
|
{ |
|
"completion_length": 208.35714721679688, |
|
"epoch": 0.11896431070678797, |
|
"grad_norm": 1.4458142518997192, |
|
"kl": 0.04920278489589691, |
|
"learning_rate": 4.827423335332524e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 170 |
|
}, |
|
{ |
|
"completion_length": 195.42857360839844, |
|
"epoch": 0.11966410076976906, |
|
"grad_norm": 1.7298688888549805, |
|
"kl": 0.04254806786775589, |
|
"learning_rate": 4.825411086783718e-07, |
|
"loss": 0.0, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 171 |
|
}, |
|
{ |
|
"completion_length": 187.35714721679688, |
|
"epoch": 0.12036389083275018, |
|
"grad_norm": 0.3279581069946289, |
|
"kl": 0.05654177814722061, |
|
"learning_rate": 4.823387599049092e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 172 |
|
}, |
|
{ |
|
"completion_length": 186.42857360839844, |
|
"epoch": 0.12106368089573127, |
|
"grad_norm": 1.4074788093566895, |
|
"kl": 0.06294237822294235, |
|
"learning_rate": 4.821352881908575e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 173 |
|
}, |
|
{ |
|
"completion_length": 186.92857360839844, |
|
"epoch": 0.12176347095871239, |
|
"grad_norm": 1.5504415035247803, |
|
"kl": 0.06913281977176666, |
|
"learning_rate": 4.819306945196367e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 174 |
|
}, |
|
{ |
|
"completion_length": 204.00001525878906, |
|
"epoch": 0.1224632610216935, |
|
"grad_norm": 1.8601367473602295, |
|
"kl": 0.04214514046907425, |
|
"learning_rate": 4.817249798800898e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 175 |
|
}, |
|
{ |
|
"completion_length": 195.07144165039062, |
|
"epoch": 0.1231630510846746, |
|
"grad_norm": 2.3070361614227295, |
|
"kl": 0.04426455870270729, |
|
"learning_rate": 4.81518145266478e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 176 |
|
}, |
|
{ |
|
"completion_length": 179.2857208251953, |
|
"epoch": 0.12386284114765571, |
|
"grad_norm": 1.0684874057769775, |
|
"kl": 0.052644453942775726, |
|
"learning_rate": 4.813101916784745e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 177 |
|
}, |
|
{ |
|
"completion_length": 204.57144165039062, |
|
"epoch": 0.1245626312106368, |
|
"grad_norm": 1.765725016593933, |
|
"kl": 0.03909705579280853, |
|
"learning_rate": 4.811011201211619e-07, |
|
"loss": 0.0, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 178 |
|
}, |
|
{ |
|
"completion_length": 187.42857360839844, |
|
"epoch": 0.12526242127361792, |
|
"grad_norm": 2.2498366832733154, |
|
"kl": 0.052292656153440475, |
|
"learning_rate": 4.808909316050253e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 179 |
|
}, |
|
{ |
|
"completion_length": 156.57144165039062, |
|
"epoch": 0.12596221133659902, |
|
"grad_norm": 0.8897135853767395, |
|
"kl": 0.07806211709976196, |
|
"learning_rate": 4.80679627145949e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 180 |
|
}, |
|
{ |
|
"completion_length": 196.42857360839844, |
|
"epoch": 0.12666200139958012, |
|
"grad_norm": 1.9912126064300537, |
|
"kl": 0.05071377381682396, |
|
"learning_rate": 4.804672077652102e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 181 |
|
}, |
|
{ |
|
"completion_length": 210.50001525878906, |
|
"epoch": 0.12736179146256124, |
|
"grad_norm": 1.0759501457214355, |
|
"kl": 0.05055666342377663, |
|
"learning_rate": 4.802536744894753e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 182 |
|
}, |
|
{ |
|
"completion_length": 208.57144165039062, |
|
"epoch": 0.12806158152554234, |
|
"grad_norm": 1.1025447845458984, |
|
"kl": 0.06112780049443245, |
|
"learning_rate": 4.80039028350794e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 183 |
|
}, |
|
{ |
|
"completion_length": 177.92857360839844, |
|
"epoch": 0.12876137158852344, |
|
"grad_norm": 1.1993169784545898, |
|
"kl": 0.06688597798347473, |
|
"learning_rate": 4.79823270386595e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 184 |
|
}, |
|
{ |
|
"completion_length": 180.50001525878906, |
|
"epoch": 0.12946116165150454, |
|
"grad_norm": 0.008792253211140633, |
|
"kl": 0.0775587409734726, |
|
"learning_rate": 4.796064016396803e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"completion_length": 201.21429443359375, |
|
"epoch": 0.13016095171448566, |
|
"grad_norm": 0.8877134919166565, |
|
"kl": 0.058806754648685455, |
|
"learning_rate": 4.793884231582209e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 186 |
|
}, |
|
{ |
|
"completion_length": 205.6428680419922, |
|
"epoch": 0.13086074177746676, |
|
"grad_norm": 1.8853565454483032, |
|
"kl": 0.04727332293987274, |
|
"learning_rate": 4.791693359957512e-07, |
|
"loss": 0.0, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 187 |
|
}, |
|
{ |
|
"completion_length": 172.85714721679688, |
|
"epoch": 0.13156053184044786, |
|
"grad_norm": 1.3133251667022705, |
|
"kl": 0.11437404900789261, |
|
"learning_rate": 4.789491412111643e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 188 |
|
}, |
|
{ |
|
"completion_length": 198.07144165039062, |
|
"epoch": 0.13226032190342898, |
|
"grad_norm": 2.346076726913452, |
|
"kl": 0.0714813843369484, |
|
"learning_rate": 4.787278398687061e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 189 |
|
}, |
|
{ |
|
"completion_length": 176.7857208251953, |
|
"epoch": 0.13296011196641008, |
|
"grad_norm": 2.2410197257995605, |
|
"kl": 0.06597181409597397, |
|
"learning_rate": 4.785054330379715e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 190 |
|
}, |
|
{ |
|
"completion_length": 175.1428680419922, |
|
"epoch": 0.13365990202939118, |
|
"grad_norm": 1.9752206802368164, |
|
"kl": 0.061408672481775284, |
|
"learning_rate": 4.782819217938978e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 191 |
|
}, |
|
{ |
|
"completion_length": 174.42857360839844, |
|
"epoch": 0.13435969209237228, |
|
"grad_norm": 1.4264745712280273, |
|
"kl": 0.09011290967464447, |
|
"learning_rate": 4.780573072167606e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 192 |
|
}, |
|
{ |
|
"completion_length": 180.35714721679688, |
|
"epoch": 0.1350594821553534, |
|
"grad_norm": 1.7050994634628296, |
|
"kl": 0.060319699347019196, |
|
"learning_rate": 4.778315903921676e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 193 |
|
}, |
|
{ |
|
"completion_length": 173.42857360839844, |
|
"epoch": 0.1357592722183345, |
|
"grad_norm": 1.6260625123977661, |
|
"kl": 0.09306792914867401, |
|
"learning_rate": 4.776047724110545e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 194 |
|
}, |
|
{ |
|
"completion_length": 185.35714721679688, |
|
"epoch": 0.1364590622813156, |
|
"grad_norm": 0.016425790265202522, |
|
"kl": 0.0992843434214592, |
|
"learning_rate": 4.773768543696789e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 195 |
|
}, |
|
{ |
|
"completion_length": 158.92857360839844, |
|
"epoch": 0.1371588523442967, |
|
"grad_norm": 2.4321110248565674, |
|
"kl": 0.10799706727266312, |
|
"learning_rate": 4.771478373696149e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 196 |
|
}, |
|
{ |
|
"completion_length": 176.85714721679688, |
|
"epoch": 0.13785864240727783, |
|
"grad_norm": 2.087998867034912, |
|
"kl": 0.05710326135158539, |
|
"learning_rate": 4.769177225177486e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 197 |
|
}, |
|
{ |
|
"completion_length": 190.21429443359375, |
|
"epoch": 0.13855843247025892, |
|
"grad_norm": 0.00513538857921958, |
|
"kl": 0.047487739473581314, |
|
"learning_rate": 4.766865109262719e-07, |
|
"loss": 0.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"completion_length": 160.92857360839844, |
|
"epoch": 0.13925822253324002, |
|
"grad_norm": 1.89316725730896, |
|
"kl": 0.09987679868936539, |
|
"learning_rate": 4.7645420371267744e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 199 |
|
}, |
|
{ |
|
"completion_length": 165.1428680419922, |
|
"epoch": 0.13995801259622112, |
|
"grad_norm": 1.454157829284668, |
|
"kl": 0.08893325924873352, |
|
"learning_rate": 4.762208019997534e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 200 |
|
}, |
|
{ |
|
"completion_length": 167.92857360839844, |
|
"epoch": 0.14065780265920225, |
|
"grad_norm": 1.7186192274093628, |
|
"kl": 0.07974901795387268, |
|
"learning_rate": 4.759863069155779e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 201 |
|
}, |
|
{ |
|
"completion_length": 170.0, |
|
"epoch": 0.14135759272218335, |
|
"grad_norm": 0.008198251947760582, |
|
"kl": 0.08952844887971878, |
|
"learning_rate": 4.757507195935134e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"completion_length": 187.50001525878906, |
|
"epoch": 0.14205738278516444, |
|
"grad_norm": 1.3979917764663696, |
|
"kl": 0.07551268488168716, |
|
"learning_rate": 4.7551404117220163e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 203 |
|
}, |
|
{ |
|
"completion_length": 172.85714721679688, |
|
"epoch": 0.14275717284814557, |
|
"grad_norm": 1.877791166305542, |
|
"kl": 0.06155999377369881, |
|
"learning_rate": 4.7527627279555747e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 204 |
|
}, |
|
{ |
|
"completion_length": 189.6428680419922, |
|
"epoch": 0.14345696291112667, |
|
"grad_norm": 1.1798770427703857, |
|
"kl": 0.07333328574895859, |
|
"learning_rate": 4.75037415612764e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 205 |
|
}, |
|
{ |
|
"completion_length": 193.85714721679688, |
|
"epoch": 0.14415675297410777, |
|
"grad_norm": 0.004300639498978853, |
|
"kl": 0.04362697899341583, |
|
"learning_rate": 4.747974707782666e-07, |
|
"loss": 0.0, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 206 |
|
}, |
|
{ |
|
"completion_length": 187.21429443359375, |
|
"epoch": 0.14485654303708886, |
|
"grad_norm": 2.0763752460479736, |
|
"kl": 0.08255599439144135, |
|
"learning_rate": 4.745564394517677e-07, |
|
"loss": 0.0001, |
|
"reward": 0.4285714626312256, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.4285714626312256, |
|
"step": 207 |
|
}, |
|
{ |
|
"completion_length": 182.85714721679688, |
|
"epoch": 0.14555633310007, |
|
"grad_norm": 2.0227575302124023, |
|
"kl": 0.05811536684632301, |
|
"learning_rate": 4.743143227982208e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 208 |
|
}, |
|
{ |
|
"completion_length": 201.85714721679688, |
|
"epoch": 0.1462561231630511, |
|
"grad_norm": 2.1518890857696533, |
|
"kl": 0.06606665253639221, |
|
"learning_rate": 4.7407112198782507e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 209 |
|
}, |
|
{ |
|
"completion_length": 187.71429443359375, |
|
"epoch": 0.1469559132260322, |
|
"grad_norm": 1.5681232213974, |
|
"kl": 0.0777505487203598, |
|
"learning_rate": 4.738268381960195e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 210 |
|
}, |
|
{ |
|
"completion_length": 193.71429443359375, |
|
"epoch": 0.14765570328901328, |
|
"grad_norm": 2.0419561862945557, |
|
"kl": 0.08574096113443375, |
|
"learning_rate": 4.735814726034775e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 211 |
|
}, |
|
{ |
|
"completion_length": 176.71429443359375, |
|
"epoch": 0.1483554933519944, |
|
"grad_norm": 2.2535557746887207, |
|
"kl": 0.08857802301645279, |
|
"learning_rate": 4.73335026396101e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 212 |
|
}, |
|
{ |
|
"completion_length": 205.00001525878906, |
|
"epoch": 0.1490552834149755, |
|
"grad_norm": 1.9866529703140259, |
|
"kl": 0.05292090028524399, |
|
"learning_rate": 4.730875007650148e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 213 |
|
}, |
|
{ |
|
"completion_length": 151.1428680419922, |
|
"epoch": 0.1497550734779566, |
|
"grad_norm": 0.011737847700715065, |
|
"kl": 0.09154846519231796, |
|
"learning_rate": 4.7283889690656074e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 214 |
|
}, |
|
{ |
|
"completion_length": 174.21429443359375, |
|
"epoch": 0.1504548635409377, |
|
"grad_norm": 1.76470947265625, |
|
"kl": 0.07139679044485092, |
|
"learning_rate": 4.7258921602229183e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5714285969734192, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.5714285969734192, |
|
"step": 215 |
|
}, |
|
{ |
|
"completion_length": 153.6428680419922, |
|
"epoch": 0.15115465360391883, |
|
"grad_norm": 2.0710690021514893, |
|
"kl": 0.10847283899784088, |
|
"learning_rate": 4.723384593189668e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 216 |
|
}, |
|
{ |
|
"completion_length": 167.57144165039062, |
|
"epoch": 0.15185444366689993, |
|
"grad_norm": 1.4053919315338135, |
|
"kl": 0.1707335263490677, |
|
"learning_rate": 4.720866280085439e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 217 |
|
}, |
|
{ |
|
"completion_length": 201.7857208251953, |
|
"epoch": 0.15255423372988103, |
|
"grad_norm": 1.9279898405075073, |
|
"kl": 0.03592146560549736, |
|
"learning_rate": 4.718337233081751e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 218 |
|
}, |
|
{ |
|
"completion_length": 228.6428680419922, |
|
"epoch": 0.15325402379286215, |
|
"grad_norm": 1.699992299079895, |
|
"kl": 0.03932465240359306, |
|
"learning_rate": 4.715797464402005e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 219 |
|
}, |
|
{ |
|
"completion_length": 194.57144165039062, |
|
"epoch": 0.15395381385584325, |
|
"grad_norm": 1.894227385520935, |
|
"kl": 0.07857631146907806, |
|
"learning_rate": 4.7132469863214197e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 220 |
|
}, |
|
{ |
|
"completion_length": 193.85714721679688, |
|
"epoch": 0.15465360391882435, |
|
"grad_norm": 1.5377225875854492, |
|
"kl": 0.05296279862523079, |
|
"learning_rate": 4.7106858111669757e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 221 |
|
}, |
|
{ |
|
"completion_length": 174.57144165039062, |
|
"epoch": 0.15535339398180545, |
|
"grad_norm": 1.7744231224060059, |
|
"kl": 0.10999920219182968, |
|
"learning_rate": 4.7081139513173554e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 222 |
|
}, |
|
{ |
|
"completion_length": 200.85714721679688, |
|
"epoch": 0.15605318404478657, |
|
"grad_norm": 1.0742552280426025, |
|
"kl": 0.07724467664957047, |
|
"learning_rate": 4.7055314192028806e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 223 |
|
}, |
|
{ |
|
"completion_length": 191.85714721679688, |
|
"epoch": 0.15675297410776767, |
|
"grad_norm": 0.025704611092805862, |
|
"kl": 0.09701688587665558, |
|
"learning_rate": 4.702938227305457e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 224 |
|
}, |
|
{ |
|
"completion_length": 199.35714721679688, |
|
"epoch": 0.15745276417074877, |
|
"grad_norm": 2.207791328430176, |
|
"kl": 0.051319669932127, |
|
"learning_rate": 4.7003343881585076e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 225 |
|
}, |
|
{ |
|
"completion_length": 188.07144165039062, |
|
"epoch": 0.15815255423372987, |
|
"grad_norm": 2.289534568786621, |
|
"kl": 0.0778493583202362, |
|
"learning_rate": 4.6977199143469204e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 226 |
|
}, |
|
{ |
|
"completion_length": 177.71429443359375, |
|
"epoch": 0.158852344296711, |
|
"grad_norm": 0.006138416472822428, |
|
"kl": 0.06990097463130951, |
|
"learning_rate": 4.6950948185069784e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"completion_length": 193.42857360839844, |
|
"epoch": 0.1595521343596921, |
|
"grad_norm": 1.7739338874816895, |
|
"kl": 0.08103703707456589, |
|
"learning_rate": 4.6924591133263046e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 228 |
|
}, |
|
{ |
|
"completion_length": 185.07144165039062, |
|
"epoch": 0.1602519244226732, |
|
"grad_norm": 0.008447487838566303, |
|
"kl": 0.10397353023290634, |
|
"learning_rate": 4.6898128115438006e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 229 |
|
}, |
|
{ |
|
"completion_length": 166.07144165039062, |
|
"epoch": 0.1609517144856543, |
|
"grad_norm": 1.9681862592697144, |
|
"kl": 0.09456028044223785, |
|
"learning_rate": 4.6871559259495825e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 230 |
|
}, |
|
{ |
|
"completion_length": 211.21429443359375, |
|
"epoch": 0.16165150454863542, |
|
"grad_norm": 1.1290661096572876, |
|
"kl": 0.06683924794197083, |
|
"learning_rate": 4.684488469384919e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 231 |
|
}, |
|
{ |
|
"completion_length": 201.6428680419922, |
|
"epoch": 0.16235129461161651, |
|
"grad_norm": 1.9820340871810913, |
|
"kl": 0.06850861012935638, |
|
"learning_rate": 4.681810454742172e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 232 |
|
}, |
|
{ |
|
"completion_length": 160.6428680419922, |
|
"epoch": 0.1630510846745976, |
|
"grad_norm": 1.4984009265899658, |
|
"kl": 0.07940242439508438, |
|
"learning_rate": 4.6791218949647323e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 233 |
|
}, |
|
{ |
|
"completion_length": 148.21429443359375, |
|
"epoch": 0.16375087473757874, |
|
"grad_norm": 1.4637259244918823, |
|
"kl": 0.1317509263753891, |
|
"learning_rate": 4.676422803046957e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 234 |
|
}, |
|
{ |
|
"completion_length": 183.85714721679688, |
|
"epoch": 0.16445066480055984, |
|
"grad_norm": 2.262010335922241, |
|
"kl": 0.09411350637674332, |
|
"learning_rate": 4.673713192034108e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.5, |
|
"step": 235 |
|
}, |
|
{ |
|
"completion_length": 171.50001525878906, |
|
"epoch": 0.16515045486354094, |
|
"grad_norm": 2.23311448097229, |
|
"kl": 0.09654156118631363, |
|
"learning_rate": 4.670993075022286e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 236 |
|
}, |
|
{ |
|
"completion_length": 156.92857360839844, |
|
"epoch": 0.16585024492652203, |
|
"grad_norm": 1.4783121347427368, |
|
"kl": 0.15734656155109406, |
|
"learning_rate": 4.668262465158374e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 237 |
|
}, |
|
{ |
|
"completion_length": 172.57144165039062, |
|
"epoch": 0.16655003498950316, |
|
"grad_norm": 1.6374164819717407, |
|
"kl": 0.1168614849448204, |
|
"learning_rate": 4.6655213756399644e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 238 |
|
}, |
|
{ |
|
"completion_length": 164.5, |
|
"epoch": 0.16724982505248426, |
|
"grad_norm": 1.1472550630569458, |
|
"kl": 0.09259577840566635, |
|
"learning_rate": 4.662769819715301e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 239 |
|
}, |
|
{ |
|
"completion_length": 179.2857208251953, |
|
"epoch": 0.16794961511546536, |
|
"grad_norm": 0.005632943473756313, |
|
"kl": 0.0763566866517067, |
|
"learning_rate": 4.6600078106832156e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"completion_length": 171.42857360839844, |
|
"epoch": 0.16864940517844645, |
|
"grad_norm": 2.182656764984131, |
|
"kl": 0.09295495599508286, |
|
"learning_rate": 4.65723536189306e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 241 |
|
}, |
|
{ |
|
"completion_length": 198.6428680419922, |
|
"epoch": 0.16934919524142758, |
|
"grad_norm": 1.6902788877487183, |
|
"kl": 0.04901423305273056, |
|
"learning_rate": 4.654452486744646e-07, |
|
"loss": 0.0, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 242 |
|
}, |
|
{ |
|
"completion_length": 185.07144165039062, |
|
"epoch": 0.17004898530440868, |
|
"grad_norm": 1.950689673423767, |
|
"kl": 0.08940432220697403, |
|
"learning_rate": 4.6516591986881747e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 243 |
|
}, |
|
{ |
|
"completion_length": 187.00001525878906, |
|
"epoch": 0.17074877536738978, |
|
"grad_norm": 2.1791460514068604, |
|
"kl": 0.09629469364881516, |
|
"learning_rate": 4.648855511224178e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 244 |
|
}, |
|
{ |
|
"completion_length": 166.35714721679688, |
|
"epoch": 0.17144856543037088, |
|
"grad_norm": 1.535792589187622, |
|
"kl": 0.12591317296028137, |
|
"learning_rate": 4.6460414379034475e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 245 |
|
}, |
|
{ |
|
"completion_length": 175.00001525878906, |
|
"epoch": 0.172148355493352, |
|
"grad_norm": 1.4731642007827759, |
|
"kl": 0.11351439356803894, |
|
"learning_rate": 4.643216992326974e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 246 |
|
}, |
|
{ |
|
"completion_length": 185.07144165039062, |
|
"epoch": 0.1728481455563331, |
|
"grad_norm": 0.593459963798523, |
|
"kl": 0.104140505194664, |
|
"learning_rate": 4.6403821881458794e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 247 |
|
}, |
|
{ |
|
"completion_length": 183.50001525878906, |
|
"epoch": 0.1735479356193142, |
|
"grad_norm": 2.4581611156463623, |
|
"kl": 0.09627315402030945, |
|
"learning_rate": 4.6375370390613493e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 248 |
|
}, |
|
{ |
|
"completion_length": 188.00001525878906, |
|
"epoch": 0.17424772568229532, |
|
"grad_norm": 1.4998970031738281, |
|
"kl": 0.06777679920196533, |
|
"learning_rate": 4.634681558824569e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 249 |
|
}, |
|
{ |
|
"completion_length": 164.1428680419922, |
|
"epoch": 0.17494751574527642, |
|
"grad_norm": 1.3858827352523804, |
|
"kl": 0.14686499536037445, |
|
"learning_rate": 4.631815761236658e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 250 |
|
}, |
|
{ |
|
"completion_length": 166.2857208251953, |
|
"epoch": 0.17564730580825752, |
|
"grad_norm": 1.2534476518630981, |
|
"kl": 0.08350227028131485, |
|
"learning_rate": 4.6289396601485984e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 251 |
|
}, |
|
{ |
|
"completion_length": 166.92857360839844, |
|
"epoch": 0.17634709587123862, |
|
"grad_norm": 2.1396963596343994, |
|
"kl": 0.0865258201956749, |
|
"learning_rate": 4.6260532694611746e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 252 |
|
}, |
|
{ |
|
"completion_length": 166.6428680419922, |
|
"epoch": 0.17704688593421974, |
|
"grad_norm": 1.9916664361953735, |
|
"kl": 0.10534200072288513, |
|
"learning_rate": 4.6231566031249005e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 253 |
|
}, |
|
{ |
|
"completion_length": 178.85714721679688, |
|
"epoch": 0.17774667599720084, |
|
"grad_norm": 1.3563461303710938, |
|
"kl": 0.11321873962879181, |
|
"learning_rate": 4.620249675139955e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 254 |
|
}, |
|
{ |
|
"completion_length": 195.21429443359375, |
|
"epoch": 0.17844646606018194, |
|
"grad_norm": 2.104259490966797, |
|
"kl": 0.09261036664247513, |
|
"learning_rate": 4.617332499556114e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 255 |
|
}, |
|
{ |
|
"completion_length": 178.1428680419922, |
|
"epoch": 0.17914625612316304, |
|
"grad_norm": 1.6002072095870972, |
|
"kl": 0.0814397856593132, |
|
"learning_rate": 4.6144050904726807e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 256 |
|
}, |
|
{ |
|
"completion_length": 150.92857360839844, |
|
"epoch": 0.17984604618614417, |
|
"grad_norm": 1.9661792516708374, |
|
"kl": 0.12051386386156082, |
|
"learning_rate": 4.611467462038421e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 257 |
|
}, |
|
{ |
|
"completion_length": 168.6428680419922, |
|
"epoch": 0.18054583624912526, |
|
"grad_norm": 0.008869047276675701, |
|
"kl": 0.09992615133523941, |
|
"learning_rate": 4.6085196284514917e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 258 |
|
}, |
|
{ |
|
"completion_length": 211.50001525878906, |
|
"epoch": 0.18124562631210636, |
|
"grad_norm": 1.525679111480713, |
|
"kl": 0.06289325654506683, |
|
"learning_rate": 4.605561603959373e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 259 |
|
}, |
|
{ |
|
"completion_length": 192.35714721679688, |
|
"epoch": 0.1819454163750875, |
|
"grad_norm": 1.9906597137451172, |
|
"kl": 0.06764493137598038, |
|
"learning_rate": 4.602593402858801e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 260 |
|
}, |
|
{ |
|
"completion_length": 174.35714721679688, |
|
"epoch": 0.18264520643806859, |
|
"grad_norm": 0.009503593668341637, |
|
"kl": 0.10391350090503693, |
|
"learning_rate": 4.5996150394956956e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"completion_length": 179.00001525878906, |
|
"epoch": 0.18334499650104968, |
|
"grad_norm": 2.494394540786743, |
|
"kl": 0.09360729157924652, |
|
"learning_rate": 4.596626528265096e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 262 |
|
}, |
|
{ |
|
"completion_length": 186.07144165039062, |
|
"epoch": 0.18404478656403078, |
|
"grad_norm": 1.2480868101119995, |
|
"kl": 0.09742289036512375, |
|
"learning_rate": 4.593627883611084e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 263 |
|
}, |
|
{ |
|
"completion_length": 198.50001525878906, |
|
"epoch": 0.1847445766270119, |
|
"grad_norm": 0.007387984078377485, |
|
"kl": 0.09463205933570862, |
|
"learning_rate": 4.5906191200267227e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"completion_length": 189.92857360839844, |
|
"epoch": 0.185444366689993, |
|
"grad_norm": 1.158292531967163, |
|
"kl": 0.08757057040929794, |
|
"learning_rate": 4.587600252053978e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 265 |
|
}, |
|
{ |
|
"completion_length": 196.71429443359375, |
|
"epoch": 0.1861441567529741, |
|
"grad_norm": 1.756199598312378, |
|
"kl": 0.07568582892417908, |
|
"learning_rate": 4.584571294283655e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 266 |
|
}, |
|
{ |
|
"completion_length": 161.21429443359375, |
|
"epoch": 0.1868439468159552, |
|
"grad_norm": 2.2403769493103027, |
|
"kl": 0.13164493441581726, |
|
"learning_rate": 4.581532261355324e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 267 |
|
}, |
|
{ |
|
"completion_length": 172.07144165039062, |
|
"epoch": 0.18754373687893633, |
|
"grad_norm": 1.4092003107070923, |
|
"kl": 0.09087042510509491, |
|
"learning_rate": 4.57848316795725e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 268 |
|
}, |
|
{ |
|
"completion_length": 194.57144165039062, |
|
"epoch": 0.18824352694191743, |
|
"grad_norm": 1.1658211946487427, |
|
"kl": 0.09678646922111511, |
|
"learning_rate": 4.5754240288263235e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 269 |
|
}, |
|
{ |
|
"completion_length": 176.50001525878906, |
|
"epoch": 0.18894331700489853, |
|
"grad_norm": 0.9565421938896179, |
|
"kl": 0.11772450804710388, |
|
"learning_rate": 4.572354858747988e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 270 |
|
}, |
|
{ |
|
"completion_length": 169.21429443359375, |
|
"epoch": 0.18964310706787962, |
|
"grad_norm": 1.0944063663482666, |
|
"kl": 0.12249249219894409, |
|
"learning_rate": 4.569275672556168e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 271 |
|
}, |
|
{ |
|
"completion_length": 201.35714721679688, |
|
"epoch": 0.19034289713086075, |
|
"grad_norm": 0.015515509061515331, |
|
"kl": 0.10365406423807144, |
|
"learning_rate": 4.566186485133198e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 272 |
|
}, |
|
{ |
|
"completion_length": 170.42857360839844, |
|
"epoch": 0.19104268719384185, |
|
"grad_norm": 2.527707099914551, |
|
"kl": 0.1117943748831749, |
|
"learning_rate": 4.563087311409749e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 273 |
|
}, |
|
{ |
|
"completion_length": 164.07144165039062, |
|
"epoch": 0.19174247725682295, |
|
"grad_norm": 1.0743855237960815, |
|
"kl": 0.1141185462474823, |
|
"learning_rate": 4.559978166364762e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 274 |
|
}, |
|
{ |
|
"completion_length": 186.21429443359375, |
|
"epoch": 0.19244226731980407, |
|
"grad_norm": 1.6366993188858032, |
|
"kl": 0.09075611084699631, |
|
"learning_rate": 4.556859065025367e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 275 |
|
}, |
|
{ |
|
"completion_length": 190.00001525878906, |
|
"epoch": 0.19314205738278517, |
|
"grad_norm": 0.01647718995809555, |
|
"kl": 0.12315469980239868, |
|
"learning_rate": 4.553730022466817e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"completion_length": 158.85714721679688, |
|
"epoch": 0.19384184744576627, |
|
"grad_norm": 1.7552223205566406, |
|
"kl": 0.1490643471479416, |
|
"learning_rate": 4.5505910538124125e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 277 |
|
}, |
|
{ |
|
"completion_length": 181.57144165039062, |
|
"epoch": 0.19454163750874737, |
|
"grad_norm": 2.0591378211975098, |
|
"kl": 0.12751373648643494, |
|
"learning_rate": 4.5474421742334294e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 278 |
|
}, |
|
{ |
|
"completion_length": 219.21429443359375, |
|
"epoch": 0.1952414275717285, |
|
"grad_norm": 2.145087957382202, |
|
"kl": 0.08176664263010025, |
|
"learning_rate": 4.5442833989490443e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 279 |
|
}, |
|
{ |
|
"completion_length": 207.85714721679688, |
|
"epoch": 0.1959412176347096, |
|
"grad_norm": 1.8617810010910034, |
|
"kl": 0.09537741541862488, |
|
"learning_rate": 4.541114743226262e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 280 |
|
}, |
|
{ |
|
"completion_length": 196.92857360839844, |
|
"epoch": 0.1966410076976907, |
|
"grad_norm": 1.545285701751709, |
|
"kl": 0.16356630623340607, |
|
"learning_rate": 4.537936222379841e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 281 |
|
}, |
|
{ |
|
"completion_length": 198.1428680419922, |
|
"epoch": 0.1973407977606718, |
|
"grad_norm": 1.760254144668579, |
|
"kl": 0.12825290858745575, |
|
"learning_rate": 4.5347478517722215e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 282 |
|
}, |
|
{ |
|
"completion_length": 190.21429443359375, |
|
"epoch": 0.1980405878236529, |
|
"grad_norm": 1.869924783706665, |
|
"kl": 0.09077086299657822, |
|
"learning_rate": 4.5315496468134484e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 283 |
|
}, |
|
{ |
|
"completion_length": 172.21429443359375, |
|
"epoch": 0.198740377886634, |
|
"grad_norm": 2.9378786087036133, |
|
"kl": 0.08580020815134048, |
|
"learning_rate": 4.5283416229610977e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 284 |
|
}, |
|
{ |
|
"completion_length": 213.35714721679688, |
|
"epoch": 0.1994401679496151, |
|
"grad_norm": 0.008922022767364979, |
|
"kl": 0.1027616560459137, |
|
"learning_rate": 4.525123795720203e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"completion_length": 185.2857208251953, |
|
"epoch": 0.2001399580125962, |
|
"grad_norm": 0.02594582363963127, |
|
"kl": 0.14763151109218597, |
|
"learning_rate": 4.5218961806431793e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"completion_length": 158.0, |
|
"epoch": 0.20083974807557733, |
|
"grad_norm": 1.2283246517181396, |
|
"kl": 0.1193583756685257, |
|
"learning_rate": 4.518658793329748e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 287 |
|
}, |
|
{ |
|
"completion_length": 201.85714721679688, |
|
"epoch": 0.20153953813855843, |
|
"grad_norm": 0.008988989517092705, |
|
"kl": 0.08033312857151031, |
|
"learning_rate": 4.5154116494268623e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 288 |
|
}, |
|
{ |
|
"completion_length": 230.2857208251953, |
|
"epoch": 0.20223932820153953, |
|
"grad_norm": 1.0760490894317627, |
|
"kl": 0.07163004577159882, |
|
"learning_rate": 4.5121547646286305e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 289 |
|
}, |
|
{ |
|
"completion_length": 162.5, |
|
"epoch": 0.20293911826452066, |
|
"grad_norm": 1.913016676902771, |
|
"kl": 0.11723782122135162, |
|
"learning_rate": 4.5088881546762405e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 290 |
|
}, |
|
{ |
|
"completion_length": 146.5, |
|
"epoch": 0.20363890832750176, |
|
"grad_norm": 1.1676669120788574, |
|
"kl": 0.15099795162677765, |
|
"learning_rate": 4.5056118353578845e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 291 |
|
}, |
|
{ |
|
"completion_length": 191.57144165039062, |
|
"epoch": 0.20433869839048285, |
|
"grad_norm": 1.674120545387268, |
|
"kl": 0.10339002311229706, |
|
"learning_rate": 4.502325822508681e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 292 |
|
}, |
|
{ |
|
"completion_length": 179.1428680419922, |
|
"epoch": 0.20503848845346395, |
|
"grad_norm": 1.9627193212509155, |
|
"kl": 0.12403272092342377, |
|
"learning_rate": 4.4990301320106005e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 293 |
|
}, |
|
{ |
|
"completion_length": 181.50001525878906, |
|
"epoch": 0.20573827851644508, |
|
"grad_norm": 0.00794507097452879, |
|
"kl": 0.10637389123439789, |
|
"learning_rate": 4.495724779792387e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"completion_length": 152.35714721679688, |
|
"epoch": 0.20643806857942618, |
|
"grad_norm": 0.682245135307312, |
|
"kl": 0.12384691089391708, |
|
"learning_rate": 4.492409781829481e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 295 |
|
}, |
|
{ |
|
"completion_length": 171.50001525878906, |
|
"epoch": 0.20713785864240727, |
|
"grad_norm": 4.1288933753967285, |
|
"kl": 0.13875921070575714, |
|
"learning_rate": 4.4890851541439443e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 296 |
|
}, |
|
{ |
|
"completion_length": 198.71429443359375, |
|
"epoch": 0.20783764870538837, |
|
"grad_norm": 1.1377445459365845, |
|
"kl": 0.09420619904994965, |
|
"learning_rate": 4.4857509128043804e-07, |
|
"loss": 0.0001, |
|
"reward": 0.6428571939468384, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.6428571939468384, |
|
"step": 297 |
|
}, |
|
{ |
|
"completion_length": 164.07144165039062, |
|
"epoch": 0.2085374387683695, |
|
"grad_norm": 1.9819285869598389, |
|
"kl": 0.14920377731323242, |
|
"learning_rate": 4.4824070739258555e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 298 |
|
}, |
|
{ |
|
"completion_length": 181.71429443359375, |
|
"epoch": 0.2092372288313506, |
|
"grad_norm": 3.7659053802490234, |
|
"kl": 0.10090445727109909, |
|
"learning_rate": 4.4790536536698265e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 299 |
|
}, |
|
{ |
|
"completion_length": 192.35714721679688, |
|
"epoch": 0.2099370188943317, |
|
"grad_norm": 0.00978009682148695, |
|
"kl": 0.101994089782238, |
|
"learning_rate": 4.4756906682440566e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"completion_length": 177.07144165039062, |
|
"epoch": 0.2106368089573128, |
|
"grad_norm": 1.9101574420928955, |
|
"kl": 0.1070994883775711, |
|
"learning_rate": 4.4723181339025394e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 301 |
|
}, |
|
{ |
|
"completion_length": 180.35714721679688, |
|
"epoch": 0.21133659902029392, |
|
"grad_norm": 0.009579714387655258, |
|
"kl": 0.09214787185192108, |
|
"learning_rate": 4.4689360669454214e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 302 |
|
}, |
|
{ |
|
"completion_length": 187.07144165039062, |
|
"epoch": 0.21203638908327502, |
|
"grad_norm": 0.01488980557769537, |
|
"kl": 0.12565574049949646, |
|
"learning_rate": 4.4655444837189217e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 303 |
|
}, |
|
{ |
|
"completion_length": 192.71429443359375, |
|
"epoch": 0.21273617914625612, |
|
"grad_norm": 1.2768338918685913, |
|
"kl": 0.07897743582725525, |
|
"learning_rate": 4.4621434006152523e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 304 |
|
}, |
|
{ |
|
"completion_length": 191.42857360839844, |
|
"epoch": 0.21343596920923724, |
|
"grad_norm": 1.7220300436019897, |
|
"kl": 0.08046605437994003, |
|
"learning_rate": 4.458732834072543e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 305 |
|
}, |
|
{ |
|
"completion_length": 185.35714721679688, |
|
"epoch": 0.21413575927221834, |
|
"grad_norm": 1.9291383028030396, |
|
"kl": 0.10339696705341339, |
|
"learning_rate": 4.455312800574756e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 306 |
|
}, |
|
{ |
|
"completion_length": 160.21429443359375, |
|
"epoch": 0.21483554933519944, |
|
"grad_norm": 1.4560072422027588, |
|
"kl": 0.13349157571792603, |
|
"learning_rate": 4.45188331665161e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 307 |
|
}, |
|
{ |
|
"completion_length": 144.42857360839844, |
|
"epoch": 0.21553533939818054, |
|
"grad_norm": 1.605533480644226, |
|
"kl": 0.1337820589542389, |
|
"learning_rate": 4.4484443988785016e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 308 |
|
}, |
|
{ |
|
"completion_length": 162.2857208251953, |
|
"epoch": 0.21623512946116166, |
|
"grad_norm": 0.01384447980672121, |
|
"kl": 0.13544243574142456, |
|
"learning_rate": 4.4449960638764187e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 309 |
|
}, |
|
{ |
|
"completion_length": 180.07144165039062, |
|
"epoch": 0.21693491952414276, |
|
"grad_norm": 1.4379627704620361, |
|
"kl": 0.12317683547735214, |
|
"learning_rate": 4.44153832831187e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 310 |
|
}, |
|
{ |
|
"completion_length": 212.07144165039062, |
|
"epoch": 0.21763470958712386, |
|
"grad_norm": 2.2970993518829346, |
|
"kl": 0.1126500591635704, |
|
"learning_rate": 4.4380712088967956e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 311 |
|
}, |
|
{ |
|
"completion_length": 199.71429443359375, |
|
"epoch": 0.21833449965010496, |
|
"grad_norm": 1.1636662483215332, |
|
"kl": 0.0687713623046875, |
|
"learning_rate": 4.434594722388489e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 312 |
|
}, |
|
{ |
|
"completion_length": 196.85714721679688, |
|
"epoch": 0.21903428971308608, |
|
"grad_norm": 1.4445149898529053, |
|
"kl": 0.09801124781370163, |
|
"learning_rate": 4.4311088855895195e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 313 |
|
}, |
|
{ |
|
"completion_length": 184.57144165039062, |
|
"epoch": 0.21973407977606718, |
|
"grad_norm": 1.7435158491134644, |
|
"kl": 0.10646343976259232, |
|
"learning_rate": 4.427613715347647e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 314 |
|
}, |
|
{ |
|
"completion_length": 216.07144165039062, |
|
"epoch": 0.22043386983904828, |
|
"grad_norm": 1.4065042734146118, |
|
"kl": 0.10593009740114212, |
|
"learning_rate": 4.424109228555741e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 315 |
|
}, |
|
{ |
|
"completion_length": 181.21429443359375, |
|
"epoch": 0.22113365990202938, |
|
"grad_norm": 0.013224126771092415, |
|
"kl": 0.12971317768096924, |
|
"learning_rate": 4.420595442151701e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 316 |
|
}, |
|
{ |
|
"completion_length": 158.5, |
|
"epoch": 0.2218334499650105, |
|
"grad_norm": 0.01648276299238205, |
|
"kl": 0.13636274635791779, |
|
"learning_rate": 4.4170723731183734e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 317 |
|
}, |
|
{ |
|
"completion_length": 167.85714721679688, |
|
"epoch": 0.2225332400279916, |
|
"grad_norm": 1.4127261638641357, |
|
"kl": 0.15140336751937866, |
|
"learning_rate": 4.413540038483469e-07, |
|
"loss": 0.0002, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 318 |
|
}, |
|
{ |
|
"completion_length": 167.71429443359375, |
|
"epoch": 0.2232330300909727, |
|
"grad_norm": 1.6493871212005615, |
|
"kl": 0.08952382951974869, |
|
"learning_rate": 4.4099984553194803e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 319 |
|
}, |
|
{ |
|
"completion_length": 178.57144165039062, |
|
"epoch": 0.22393282015395383, |
|
"grad_norm": 0.01921817846596241, |
|
"kl": 0.13234639167785645, |
|
"learning_rate": 4.4064476407436005e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 320 |
|
}, |
|
{ |
|
"completion_length": 167.42857360839844, |
|
"epoch": 0.22463261021693492, |
|
"grad_norm": 1.6873424053192139, |
|
"kl": 0.11753804981708527, |
|
"learning_rate": 4.4028876119176417e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 321 |
|
}, |
|
{ |
|
"completion_length": 206.1428680419922, |
|
"epoch": 0.22533240027991602, |
|
"grad_norm": 0.009471292607486248, |
|
"kl": 0.0891999900341034, |
|
"learning_rate": 4.3993183860479475e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"completion_length": 199.21429443359375, |
|
"epoch": 0.22603219034289712, |
|
"grad_norm": 0.008467334322631359, |
|
"kl": 0.0989324077963829, |
|
"learning_rate": 4.395739980385316e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 323 |
|
}, |
|
{ |
|
"completion_length": 178.71429443359375, |
|
"epoch": 0.22673198040587825, |
|
"grad_norm": 1.276065468788147, |
|
"kl": 0.1064041405916214, |
|
"learning_rate": 4.392152412224909e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 324 |
|
}, |
|
{ |
|
"completion_length": 193.92857360839844, |
|
"epoch": 0.22743177046885935, |
|
"grad_norm": 1.3665152788162231, |
|
"kl": 0.10998494923114777, |
|
"learning_rate": 4.388555698906177e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 325 |
|
}, |
|
{ |
|
"completion_length": 172.07144165039062, |
|
"epoch": 0.22813156053184044, |
|
"grad_norm": 1.9325965642929077, |
|
"kl": 0.10882139950990677, |
|
"learning_rate": 4.384949857812766e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 326 |
|
}, |
|
{ |
|
"completion_length": 164.85714721679688, |
|
"epoch": 0.22883135059482154, |
|
"grad_norm": 0.011548174545168877, |
|
"kl": 0.12972931563854218, |
|
"learning_rate": 4.381334906372443e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 327 |
|
}, |
|
{ |
|
"completion_length": 212.1428680419922, |
|
"epoch": 0.22953114065780267, |
|
"grad_norm": 1.3057656288146973, |
|
"kl": 0.10275336354970932, |
|
"learning_rate": 4.3777108620570047e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 328 |
|
}, |
|
{ |
|
"completion_length": 186.85714721679688, |
|
"epoch": 0.23023093072078377, |
|
"grad_norm": 0.9121779203414917, |
|
"kl": 0.10501497238874435, |
|
"learning_rate": 4.374077742382196e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 329 |
|
}, |
|
{ |
|
"completion_length": 183.6428680419922, |
|
"epoch": 0.23093072078376486, |
|
"grad_norm": 0.0072582438588142395, |
|
"kl": 0.09125156700611115, |
|
"learning_rate": 4.3704355649076256e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"completion_length": 220.85714721679688, |
|
"epoch": 0.23163051084674596, |
|
"grad_norm": 1.386562466621399, |
|
"kl": 0.07495894283056259, |
|
"learning_rate": 4.366784347236679e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 331 |
|
}, |
|
{ |
|
"completion_length": 186.7857208251953, |
|
"epoch": 0.2323303009097271, |
|
"grad_norm": 1.0252575874328613, |
|
"kl": 0.1316533386707306, |
|
"learning_rate": 4.363124107016435e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 332 |
|
}, |
|
{ |
|
"completion_length": 205.71429443359375, |
|
"epoch": 0.2330300909727082, |
|
"grad_norm": 1.5812278985977173, |
|
"kl": 0.10307396203279495, |
|
"learning_rate": 4.3594548619375817e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 333 |
|
}, |
|
{ |
|
"completion_length": 173.92857360839844, |
|
"epoch": 0.23372988103568929, |
|
"grad_norm": 2.1588902473449707, |
|
"kl": 0.1479392796754837, |
|
"learning_rate": 4.355776629734328e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 334 |
|
}, |
|
{ |
|
"completion_length": 193.92857360839844, |
|
"epoch": 0.2344296710986704, |
|
"grad_norm": 1.7225788831710815, |
|
"kl": 0.12053793668746948, |
|
"learning_rate": 4.35208942818432e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 335 |
|
}, |
|
{ |
|
"completion_length": 210.00001525878906, |
|
"epoch": 0.2351294611616515, |
|
"grad_norm": 1.6022684574127197, |
|
"kl": 0.11531992256641388, |
|
"learning_rate": 4.3483932751085546e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 336 |
|
}, |
|
{ |
|
"completion_length": 187.50001525878906, |
|
"epoch": 0.2358292512246326, |
|
"grad_norm": 1.0213714838027954, |
|
"kl": 0.10510554164648056, |
|
"learning_rate": 4.344688188371293e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 337 |
|
}, |
|
{ |
|
"completion_length": 151.5, |
|
"epoch": 0.2365290412876137, |
|
"grad_norm": 1.0320056676864624, |
|
"kl": 0.13573938608169556, |
|
"learning_rate": 4.3409741858799747e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 338 |
|
}, |
|
{ |
|
"completion_length": 212.21429443359375, |
|
"epoch": 0.23722883135059483, |
|
"grad_norm": 1.2659670114517212, |
|
"kl": 0.08855041116476059, |
|
"learning_rate": 4.3372512855851305e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 339 |
|
}, |
|
{ |
|
"completion_length": 201.50001525878906, |
|
"epoch": 0.23792862141357593, |
|
"grad_norm": 1.4099029302597046, |
|
"kl": 0.08404584974050522, |
|
"learning_rate": 4.3335195054802977e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 340 |
|
}, |
|
{ |
|
"completion_length": 211.50001525878906, |
|
"epoch": 0.23862841147655703, |
|
"grad_norm": 1.1142750978469849, |
|
"kl": 0.13875596225261688, |
|
"learning_rate": 4.329778863601929e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 341 |
|
}, |
|
{ |
|
"completion_length": 149.6428680419922, |
|
"epoch": 0.23932820153953813, |
|
"grad_norm": 1.3854038715362549, |
|
"kl": 0.13258212804794312, |
|
"learning_rate": 4.3260293780293097e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 342 |
|
}, |
|
{ |
|
"completion_length": 184.57144165039062, |
|
"epoch": 0.24002799160251925, |
|
"grad_norm": 1.7300869226455688, |
|
"kl": 0.12928970158100128, |
|
"learning_rate": 4.3222710668844676e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 343 |
|
}, |
|
{ |
|
"completion_length": 192.1428680419922, |
|
"epoch": 0.24072778166550035, |
|
"grad_norm": 1.8982024192810059, |
|
"kl": 0.105531707406044, |
|
"learning_rate": 4.318503948332086e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 344 |
|
}, |
|
{ |
|
"completion_length": 187.7857208251953, |
|
"epoch": 0.24142757172848145, |
|
"grad_norm": 2.488347053527832, |
|
"kl": 0.12668681144714355, |
|
"learning_rate": 4.3147280405794186e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 345 |
|
}, |
|
{ |
|
"completion_length": 192.92857360839844, |
|
"epoch": 0.24212736179146255, |
|
"grad_norm": 1.653713345527649, |
|
"kl": 0.0994969978928566, |
|
"learning_rate": 4.3109433618761953e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 346 |
|
}, |
|
{ |
|
"completion_length": 203.00001525878906, |
|
"epoch": 0.24282715185444367, |
|
"grad_norm": 0.010403181426227093, |
|
"kl": 0.10773642361164093, |
|
"learning_rate": 4.307149930514541e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 347 |
|
}, |
|
{ |
|
"completion_length": 206.07144165039062, |
|
"epoch": 0.24352694191742477, |
|
"grad_norm": 0.010696402750909328, |
|
"kl": 0.09887736290693283, |
|
"learning_rate": 4.3033477648288827e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 348 |
|
}, |
|
{ |
|
"completion_length": 191.00001525878906, |
|
"epoch": 0.24422673198040587, |
|
"grad_norm": 1.0522825717926025, |
|
"kl": 0.1295844316482544, |
|
"learning_rate": 4.299536883195862e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 349 |
|
}, |
|
{ |
|
"completion_length": 204.85714721679688, |
|
"epoch": 0.244926522043387, |
|
"grad_norm": 1.0449916124343872, |
|
"kl": 0.09472481161355972, |
|
"learning_rate": 4.2957173040342467e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 350 |
|
}, |
|
{ |
|
"completion_length": 171.07144165039062, |
|
"epoch": 0.2456263121063681, |
|
"grad_norm": 1.3337371349334717, |
|
"kl": 0.13681744039058685, |
|
"learning_rate": 4.2918890458048406e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 351 |
|
}, |
|
{ |
|
"completion_length": 189.00001525878906, |
|
"epoch": 0.2463261021693492, |
|
"grad_norm": 3.7665247917175293, |
|
"kl": 0.19837237894535065, |
|
"learning_rate": 4.288052127010398e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 352 |
|
}, |
|
{ |
|
"completion_length": 221.9285888671875, |
|
"epoch": 0.2470258922323303, |
|
"grad_norm": 0.011105585843324661, |
|
"kl": 0.12203062325716019, |
|
"learning_rate": 4.2842065661955284e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 353 |
|
}, |
|
{ |
|
"completion_length": 211.00001525878906, |
|
"epoch": 0.24772568229531142, |
|
"grad_norm": 1.684749722480774, |
|
"kl": 0.11504418402910233, |
|
"learning_rate": 4.2803523819466114e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 354 |
|
}, |
|
{ |
|
"completion_length": 198.2857208251953, |
|
"epoch": 0.24842547235829252, |
|
"grad_norm": 2.0817246437072754, |
|
"kl": 0.09736784547567368, |
|
"learning_rate": 4.2764895928917057e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 355 |
|
}, |
|
{ |
|
"completion_length": 194.92857360839844, |
|
"epoch": 0.2491252624212736, |
|
"grad_norm": 1.865821361541748, |
|
"kl": 0.13163846731185913, |
|
"learning_rate": 4.2726182177004566e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 356 |
|
}, |
|
{ |
|
"completion_length": 188.50001525878906, |
|
"epoch": 0.2498250524842547, |
|
"grad_norm": 2.311312198638916, |
|
"kl": 0.09675271064043045, |
|
"learning_rate": 4.2687382750840106e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 357 |
|
}, |
|
{ |
|
"completion_length": 198.50001525878906, |
|
"epoch": 0.25052484254723584, |
|
"grad_norm": 0.010544586926698685, |
|
"kl": 0.10706578195095062, |
|
"learning_rate": 4.2648497837949214e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 358 |
|
}, |
|
{ |
|
"completion_length": 201.00001525878906, |
|
"epoch": 0.25122463261021694, |
|
"grad_norm": 0.007542663719505072, |
|
"kl": 0.10160516202449799, |
|
"learning_rate": 4.260952762627061e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 359 |
|
}, |
|
{ |
|
"completion_length": 195.42857360839844, |
|
"epoch": 0.25192442267319803, |
|
"grad_norm": 0.010795813985168934, |
|
"kl": 0.11410945653915405, |
|
"learning_rate": 4.257047230415525e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"completion_length": 219.4285888671875, |
|
"epoch": 0.25262421273617913, |
|
"grad_norm": 0.0094209685921669, |
|
"kl": 0.11131899058818817, |
|
"learning_rate": 4.253133206036549e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 361 |
|
}, |
|
{ |
|
"completion_length": 220.71429443359375, |
|
"epoch": 0.25332400279916023, |
|
"grad_norm": 1.182874083518982, |
|
"kl": 0.10284953564405441, |
|
"learning_rate": 4.2492107084074086e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 362 |
|
}, |
|
{ |
|
"completion_length": 177.50001525878906, |
|
"epoch": 0.2540237928621414, |
|
"grad_norm": 0.03138711303472519, |
|
"kl": 0.1809418946504593, |
|
"learning_rate": 4.245279756486335e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 363 |
|
}, |
|
{ |
|
"completion_length": 155.7857208251953, |
|
"epoch": 0.2547235829251225, |
|
"grad_norm": 1.2833763360977173, |
|
"kl": 0.1380501240491867, |
|
"learning_rate": 4.2413403692724215e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 364 |
|
}, |
|
{ |
|
"completion_length": 162.6428680419922, |
|
"epoch": 0.2554233729881036, |
|
"grad_norm": 0.019785305485129356, |
|
"kl": 0.15245895087718964, |
|
"learning_rate": 4.237392565805529e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 365 |
|
}, |
|
{ |
|
"completion_length": 207.85714721679688, |
|
"epoch": 0.2561231630510847, |
|
"grad_norm": 0.012158805504441261, |
|
"kl": 0.11601593345403671, |
|
"learning_rate": 4.2334363651661946e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 366 |
|
}, |
|
{ |
|
"completion_length": 160.07144165039062, |
|
"epoch": 0.2568229531140658, |
|
"grad_norm": 1.5489096641540527, |
|
"kl": 0.16084842383861542, |
|
"learning_rate": 4.2294717864755446e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 367 |
|
}, |
|
{ |
|
"completion_length": 165.5, |
|
"epoch": 0.2575227431770469, |
|
"grad_norm": 1.485174536705017, |
|
"kl": 0.1078505590558052, |
|
"learning_rate": 4.225498848895196e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 368 |
|
}, |
|
{ |
|
"completion_length": 186.2857208251953, |
|
"epoch": 0.258222533240028, |
|
"grad_norm": 2.263561248779297, |
|
"kl": 0.11858272552490234, |
|
"learning_rate": 4.2215175716271643e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 369 |
|
}, |
|
{ |
|
"completion_length": 160.07144165039062, |
|
"epoch": 0.25892232330300907, |
|
"grad_norm": 1.1202062368392944, |
|
"kl": 0.14608243107795715, |
|
"learning_rate": 4.2175279739137756e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 370 |
|
}, |
|
{ |
|
"completion_length": 206.57144165039062, |
|
"epoch": 0.2596221133659902, |
|
"grad_norm": 0.019609833136200905, |
|
"kl": 0.12289752811193466, |
|
"learning_rate": 4.213530075037568e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 371 |
|
}, |
|
{ |
|
"completion_length": 182.50001525878906, |
|
"epoch": 0.2603219034289713, |
|
"grad_norm": 1.993719458580017, |
|
"kl": 0.14151142537593842, |
|
"learning_rate": 4.209523894321203e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 372 |
|
}, |
|
{ |
|
"completion_length": 165.2857208251953, |
|
"epoch": 0.2610216934919524, |
|
"grad_norm": 1.3514111042022705, |
|
"kl": 0.13945753872394562, |
|
"learning_rate": 4.205509451127368e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 373 |
|
}, |
|
{ |
|
"completion_length": 191.57144165039062, |
|
"epoch": 0.2617214835549335, |
|
"grad_norm": 0.010482234880328178, |
|
"kl": 0.10723592340946198, |
|
"learning_rate": 4.201486764858684e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 374 |
|
}, |
|
{ |
|
"completion_length": 174.42857360839844, |
|
"epoch": 0.2624212736179146, |
|
"grad_norm": 1.1648919582366943, |
|
"kl": 0.1254054754972458, |
|
"learning_rate": 4.1974558549576156e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 375 |
|
}, |
|
{ |
|
"completion_length": 191.57144165039062, |
|
"epoch": 0.2631210636808957, |
|
"grad_norm": 1.0762180089950562, |
|
"kl": 0.10756195336580276, |
|
"learning_rate": 4.1934167409063716e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 376 |
|
}, |
|
{ |
|
"completion_length": 180.50001525878906, |
|
"epoch": 0.2638208537438768, |
|
"grad_norm": 0.007885328494012356, |
|
"kl": 0.10615711659193039, |
|
"learning_rate": 4.1893694422268136e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 377 |
|
}, |
|
{ |
|
"completion_length": 173.92857360839844, |
|
"epoch": 0.26452064380685797, |
|
"grad_norm": 0.013112043030560017, |
|
"kl": 0.162028506398201, |
|
"learning_rate": 4.1853139784803606e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 378 |
|
}, |
|
{ |
|
"completion_length": 173.92857360839844, |
|
"epoch": 0.26522043386983907, |
|
"grad_norm": 1.892849087715149, |
|
"kl": 0.13766098022460938, |
|
"learning_rate": 4.1812503692678956e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 379 |
|
}, |
|
{ |
|
"completion_length": 210.00001525878906, |
|
"epoch": 0.26592022393282017, |
|
"grad_norm": 0.011618354357779026, |
|
"kl": 0.13500210642814636, |
|
"learning_rate": 4.177178634229671e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"completion_length": 204.7857208251953, |
|
"epoch": 0.26662001399580126, |
|
"grad_norm": 0.9967655539512634, |
|
"kl": 0.10203620791435242, |
|
"learning_rate": 4.173098793045212e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 381 |
|
}, |
|
{ |
|
"completion_length": 152.21429443359375, |
|
"epoch": 0.26731980405878236, |
|
"grad_norm": 1.4876607656478882, |
|
"kl": 0.18279418349266052, |
|
"learning_rate": 4.1690108654332235e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 382 |
|
}, |
|
{ |
|
"completion_length": 181.85714721679688, |
|
"epoch": 0.26801959412176346, |
|
"grad_norm": 0.009222305379807949, |
|
"kl": 0.10881803929805756, |
|
"learning_rate": 4.1649148711514914e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 383 |
|
}, |
|
{ |
|
"completion_length": 170.57144165039062, |
|
"epoch": 0.26871938418474456, |
|
"grad_norm": 1.0438907146453857, |
|
"kl": 0.11488223820924759, |
|
"learning_rate": 4.1608108299967915e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 384 |
|
}, |
|
{ |
|
"completion_length": 219.2857208251953, |
|
"epoch": 0.26941917424772566, |
|
"grad_norm": 1.0877920389175415, |
|
"kl": 0.1062377542257309, |
|
"learning_rate": 4.156698761804792e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 385 |
|
}, |
|
{ |
|
"completion_length": 175.71429443359375, |
|
"epoch": 0.2701189643107068, |
|
"grad_norm": 2.564737319946289, |
|
"kl": 0.1757022589445114, |
|
"learning_rate": 4.1525786864499555e-07, |
|
"loss": 0.0002, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 386 |
|
}, |
|
{ |
|
"completion_length": 197.85714721679688, |
|
"epoch": 0.2708187543736879, |
|
"grad_norm": 1.246633768081665, |
|
"kl": 0.11401327699422836, |
|
"learning_rate": 4.1484506238454477e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 387 |
|
}, |
|
{ |
|
"completion_length": 200.92857360839844, |
|
"epoch": 0.271518544436669, |
|
"grad_norm": 1.32391357421875, |
|
"kl": 0.1393825113773346, |
|
"learning_rate": 4.1443145939430356e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 388 |
|
}, |
|
{ |
|
"completion_length": 154.07144165039062, |
|
"epoch": 0.2722183344996501, |
|
"grad_norm": 1.0778306722640991, |
|
"kl": 0.1403672844171524, |
|
"learning_rate": 4.1401706167329953e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 389 |
|
}, |
|
{ |
|
"completion_length": 187.71429443359375, |
|
"epoch": 0.2729181245626312, |
|
"grad_norm": 0.9315563440322876, |
|
"kl": 0.14357517659664154, |
|
"learning_rate": 4.136018712244014e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 390 |
|
}, |
|
{ |
|
"completion_length": 185.85714721679688, |
|
"epoch": 0.2736179146256123, |
|
"grad_norm": 0.01690327189862728, |
|
"kl": 0.108391173183918, |
|
"learning_rate": 4.1318589005430903e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 391 |
|
}, |
|
{ |
|
"completion_length": 185.42857360839844, |
|
"epoch": 0.2743177046885934, |
|
"grad_norm": 1.1042805910110474, |
|
"kl": 0.1383601278066635, |
|
"learning_rate": 4.1276912017354445e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 392 |
|
}, |
|
{ |
|
"completion_length": 200.57144165039062, |
|
"epoch": 0.27501749475157455, |
|
"grad_norm": 1.1524347066879272, |
|
"kl": 0.13028042018413544, |
|
"learning_rate": 4.123515635964413e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 393 |
|
}, |
|
{ |
|
"completion_length": 203.00001525878906, |
|
"epoch": 0.27571728481455565, |
|
"grad_norm": 0.014327870681881905, |
|
"kl": 0.13895606994628906, |
|
"learning_rate": 4.119332223411357e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 394 |
|
}, |
|
{ |
|
"completion_length": 189.92857360839844, |
|
"epoch": 0.27641707487753675, |
|
"grad_norm": 0.010174551047384739, |
|
"kl": 0.1431262195110321, |
|
"learning_rate": 4.115140984295562e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"completion_length": 204.6428680419922, |
|
"epoch": 0.27711686494051785, |
|
"grad_norm": 0.009927564300596714, |
|
"kl": 0.13393332064151764, |
|
"learning_rate": 4.1109419388741416e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 396 |
|
}, |
|
{ |
|
"completion_length": 130.6428680419922, |
|
"epoch": 0.27781665500349895, |
|
"grad_norm": 1.8417161703109741, |
|
"kl": 0.17228896915912628, |
|
"learning_rate": 4.1067351074419376e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 397 |
|
}, |
|
{ |
|
"completion_length": 160.1428680419922, |
|
"epoch": 0.27851644506648005, |
|
"grad_norm": 1.7102991342544556, |
|
"kl": 0.15284760296344757, |
|
"learning_rate": 4.1025205103314243e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 398 |
|
}, |
|
{ |
|
"completion_length": 153.35714721679688, |
|
"epoch": 0.27921623512946114, |
|
"grad_norm": 0.012750448659062386, |
|
"kl": 0.15221111476421356, |
|
"learning_rate": 4.0982981679126084e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 399 |
|
}, |
|
{ |
|
"completion_length": 192.21429443359375, |
|
"epoch": 0.27991602519244224, |
|
"grad_norm": 0.011978665366768837, |
|
"kl": 0.12264849990606308, |
|
"learning_rate": 4.0940681005929313e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"completion_length": 171.50001525878906, |
|
"epoch": 0.2806158152554234, |
|
"grad_norm": 1.029371738433838, |
|
"kl": 0.21356099843978882, |
|
"learning_rate": 4.089830328817171e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 401 |
|
}, |
|
{ |
|
"completion_length": 162.42857360839844, |
|
"epoch": 0.2813156053184045, |
|
"grad_norm": 0.023565975949168205, |
|
"kl": 0.2100272923707962, |
|
"learning_rate": 4.0855848730673426e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 402 |
|
}, |
|
{ |
|
"completion_length": 179.92857360839844, |
|
"epoch": 0.2820153953813856, |
|
"grad_norm": 2.285959005355835, |
|
"kl": 0.130794957280159, |
|
"learning_rate": 4.081331753862599e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7142857313156128, |
|
"reward_std": 0.4040610194206238, |
|
"rewards/check_gptzero_func": 0.7142857313156128, |
|
"step": 403 |
|
}, |
|
{ |
|
"completion_length": 180.85714721679688, |
|
"epoch": 0.2827151854443667, |
|
"grad_norm": 0.014847146347165108, |
|
"kl": 0.15708422660827637, |
|
"learning_rate": 4.077070991759132e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 404 |
|
}, |
|
{ |
|
"completion_length": 177.6428680419922, |
|
"epoch": 0.2834149755073478, |
|
"grad_norm": 0.010321750305593014, |
|
"kl": 0.14018230140209198, |
|
"learning_rate": 4.072802607350074e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"completion_length": 159.57144165039062, |
|
"epoch": 0.2841147655703289, |
|
"grad_norm": 0.012742813676595688, |
|
"kl": 0.15737994015216827, |
|
"learning_rate": 4.0685266212653967e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 406 |
|
}, |
|
{ |
|
"completion_length": 205.7857208251953, |
|
"epoch": 0.28481455563331, |
|
"grad_norm": 1.4977048635482788, |
|
"kl": 0.12560279667377472, |
|
"learning_rate": 4.064243054171812e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 407 |
|
}, |
|
{ |
|
"completion_length": 186.21429443359375, |
|
"epoch": 0.28551434569629114, |
|
"grad_norm": 0.007791958283632994, |
|
"kl": 0.1278899908065796, |
|
"learning_rate": 4.059951926772674e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 408 |
|
}, |
|
{ |
|
"completion_length": 202.85714721679688, |
|
"epoch": 0.28621413575927224, |
|
"grad_norm": 0.006448006723076105, |
|
"kl": 0.10058410465717316, |
|
"learning_rate": 4.0556532598078753e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 409 |
|
}, |
|
{ |
|
"completion_length": 177.7857208251953, |
|
"epoch": 0.28691392582225334, |
|
"grad_norm": 2.2854204177856445, |
|
"kl": 0.15505185723304749, |
|
"learning_rate": 4.0513470740537494e-07, |
|
"loss": 0.0002, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 410 |
|
}, |
|
{ |
|
"completion_length": 143.35714721679688, |
|
"epoch": 0.28761371588523443, |
|
"grad_norm": 1.5281881093978882, |
|
"kl": 0.1289946287870407, |
|
"learning_rate": 4.047033390322971e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 411 |
|
}, |
|
{ |
|
"completion_length": 192.21429443359375, |
|
"epoch": 0.28831350594821553, |
|
"grad_norm": 1.303729772567749, |
|
"kl": 0.12751147150993347, |
|
"learning_rate": 4.0427122294644517e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 412 |
|
}, |
|
{ |
|
"completion_length": 185.35714721679688, |
|
"epoch": 0.28901329601119663, |
|
"grad_norm": 1.3215141296386719, |
|
"kl": 0.18720586597919464, |
|
"learning_rate": 4.038383612363243e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 413 |
|
}, |
|
{ |
|
"completion_length": 227.57144165039062, |
|
"epoch": 0.28971308607417773, |
|
"grad_norm": 0.0097136739641428, |
|
"kl": 0.09667594730854034, |
|
"learning_rate": 4.034047559940435e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 414 |
|
}, |
|
{ |
|
"completion_length": 170.57144165039062, |
|
"epoch": 0.2904128761371588, |
|
"grad_norm": 1.0739690065383911, |
|
"kl": 0.14166812598705292, |
|
"learning_rate": 4.0297040931530517e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 415 |
|
}, |
|
{ |
|
"completion_length": 184.42857360839844, |
|
"epoch": 0.29111266620014, |
|
"grad_norm": 1.1877142190933228, |
|
"kl": 0.10042338818311691, |
|
"learning_rate": 4.025353232993953e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 416 |
|
}, |
|
{ |
|
"completion_length": 155.5, |
|
"epoch": 0.2918124562631211, |
|
"grad_norm": 1.1831613779067993, |
|
"kl": 0.17097832262516022, |
|
"learning_rate": 4.020995000491735e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 417 |
|
}, |
|
{ |
|
"completion_length": 140.1428680419922, |
|
"epoch": 0.2925122463261022, |
|
"grad_norm": 0.012450830079615116, |
|
"kl": 0.18519897758960724, |
|
"learning_rate": 4.016629416710623e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 418 |
|
}, |
|
{ |
|
"completion_length": 173.1428680419922, |
|
"epoch": 0.2932120363890833, |
|
"grad_norm": 1.1009522676467896, |
|
"kl": 0.15228672325611115, |
|
"learning_rate": 4.012256502750372e-07, |
|
"loss": 0.0002, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 419 |
|
}, |
|
{ |
|
"completion_length": 132.85714721679688, |
|
"epoch": 0.2939118264520644, |
|
"grad_norm": 2.399618625640869, |
|
"kl": 0.12590643763542175, |
|
"learning_rate": 4.0078762797461675e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 420 |
|
}, |
|
{ |
|
"completion_length": 198.35714721679688, |
|
"epoch": 0.29461161651504547, |
|
"grad_norm": 0.009001773782074451, |
|
"kl": 0.12903183698654175, |
|
"learning_rate": 4.003488768868521e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 421 |
|
}, |
|
{ |
|
"completion_length": 180.00001525878906, |
|
"epoch": 0.29531140657802657, |
|
"grad_norm": 0.009517885744571686, |
|
"kl": 0.1431788057088852, |
|
"learning_rate": 3.999093991323166e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 422 |
|
}, |
|
{ |
|
"completion_length": 216.00001525878906, |
|
"epoch": 0.2960111966410077, |
|
"grad_norm": 1.2407546043395996, |
|
"kl": 0.14400172233581543, |
|
"learning_rate": 3.994691968350958e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 423 |
|
}, |
|
{ |
|
"completion_length": 167.2857208251953, |
|
"epoch": 0.2967109867039888, |
|
"grad_norm": 0.02178722620010376, |
|
"kl": 0.20456863939762115, |
|
"learning_rate": 3.9902827212277724e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 424 |
|
}, |
|
{ |
|
"completion_length": 193.35714721679688, |
|
"epoch": 0.2974107767669699, |
|
"grad_norm": 0.8949815034866333, |
|
"kl": 0.13538450002670288, |
|
"learning_rate": 3.9858662712643986e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 425 |
|
}, |
|
{ |
|
"completion_length": 163.42857360839844, |
|
"epoch": 0.298110566829951, |
|
"grad_norm": 1.2736574411392212, |
|
"kl": 0.13569849729537964, |
|
"learning_rate": 3.9814426398064397e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 426 |
|
}, |
|
{ |
|
"completion_length": 208.50001525878906, |
|
"epoch": 0.2988103568929321, |
|
"grad_norm": 1.8960295915603638, |
|
"kl": 0.09117873758077621, |
|
"learning_rate": 3.9770118482342087e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 427 |
|
}, |
|
{ |
|
"completion_length": 194.35714721679688, |
|
"epoch": 0.2995101469559132, |
|
"grad_norm": 1.1166753768920898, |
|
"kl": 0.09233411401510239, |
|
"learning_rate": 3.9725739179626244e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 428 |
|
}, |
|
{ |
|
"completion_length": 211.50001525878906, |
|
"epoch": 0.3002099370188943, |
|
"grad_norm": 0.9930157661437988, |
|
"kl": 0.14115986227989197, |
|
"learning_rate": 3.968128870441109e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 429 |
|
}, |
|
{ |
|
"completion_length": 161.0, |
|
"epoch": 0.3009097270818754, |
|
"grad_norm": 0.011348233558237553, |
|
"kl": 0.14792558550834656, |
|
"learning_rate": 3.963676727153481e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 430 |
|
}, |
|
{ |
|
"completion_length": 182.42857360839844, |
|
"epoch": 0.30160951714485656, |
|
"grad_norm": 0.009072246961295605, |
|
"kl": 0.13709795475006104, |
|
"learning_rate": 3.959217509617859e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 431 |
|
}, |
|
{ |
|
"completion_length": 189.1428680419922, |
|
"epoch": 0.30230930720783766, |
|
"grad_norm": 0.007533005904406309, |
|
"kl": 0.12774527072906494, |
|
"learning_rate": 3.954751239386549e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 432 |
|
}, |
|
{ |
|
"completion_length": 197.2857208251953, |
|
"epoch": 0.30300909727081876, |
|
"grad_norm": 0.00751620065420866, |
|
"kl": 0.0973627045750618, |
|
"learning_rate": 3.950277938045947e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 433 |
|
}, |
|
{ |
|
"completion_length": 226.50001525878906, |
|
"epoch": 0.30370888733379986, |
|
"grad_norm": 1.0351189374923706, |
|
"kl": 0.09664253145456314, |
|
"learning_rate": 3.9457976272164286e-07, |
|
"loss": 0.0001, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 434 |
|
}, |
|
{ |
|
"completion_length": 167.35714721679688, |
|
"epoch": 0.30440867739678096, |
|
"grad_norm": 0.00847623124718666, |
|
"kl": 0.14188340306282043, |
|
"learning_rate": 3.9413103285522495e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 435 |
|
}, |
|
{ |
|
"completion_length": 172.92857360839844, |
|
"epoch": 0.30510846745976206, |
|
"grad_norm": 0.009355437941849232, |
|
"kl": 0.14115223288536072, |
|
"learning_rate": 3.93681606374144e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 436 |
|
}, |
|
{ |
|
"completion_length": 176.00001525878906, |
|
"epoch": 0.30580825752274315, |
|
"grad_norm": 0.9071304202079773, |
|
"kl": 0.2700052857398987, |
|
"learning_rate": 3.932314854505697e-07, |
|
"loss": 0.0003, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 437 |
|
}, |
|
{ |
|
"completion_length": 194.7857208251953, |
|
"epoch": 0.3065080475857243, |
|
"grad_norm": 0.010158144868910313, |
|
"kl": 0.1656620353460312, |
|
"learning_rate": 3.927806722600283e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 438 |
|
}, |
|
{ |
|
"completion_length": 191.71429443359375, |
|
"epoch": 0.3072078376487054, |
|
"grad_norm": 0.9690079689025879, |
|
"kl": 0.13316388428211212, |
|
"learning_rate": 3.9232916898139177e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 439 |
|
}, |
|
{ |
|
"completion_length": 162.92857360839844, |
|
"epoch": 0.3079076277116865, |
|
"grad_norm": 0.013201621361076832, |
|
"kl": 0.17141349613666534, |
|
"learning_rate": 3.9187697779686766e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"completion_length": 166.2857208251953, |
|
"epoch": 0.3086074177746676, |
|
"grad_norm": 0.01558050885796547, |
|
"kl": 0.17944668233394623, |
|
"learning_rate": 3.9142410089198794e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 441 |
|
}, |
|
{ |
|
"completion_length": 167.7857208251953, |
|
"epoch": 0.3093072078376487, |
|
"grad_norm": 0.01309128850698471, |
|
"kl": 0.17336508631706238, |
|
"learning_rate": 3.909705404555992e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 442 |
|
}, |
|
{ |
|
"completion_length": 221.00001525878906, |
|
"epoch": 0.3100069979006298, |
|
"grad_norm": 1.2899380922317505, |
|
"kl": 0.1145109310746193, |
|
"learning_rate": 3.9051629867985135e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 443 |
|
}, |
|
{ |
|
"completion_length": 164.35714721679688, |
|
"epoch": 0.3107067879636109, |
|
"grad_norm": 0.024782003834843636, |
|
"kl": 0.16590863466262817, |
|
"learning_rate": 3.9006137776018756e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 444 |
|
}, |
|
{ |
|
"completion_length": 179.35714721679688, |
|
"epoch": 0.311406578026592, |
|
"grad_norm": 1.850183367729187, |
|
"kl": 0.15818960964679718, |
|
"learning_rate": 3.896057798953333e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 445 |
|
}, |
|
{ |
|
"completion_length": 204.50001525878906, |
|
"epoch": 0.31210636808957315, |
|
"grad_norm": 0.01804116554558277, |
|
"kl": 0.1586751490831375, |
|
"learning_rate": 3.8914950728728597e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 446 |
|
}, |
|
{ |
|
"completion_length": 193.71429443359375, |
|
"epoch": 0.31280615815255425, |
|
"grad_norm": 0.8885690569877625, |
|
"kl": 0.13960577547550201, |
|
"learning_rate": 3.886925621413041e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 447 |
|
}, |
|
{ |
|
"completion_length": 159.92857360839844, |
|
"epoch": 0.31350594821553535, |
|
"grad_norm": 1.829359531402588, |
|
"kl": 0.16523723304271698, |
|
"learning_rate": 3.882349466658967e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 448 |
|
}, |
|
{ |
|
"completion_length": 178.1428680419922, |
|
"epoch": 0.31420573827851644, |
|
"grad_norm": 0.011123896576464176, |
|
"kl": 0.1395711749792099, |
|
"learning_rate": 3.8777666307281277e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 449 |
|
}, |
|
{ |
|
"completion_length": 187.7857208251953, |
|
"epoch": 0.31490552834149754, |
|
"grad_norm": 1.3126987218856812, |
|
"kl": 0.18052725493907928, |
|
"learning_rate": 3.873177135770302e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 450 |
|
}, |
|
{ |
|
"completion_length": 179.00001525878906, |
|
"epoch": 0.31560531840447864, |
|
"grad_norm": 1.1596554517745972, |
|
"kl": 0.16147735714912415, |
|
"learning_rate": 3.8685810039674547e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 451 |
|
}, |
|
{ |
|
"completion_length": 219.71429443359375, |
|
"epoch": 0.31630510846745974, |
|
"grad_norm": 1.1789698600769043, |
|
"kl": 0.1065596416592598, |
|
"learning_rate": 3.8639782575336287e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 452 |
|
}, |
|
{ |
|
"completion_length": 187.7857208251953, |
|
"epoch": 0.3170048985304409, |
|
"grad_norm": 0.02577633410692215, |
|
"kl": 0.15941867232322693, |
|
"learning_rate": 3.8593689187148337e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 453 |
|
}, |
|
{ |
|
"completion_length": 206.42857360839844, |
|
"epoch": 0.317704688593422, |
|
"grad_norm": 0.00979100912809372, |
|
"kl": 0.11754533648490906, |
|
"learning_rate": 3.854753009788947e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 454 |
|
}, |
|
{ |
|
"completion_length": 161.7857208251953, |
|
"epoch": 0.3184044786564031, |
|
"grad_norm": 1.6515514850616455, |
|
"kl": 0.19671931862831116, |
|
"learning_rate": 3.850130553065595e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 455 |
|
}, |
|
{ |
|
"completion_length": 187.50001525878906, |
|
"epoch": 0.3191042687193842, |
|
"grad_norm": 0.01866884157061577, |
|
"kl": 0.14069817960262299, |
|
"learning_rate": 3.845501570886054e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 456 |
|
}, |
|
{ |
|
"completion_length": 198.92857360839844, |
|
"epoch": 0.3198040587823653, |
|
"grad_norm": 1.2667752504348755, |
|
"kl": 0.13826407492160797, |
|
"learning_rate": 3.840866085623138e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 457 |
|
}, |
|
{ |
|
"completion_length": 223.00001525878906, |
|
"epoch": 0.3205038488453464, |
|
"grad_norm": 1.4195311069488525, |
|
"kl": 0.1290283203125, |
|
"learning_rate": 3.836224119681094e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 458 |
|
}, |
|
{ |
|
"completion_length": 214.6428680419922, |
|
"epoch": 0.3212036389083275, |
|
"grad_norm": 0.013162168674170971, |
|
"kl": 0.0947316437959671, |
|
"learning_rate": 3.831575695495487e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 459 |
|
}, |
|
{ |
|
"completion_length": 196.85714721679688, |
|
"epoch": 0.3219034289713086, |
|
"grad_norm": 1.5086344480514526, |
|
"kl": 0.13508260250091553, |
|
"learning_rate": 3.826920835533101e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 460 |
|
}, |
|
{ |
|
"completion_length": 213.71429443359375, |
|
"epoch": 0.32260321903428973, |
|
"grad_norm": 0.01093920785933733, |
|
"kl": 0.1424056589603424, |
|
"learning_rate": 3.822259562291821e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 461 |
|
}, |
|
{ |
|
"completion_length": 167.5, |
|
"epoch": 0.32330300909727083, |
|
"grad_norm": 0.008407356217503548, |
|
"kl": 0.14815574884414673, |
|
"learning_rate": 3.817591898300534e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 462 |
|
}, |
|
{ |
|
"completion_length": 186.2857208251953, |
|
"epoch": 0.32400279916025193, |
|
"grad_norm": 1.7629343271255493, |
|
"kl": 0.14456768333911896, |
|
"learning_rate": 3.8129178661190085e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 463 |
|
}, |
|
{ |
|
"completion_length": 153.7857208251953, |
|
"epoch": 0.32470258922323303, |
|
"grad_norm": 1.3593230247497559, |
|
"kl": 0.19365935027599335, |
|
"learning_rate": 3.8082374883377956e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 464 |
|
}, |
|
{ |
|
"completion_length": 207.35714721679688, |
|
"epoch": 0.3254023792862141, |
|
"grad_norm": 1.344556212425232, |
|
"kl": 0.11241815984249115, |
|
"learning_rate": 3.8035507875781156e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 465 |
|
}, |
|
{ |
|
"completion_length": 197.07144165039062, |
|
"epoch": 0.3261021693491952, |
|
"grad_norm": 1.1704262495040894, |
|
"kl": 0.18255966901779175, |
|
"learning_rate": 3.7988577864917487e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 466 |
|
}, |
|
{ |
|
"completion_length": 184.35714721679688, |
|
"epoch": 0.3268019594121763, |
|
"grad_norm": 0.011217900551855564, |
|
"kl": 0.14471596479415894, |
|
"learning_rate": 3.7941585077609264e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 467 |
|
}, |
|
{ |
|
"completion_length": 175.2857208251953, |
|
"epoch": 0.3275017494751575, |
|
"grad_norm": 0.0099489726126194, |
|
"kl": 0.1380590945482254, |
|
"learning_rate": 3.7894529740982183e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"completion_length": 175.07144165039062, |
|
"epoch": 0.3282015395381386, |
|
"grad_norm": 2.6390817165374756, |
|
"kl": 0.18724487721920013, |
|
"learning_rate": 3.784741208246431e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 469 |
|
}, |
|
{ |
|
"completion_length": 186.42857360839844, |
|
"epoch": 0.3289013296011197, |
|
"grad_norm": 0.011759848333895206, |
|
"kl": 0.15252262353897095, |
|
"learning_rate": 3.7800232329784874e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"completion_length": 176.6428680419922, |
|
"epoch": 0.3296011196641008, |
|
"grad_norm": 0.014718899503350258, |
|
"kl": 0.1921573430299759, |
|
"learning_rate": 3.7752990710973243e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 471 |
|
}, |
|
{ |
|
"completion_length": 175.71429443359375, |
|
"epoch": 0.33030090972708187, |
|
"grad_norm": 0.014721820130944252, |
|
"kl": 0.18067756295204163, |
|
"learning_rate": 3.7705687454357795e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 472 |
|
}, |
|
{ |
|
"completion_length": 238.35714721679688, |
|
"epoch": 0.33100069979006297, |
|
"grad_norm": 0.9916889071464539, |
|
"kl": 0.11218740046024323, |
|
"learning_rate": 3.765832278856481e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 473 |
|
}, |
|
{ |
|
"completion_length": 190.21429443359375, |
|
"epoch": 0.33170048985304407, |
|
"grad_norm": 0.03439653292298317, |
|
"kl": 0.20360586047172546, |
|
"learning_rate": 3.7610896942517355e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 474 |
|
}, |
|
{ |
|
"completion_length": 209.92857360839844, |
|
"epoch": 0.33240027991602517, |
|
"grad_norm": 0.010569430887699127, |
|
"kl": 0.12097764760255814, |
|
"learning_rate": 3.756341014543425e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"completion_length": 176.6428680419922, |
|
"epoch": 0.3331000699790063, |
|
"grad_norm": 1.319534182548523, |
|
"kl": 0.1647622138261795, |
|
"learning_rate": 3.7515862626828823e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 476 |
|
}, |
|
{ |
|
"completion_length": 176.21429443359375, |
|
"epoch": 0.3337998600419874, |
|
"grad_norm": 0.013711081817746162, |
|
"kl": 0.1440211832523346, |
|
"learning_rate": 3.7468254616507956e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 477 |
|
}, |
|
{ |
|
"completion_length": 166.2857208251953, |
|
"epoch": 0.3344996501049685, |
|
"grad_norm": 1.3477894067764282, |
|
"kl": 0.17784541845321655, |
|
"learning_rate": 3.742058634457085e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 478 |
|
}, |
|
{ |
|
"completion_length": 167.42857360839844, |
|
"epoch": 0.3351994401679496, |
|
"grad_norm": 0.011473475955426693, |
|
"kl": 0.16440436244010925, |
|
"learning_rate": 3.737285804140799e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 479 |
|
}, |
|
{ |
|
"completion_length": 171.1428680419922, |
|
"epoch": 0.3358992302309307, |
|
"grad_norm": 1.478050947189331, |
|
"kl": 0.18164397776126862, |
|
"learning_rate": 3.7325069937699994e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 480 |
|
}, |
|
{ |
|
"completion_length": 204.7857208251953, |
|
"epoch": 0.3365990202939118, |
|
"grad_norm": 0.01435791701078415, |
|
"kl": 0.17547443509101868, |
|
"learning_rate": 3.7277222264416504e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 481 |
|
}, |
|
{ |
|
"completion_length": 186.07144165039062, |
|
"epoch": 0.3372988103568929, |
|
"grad_norm": 0.018172932788729668, |
|
"kl": 0.16386054456233978, |
|
"learning_rate": 3.722931525281508e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 482 |
|
}, |
|
{ |
|
"completion_length": 182.2857208251953, |
|
"epoch": 0.33799860041987406, |
|
"grad_norm": 0.06221003085374832, |
|
"kl": 0.2264447659254074, |
|
"learning_rate": 3.718134913444007e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 483 |
|
}, |
|
{ |
|
"completion_length": 166.7857208251953, |
|
"epoch": 0.33869839048285516, |
|
"grad_norm": 2.0576183795928955, |
|
"kl": 0.16736780107021332, |
|
"learning_rate": 3.713332414112152e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 484 |
|
}, |
|
{ |
|
"completion_length": 208.50001525878906, |
|
"epoch": 0.33939818054583626, |
|
"grad_norm": 1.1399970054626465, |
|
"kl": 0.14454233646392822, |
|
"learning_rate": 3.708524050497399e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 485 |
|
}, |
|
{ |
|
"completion_length": 205.50001525878906, |
|
"epoch": 0.34009797060881736, |
|
"grad_norm": 1.0119779109954834, |
|
"kl": 0.1506783664226532, |
|
"learning_rate": 3.703709845839552e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 486 |
|
}, |
|
{ |
|
"completion_length": 203.2857208251953, |
|
"epoch": 0.34079776067179846, |
|
"grad_norm": 0.009078477509319782, |
|
"kl": 0.12604613602161407, |
|
"learning_rate": 3.6988898234066405e-07, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 487 |
|
}, |
|
{ |
|
"completion_length": 207.2857208251953, |
|
"epoch": 0.34149755073477955, |
|
"grad_norm": 1.2624167203903198, |
|
"kl": 0.1470993310213089, |
|
"learning_rate": 3.694064006494818e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 488 |
|
}, |
|
{ |
|
"completion_length": 226.00001525878906, |
|
"epoch": 0.34219734079776065, |
|
"grad_norm": 1.0285292863845825, |
|
"kl": 0.13139139115810394, |
|
"learning_rate": 3.6892324184282404e-07, |
|
"loss": 0.0001, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 489 |
|
}, |
|
{ |
|
"completion_length": 188.2857208251953, |
|
"epoch": 0.34289713086074175, |
|
"grad_norm": 0.01640986278653145, |
|
"kl": 0.18013492226600647, |
|
"learning_rate": 3.6843950825589575e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"completion_length": 201.00001525878906, |
|
"epoch": 0.3435969209237229, |
|
"grad_norm": 0.012897200882434845, |
|
"kl": 0.1696484386920929, |
|
"learning_rate": 3.6795520222668e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 491 |
|
}, |
|
{ |
|
"completion_length": 184.85714721679688, |
|
"epoch": 0.344296710986704, |
|
"grad_norm": 0.009800958447158337, |
|
"kl": 0.161555215716362, |
|
"learning_rate": 3.674703260959266e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 492 |
|
}, |
|
{ |
|
"completion_length": 183.85714721679688, |
|
"epoch": 0.3449965010496851, |
|
"grad_norm": 1.2248705625534058, |
|
"kl": 0.19914430379867554, |
|
"learning_rate": 3.669848822071407e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 493 |
|
}, |
|
{ |
|
"completion_length": 156.07144165039062, |
|
"epoch": 0.3456962911126662, |
|
"grad_norm": 0.01988043822348118, |
|
"kl": 0.2022208720445633, |
|
"learning_rate": 3.664988729065715e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 494 |
|
}, |
|
{ |
|
"completion_length": 176.92857360839844, |
|
"epoch": 0.3463960811756473, |
|
"grad_norm": 0.030839132145047188, |
|
"kl": 0.20638853311538696, |
|
"learning_rate": 3.660123005432011e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"completion_length": 191.7857208251953, |
|
"epoch": 0.3470958712386284, |
|
"grad_norm": 1.8996195793151855, |
|
"kl": 0.19397129118442535, |
|
"learning_rate": 3.655251674687329e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 496 |
|
}, |
|
{ |
|
"completion_length": 228.1428680419922, |
|
"epoch": 0.3477956613016095, |
|
"grad_norm": 1.9669594764709473, |
|
"kl": 0.16077013313770294, |
|
"learning_rate": 3.650374760375804e-07, |
|
"loss": 0.0002, |
|
"reward": 0.785714328289032, |
|
"reward_std": 0.30304574966430664, |
|
"rewards/check_gptzero_func": 0.785714328289032, |
|
"step": 497 |
|
}, |
|
{ |
|
"completion_length": 220.2857208251953, |
|
"epoch": 0.34849545136459065, |
|
"grad_norm": 0.017728207632899284, |
|
"kl": 0.16397885978221893, |
|
"learning_rate": 3.6454922860685556e-07, |
|
"loss": 0.0002, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/check_gptzero_func": 1.0, |
|
"step": 498 |
|
}, |
|
{ |
|
"completion_length": 190.2857208251953, |
|
"epoch": 0.34919524142757175, |
|
"grad_norm": 1.5983257293701172, |
|
"kl": 0.16851238906383514, |
|
"learning_rate": 3.640604275363579e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8571429252624512, |
|
"reward_std": 0.2020305097103119, |
|
"rewards/check_gptzero_func": 0.8571429252624512, |
|
"step": 499 |
|
}, |
|
{ |
|
"completion_length": 175.07144165039062, |
|
"epoch": 0.34989503149055284, |
|
"grad_norm": 1.2043014764785767, |
|
"kl": 0.19212080538272858, |
|
"learning_rate": 3.6357107518856256e-07, |
|
"loss": 0.0002, |
|
"reward": 0.9285714626312256, |
|
"reward_std": 0.10101525485515594, |
|
"rewards/check_gptzero_func": 0.9285714626312256, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1429, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |