{
  "best_metric": 4.027148246765137,
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/npi-only/transformer/4/checkpoints/checkpoint-152640",
  "epoch": 1.0250006060157382,
  "eval_steps": 10,
  "global_step": 152640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.999998362119627e-05,
      "loss": 11.1231,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999161405248948e-05,
      "loss": 6.824,
      "step": 512
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998322810497896e-05,
      "loss": 6.1929,
      "step": 1024
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997484215746844e-05,
      "loss": 5.9737,
      "step": 1536
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996645620995792e-05,
      "loss": 5.8118,
      "step": 2048
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99580702624474e-05,
      "loss": 5.7019,
      "step": 2560
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994968431493688e-05,
      "loss": 5.598,
      "step": 3072
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994129836742636e-05,
      "loss": 5.5315,
      "step": 3584
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993291241991584e-05,
      "loss": 5.444,
      "step": 4096
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992452647240532e-05,
      "loss": 5.4024,
      "step": 4608
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99161405248948e-05,
      "loss": 5.3529,
      "step": 5120
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990775457738428e-05,
      "loss": 5.3115,
      "step": 5632
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989938500867749e-05,
      "loss": 5.2496,
      "step": 6144
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989099906116697e-05,
      "loss": 5.2092,
      "step": 6656
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988261311365645e-05,
      "loss": 5.1764,
      "step": 7168
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.987422716614593e-05,
      "loss": 5.1286,
      "step": 7680
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.986584121863541e-05,
      "loss": 5.1127,
      "step": 8192
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985745527112489e-05,
      "loss": 5.0799,
      "step": 8704
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984906932361437e-05,
      "loss": 5.0415,
      "step": 9216
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984068337610385e-05,
      "loss": 5.0196,
      "step": 9728
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.983229742859333e-05,
      "loss": 5.004,
      "step": 10240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9823927859886547e-05,
      "loss": 4.9743,
      "step": 10752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9815558291179756e-05,
      "loss": 4.955,
      "step": 11264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9807172343669236e-05,
      "loss": 4.9223,
      "step": 11776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9798786396158716e-05,
      "loss": 4.9166,
      "step": 12288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9790400448648195e-05,
      "loss": 4.8945,
      "step": 12800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9782014501137675e-05,
      "loss": 4.8715,
      "step": 13312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9773628553627155e-05,
      "loss": 4.8521,
      "step": 13824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9765242606116635e-05,
      "loss": 4.8266,
      "step": 14336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9756856658606115e-05,
      "loss": 4.8179,
      "step": 14848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9748487089899324e-05,
      "loss": 4.803,
      "step": 15360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9740101142388804e-05,
      "loss": 4.7952,
      "step": 15872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9731715194878284e-05,
      "loss": 4.7789,
      "step": 16384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.97233456261715e-05,
      "loss": 4.7661,
      "step": 16896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.971495967866098e-05,
      "loss": 4.7568,
      "step": 17408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.970657373115046e-05,
      "loss": 4.7461,
      "step": 17920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.969818778363994e-05,
      "loss": 4.7349,
      "step": 18432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968981821493315e-05,
      "loss": 4.7039,
      "step": 18944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968143226742263e-05,
      "loss": 4.7035,
      "step": 19456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.967304631991211e-05,
      "loss": 4.6783,
      "step": 19968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966466037240159e-05,
      "loss": 4.6836,
      "step": 20480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965627442489107e-05,
      "loss": 4.6638,
      "step": 20992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964788847738054e-05,
      "loss": 4.6661,
      "step": 21504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963950252987002e-05,
      "loss": 4.6447,
      "step": 22016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963113296116324e-05,
      "loss": 4.643,
      "step": 22528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.962274701365272e-05,
      "loss": 4.6326,
      "step": 23040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96143610661422e-05,
      "loss": 4.6258,
      "step": 23552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960597511863168e-05,
      "loss": 4.6149,
      "step": 24064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959758917112116e-05,
      "loss": 4.5886,
      "step": 24576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95892359812181e-05,
      "loss": 4.5976,
      "step": 25088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958085003370758e-05,
      "loss": 4.5989,
      "step": 25600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957246408619706e-05,
      "loss": 4.5916,
      "step": 26112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956407813868654e-05,
      "loss": 4.5715,
      "step": 26624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9555692191176016e-05,
      "loss": 4.5587,
      "step": 27136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954732262246923e-05,
      "loss": 4.5602,
      "step": 27648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953893667495871e-05,
      "loss": 4.5471,
      "step": 28160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953055072744819e-05,
      "loss": 4.5613,
      "step": 28672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.952216477993767e-05,
      "loss": 4.5225,
      "step": 29184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951377883242715e-05,
      "loss": 4.5405,
      "step": 29696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.950539288491663e-05,
      "loss": 4.5297,
      "step": 30208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.949700693740611e-05,
      "loss": 4.5101,
      "step": 30720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948862098989559e-05,
      "loss": 4.5199,
      "step": 31232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94802514211888e-05,
      "loss": 4.5046,
      "step": 31744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.947186547367828e-05,
      "loss": 4.4946,
      "step": 32256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946347952616776e-05,
      "loss": 4.4899,
      "step": 32768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945509357865724e-05,
      "loss": 4.4998,
      "step": 33280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.944672400995045e-05,
      "loss": 4.4796,
      "step": 33792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.943833806243993e-05,
      "loss": 4.4763,
      "step": 34304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942995211492941e-05,
      "loss": 4.4694,
      "step": 34816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942156616741889e-05,
      "loss": 4.4607,
      "step": 35328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9413196598712105e-05,
      "loss": 4.4691,
      "step": 35840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.940482703000532e-05,
      "loss": 4.4753,
      "step": 36352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9396441082494794e-05,
      "loss": 4.4483,
      "step": 36864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9388055134984274e-05,
      "loss": 4.4599,
      "step": 37376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9379669187473754e-05,
      "loss": 4.463,
      "step": 37888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9371283239963234e-05,
      "loss": 4.4457,
      "step": 38400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.936291367125644e-05,
      "loss": 4.4344,
      "step": 38912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.935452772374592e-05,
      "loss": 4.4367,
      "step": 39424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.93461417762354e-05,
      "loss": 4.4313,
      "step": 39936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.933775582872488e-05,
      "loss": 4.4215,
      "step": 40448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932936988121436e-05,
      "loss": 4.4167,
      "step": 40960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932100031250758e-05,
      "loss": 4.4271,
      "step": 41472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.931261436499706e-05,
      "loss": 4.4261,
      "step": 41984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.930422841748654e-05,
      "loss": 4.4082,
      "step": 42496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.929584246997602e-05,
      "loss": 4.3877,
      "step": 43008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.92874565224655e-05,
      "loss": 4.4003,
      "step": 43520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.927907057495498e-05,
      "loss": 4.4049,
      "step": 44032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.927070100624819e-05,
      "loss": 4.3923,
      "step": 44544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.926231505873767e-05,
      "loss": 4.3974,
      "step": 45056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.925392911122715e-05,
      "loss": 4.3791,
      "step": 45568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.924554316371663e-05,
      "loss": 4.3776,
      "step": 46080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.923715721620611e-05,
      "loss": 4.3726,
      "step": 46592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922878764749932e-05,
      "loss": 4.3707,
      "step": 47104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.92204016999888e-05,
      "loss": 4.3775,
      "step": 47616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921201575247828e-05,
      "loss": 4.3662,
      "step": 48128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.920362980496776e-05,
      "loss": 4.3823,
      "step": 48640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.919524385745724e-05,
      "loss": 4.3652,
      "step": 49152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918685790994672e-05,
      "loss": 4.3479,
      "step": 49664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91784719624362e-05,
      "loss": 4.3519,
      "step": 50176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9170086014925676e-05,
      "loss": 4.3586,
      "step": 50688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.916173282502262e-05,
      "loss": 4.3489,
      "step": 51200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91533468775121e-05,
      "loss": 4.3467,
      "step": 51712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914496093000158e-05,
      "loss": 4.3452,
      "step": 52224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.913657498249106e-05,
      "loss": 4.3354,
      "step": 52736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912818903498054e-05,
      "loss": 4.3377,
      "step": 53248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911980308747002e-05,
      "loss": 4.316,
      "step": 53760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91114171399595e-05,
      "loss": 4.3228,
      "step": 54272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.910304757125272e-05,
      "loss": 4.327,
      "step": 54784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90946616237422e-05,
      "loss": 4.3217,
      "step": 55296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.908627567623168e-05,
      "loss": 4.315,
      "step": 55808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907788972872115e-05,
      "loss": 4.3248,
      "step": 56320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9069520160014366e-05,
      "loss": 4.2985,
      "step": 56832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9061134212503846e-05,
      "loss": 4.3105,
      "step": 57344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9052748264993326e-05,
      "loss": 4.3034,
      "step": 57856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90443623174828e-05,
      "loss": 4.3096,
      "step": 58368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9035992748776015e-05,
      "loss": 4.316,
      "step": 58880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9027606801265495e-05,
      "loss": 4.3044,
      "step": 59392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9019220853754975e-05,
      "loss": 4.298,
      "step": 59904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9010834906244455e-05,
      "loss": 4.292,
      "step": 60416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.900246533753767e-05,
      "loss": 4.2983,
      "step": 60928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.899407939002715e-05,
      "loss": 4.2913,
      "step": 61440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8985693442516624e-05,
      "loss": 4.2913,
      "step": 61952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8977307495006104e-05,
      "loss": 4.2818,
      "step": 62464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.896893792629932e-05,
      "loss": 4.2804,
      "step": 62976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.89605519787888e-05,
      "loss": 4.2816,
      "step": 63488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.895216603127827e-05,
      "loss": 4.2698,
      "step": 64000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.894378008376775e-05,
      "loss": 4.2752,
      "step": 64512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893539413625723e-05,
      "loss": 4.2736,
      "step": 65024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.892702456755045e-05,
      "loss": 4.2692,
      "step": 65536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.891863862003992e-05,
      "loss": 4.2598,
      "step": 66048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.891025267252941e-05,
      "loss": 4.2771,
      "step": 66560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.890186672501889e-05,
      "loss": 4.2773,
      "step": 67072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88934971563121e-05,
      "loss": 4.2651,
      "step": 67584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888511120880158e-05,
      "loss": 4.2555,
      "step": 68096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.887672526129106e-05,
      "loss": 4.2629,
      "step": 68608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.886833931378054e-05,
      "loss": 4.2624,
      "step": 69120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8859969745073747e-05,
      "loss": 4.2621,
      "step": 69632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8851583797563226e-05,
      "loss": 4.257,
      "step": 70144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8843197850052706e-05,
      "loss": 4.2564,
      "step": 70656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8834811902542186e-05,
      "loss": 4.2518,
      "step": 71168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8826442333835395e-05,
      "loss": 4.2488,
      "step": 71680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8818056386324875e-05,
      "loss": 4.2436,
      "step": 72192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880967043881436e-05,
      "loss": 4.2466,
      "step": 72704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880128449130384e-05,
      "loss": 4.2379,
      "step": 73216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879291492259705e-05,
      "loss": 4.2334,
      "step": 73728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878452897508653e-05,
      "loss": 4.2444,
      "step": 74240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.877614302757601e-05,
      "loss": 4.2327,
      "step": 74752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.876775708006549e-05,
      "loss": 4.2407,
      "step": 75264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875937113255497e-05,
      "loss": 4.2297,
      "step": 75776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875100156384818e-05,
      "loss": 4.2235,
      "step": 76288
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.195745468139648,
      "eval_runtime": 307.7836,
      "eval_samples_per_second": 1239.803,
      "eval_steps_per_second": 38.745,
      "step": 76320
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.874261561633766e-05,
      "loss": 4.2157,
      "step": 76800
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.873422966882714e-05,
      "loss": 4.2142,
      "step": 77312
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.872584372131662e-05,
      "loss": 4.2256,
      "step": 77824
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.871747415260983e-05,
      "loss": 4.2177,
      "step": 78336
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8709088205099316e-05,
      "loss": 4.2193,
      "step": 78848
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8700702257588796e-05,
      "loss": 4.2085,
      "step": 79360
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8692316310078276e-05,
      "loss": 4.2066,
      "step": 79872
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8683946741371485e-05,
      "loss": 4.1983,
      "step": 80384
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8675560793860965e-05,
      "loss": 4.2119,
      "step": 80896
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8667174846350445e-05,
      "loss": 4.2045,
      "step": 81408
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8658788898839925e-05,
      "loss": 4.2167,
      "step": 81920
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8650419330133134e-05,
      "loss": 4.1982,
      "step": 82432
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.864204976142634e-05,
      "loss": 4.1934,
      "step": 82944
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.863366381391582e-05,
      "loss": 4.188,
      "step": 83456
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.86252778664053e-05,
      "loss": 4.1854,
      "step": 83968
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.861689191889478e-05,
      "loss": 4.1827,
      "step": 84480
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.860850597138426e-05,
      "loss": 4.1927,
      "step": 84992
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.860012002387375e-05,
      "loss": 4.1782,
      "step": 85504
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.859173407636323e-05,
      "loss": 4.1782,
      "step": 86016
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.858334812885271e-05,
      "loss": 4.1981,
      "step": 86528
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.857497856014592e-05,
      "loss": 4.1831,
      "step": 87040
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.85665926126354e-05,
      "loss": 4.1831,
      "step": 87552
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.855820666512488e-05,
      "loss": 4.173,
      "step": 88064
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.854982071761436e-05,
      "loss": 4.1847,
      "step": 88576
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.854145114890757e-05,
      "loss": 4.1698,
      "step": 89088
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.853306520139705e-05,
      "loss": 4.1711,
      "step": 89600
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.852467925388653e-05,
      "loss": 4.1689,
      "step": 90112
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.851629330637601e-05,
      "loss": 4.1623,
      "step": 90624
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8507923737669216e-05,
      "loss": 4.1563,
      "step": 91136
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.84995377901587e-05,
      "loss": 4.1566,
      "step": 91648
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.849115184264818e-05,
      "loss": 4.1653,
      "step": 92160
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.848276589513766e-05,
      "loss": 4.1647,
      "step": 92672
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.847439632643087e-05,
      "loss": 4.159,
      "step": 93184
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.846601037892035e-05,
      "loss": 4.1649,
      "step": 93696
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.845762443140983e-05,
      "loss": 4.1598,
      "step": 94208
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.844923848389931e-05,
      "loss": 4.158,
      "step": 94720
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.844086891519252e-05,
      "loss": 4.1461,
      "step": 95232
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8432482967682e-05,
      "loss": 4.1469,
      "step": 95744
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.842409702017148e-05,
      "loss": 4.1362,
      "step": 96256
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.841571107266096e-05,
      "loss": 4.1488,
      "step": 96768
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.840734150395417e-05,
      "loss": 4.1345,
      "step": 97280
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.839895555644366e-05,
      "loss": 4.1483,
      "step": 97792
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.839056960893314e-05,
      "loss": 4.1345,
      "step": 98304
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8382200040226346e-05,
      "loss": 4.138,
      "step": 98816
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8373814092715826e-05,
      "loss": 4.1386,
      "step": 99328
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8365428145205306e-05,
      "loss": 4.1348,
      "step": 99840
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8357042197694786e-05,
      "loss": 4.1334,
      "step": 100352
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8348672628987995e-05,
      "loss": 4.1186,
      "step": 100864
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8340286681477475e-05,
      "loss": 4.1242,
      "step": 101376
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8331900733966955e-05,
      "loss": 4.1353,
      "step": 101888
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8323514786456435e-05,
      "loss": 4.1308,
      "step": 102400
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8315145217749644e-05,
      "loss": 4.1231,
      "step": 102912
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8306759270239124e-05,
      "loss": 4.1126,
      "step": 103424
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.829837332272861e-05,
      "loss": 4.1226,
      "step": 103936
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.828998737521809e-05,
      "loss": 4.1088,
      "step": 104448
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.82816178065113e-05,
      "loss": 4.1323,
      "step": 104960
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.827323185900078e-05,
      "loss": 4.1026,
      "step": 105472
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.826484591149026e-05,
      "loss": 4.1241,
      "step": 105984
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.825645996397974e-05,
      "loss": 4.1165,
      "step": 106496
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.824809039527295e-05,
      "loss": 4.1038,
      "step": 107008
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.823970444776243e-05,
      "loss": 4.1143,
      "step": 107520
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.823131850025191e-05,
      "loss": 4.1111,
      "step": 108032
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.822293255274139e-05,
      "loss": 4.0983,
      "step": 108544
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.82145629840346e-05,
      "loss": 4.0998,
      "step": 109056
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.820617703652408e-05,
      "loss": 4.114,
      "step": 109568
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8197791089013564e-05,
      "loss": 4.1013,
      "step": 110080
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8189405141503044e-05,
      "loss": 4.0995,
      "step": 110592
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.818103557279625e-05,
      "loss": 4.096,
      "step": 111104
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.817264962528573e-05,
      "loss": 4.0922,
      "step": 111616
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.816426367777521e-05,
      "loss": 4.1017,
      "step": 112128
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.815587773026469e-05,
      "loss": 4.1133,
      "step": 112640
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.81475081615579e-05,
      "loss": 4.0929,
      "step": 113152
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.813912221404738e-05,
      "loss": 4.1094,
      "step": 113664
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.813073626653686e-05,
      "loss": 4.1092,
      "step": 114176
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.812235031902634e-05,
      "loss": 4.0954,
      "step": 114688
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.811398075031955e-05,
      "loss": 4.0906,
      "step": 115200
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.810559480280903e-05,
      "loss": 4.0995,
      "step": 115712
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.809720885529852e-05,
      "loss": 4.0949,
      "step": 116224
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8088822907788e-05,
      "loss": 4.0859,
      "step": 116736
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.808045333908121e-05,
      "loss": 4.0888,
      "step": 117248
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.807206739157069e-05,
      "loss": 4.0938,
      "step": 117760
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.806368144406017e-05,
      "loss": 4.101,
      "step": 118272
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.805529549654965e-05,
      "loss": 4.0883,
      "step": 118784
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8046925927842856e-05,
      "loss": 4.0733,
      "step": 119296
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8038539980332336e-05,
      "loss": 4.0826,
      "step": 119808
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8030154032821816e-05,
      "loss": 4.0863,
      "step": 120320
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8021768085311296e-05,
      "loss": 4.0839,
      "step": 120832
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8013398516604505e-05,
      "loss": 4.0899,
      "step": 121344
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8005012569093985e-05,
      "loss": 4.0756,
      "step": 121856
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.799662662158347e-05,
      "loss": 4.0714,
      "step": 122368
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7988240674072945e-05,
      "loss": 4.0713,
      "step": 122880
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.797987110536616e-05,
      "loss": 4.0738,
      "step": 123392
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.797148515785564e-05,
      "loss": 4.0777,
      "step": 123904
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.796309921034512e-05,
      "loss": 4.0739,
      "step": 124416
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7954713262834594e-05,
      "loss": 4.0909,
      "step": 124928
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.794634369412781e-05,
      "loss": 4.0787,
      "step": 125440
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.793795774661729e-05,
      "loss": 4.06,
      "step": 125952
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.792957179910677e-05,
      "loss": 4.0717,
      "step": 126464
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.792118585159624e-05,
      "loss": 4.0721,
      "step": 126976
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.791279990408572e-05,
      "loss": 4.0722,
      "step": 127488
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.790441395657521e-05,
      "loss": 4.0694,
      "step": 128000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.789604438786842e-05,
      "loss": 4.0671,
      "step": 128512
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.78876584403579e-05,
      "loss": 4.0618,
      "step": 129024
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.787927249284738e-05,
      "loss": 4.0715,
      "step": 129536
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.787088654533686e-05,
      "loss": 4.0471,
      "step": 130048
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.786250059782634e-05,
      "loss": 4.0601,
      "step": 130560
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.785413102911955e-05,
      "loss": 4.0593,
      "step": 131072
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.784574508160903e-05,
      "loss": 4.0614,
      "step": 131584
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.783735913409851e-05,
      "loss": 4.0509,
      "step": 132096
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.782897318658799e-05,
      "loss": 4.0616,
      "step": 132608
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.782058723907747e-05,
      "loss": 4.0475,
      "step": 133120
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.781220129156695e-05,
      "loss": 4.0539,
      "step": 133632
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.780383172286016e-05,
      "loss": 4.0513,
      "step": 134144
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.779544577534964e-05,
      "loss": 4.052,
      "step": 134656
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.778705982783912e-05,
      "loss": 4.0673,
      "step": 135168
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.77786738803286e-05,
      "loss": 4.058,
      "step": 135680
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.777030431162181e-05,
      "loss": 4.051,
      "step": 136192
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.776191836411129e-05,
      "loss": 4.0467,
      "step": 136704
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.775353241660077e-05,
      "loss": 4.0513,
      "step": 137216
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.774514646909025e-05,
      "loss": 4.0494,
      "step": 137728
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.773676052157973e-05,
      "loss": 4.0472,
      "step": 138240
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.772837457406921e-05,
      "loss": 4.0433,
      "step": 138752
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.771998862655869e-05,
      "loss": 4.0426,
      "step": 139264
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.771160267904817e-05,
      "loss": 4.0466,
      "step": 139776
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.770323311034138e-05,
      "loss": 4.0342,
      "step": 140288
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.769484716283086e-05,
      "loss": 4.0414,
      "step": 140800
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.768646121532034e-05,
      "loss": 4.0402,
      "step": 141312
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.767807526780983e-05,
      "loss": 4.0405,
      "step": 141824
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7669705699103036e-05,
      "loss": 4.0289,
      "step": 142336
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7661319751592516e-05,
      "loss": 4.0511,
      "step": 142848
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7652933804081996e-05,
      "loss": 4.0546,
      "step": 143360
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7644547856571476e-05,
      "loss": 4.0387,
      "step": 143872
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7636194666668415e-05,
      "loss": 4.0316,
      "step": 144384
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7627808719157894e-05,
      "loss": 4.039,
      "step": 144896
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7619422771647374e-05,
      "loss": 4.0425,
      "step": 145408
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7611036824136854e-05,
      "loss": 4.042,
      "step": 145920
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7602650876626334e-05,
      "loss": 4.0368,
      "step": 146432
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.759428130791955e-05,
      "loss": 4.0373,
      "step": 146944
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.758589536040903e-05,
      "loss": 4.0337,
      "step": 147456
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.757750941289851e-05,
      "loss": 4.0385,
      "step": 147968
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.756912346538799e-05,
      "loss": 4.0265,
      "step": 148480
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.756073751787747e-05,
      "loss": 4.0361,
      "step": 148992
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.755235157036695e-05,
      "loss": 4.0328,
      "step": 149504
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.754398200166016e-05,
      "loss": 4.0204,
      "step": 150016
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.753559605414964e-05,
      "loss": 4.034,
      "step": 150528
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.752721010663912e-05,
      "loss": 4.0251,
      "step": 151040
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.75188241591286e-05,
      "loss": 4.0333,
      "step": 151552
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.751043821161808e-05,
      "loss": 4.0217,
      "step": 152064
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.750205226410755e-05,
      "loss": 4.019,
      "step": 152576
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.027148246765137,
      "eval_runtime": 308.4603,
      "eval_samples_per_second": 1237.083,
      "eval_steps_per_second": 38.66,
      "step": 152640
    }
  ],
  "logging_steps": 512,
  "max_steps": 3052726,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 10,
  "total_flos": 1.0644055367242752e+17,
  "trial_name": null,
  "trial_params": null
}