|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9965075669383, |
|
"global_step": 1287, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.905, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9999880457421163e-05, |
|
"loss": 0.6497, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9999521832542736e-05, |
|
"loss": 0.6121, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9998924133938902e-05, |
|
"loss": 0.7236, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9998087375899756e-05, |
|
"loss": 0.7515, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9997011578430938e-05, |
|
"loss": 0.7073, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9995696767253165e-05, |
|
"loss": 0.6146, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9994142973801627e-05, |
|
"loss": 0.5923, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9992350235225215e-05, |
|
"loss": 0.5629, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.999031859438565e-05, |
|
"loss": 0.5383, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9988048099856443e-05, |
|
"loss": 0.516, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9985538805921757e-05, |
|
"loss": 0.5035, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.998279077257508e-05, |
|
"loss": 0.5244, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9979804065517808e-05, |
|
"loss": 0.486, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9976578756157684e-05, |
|
"loss": 0.4945, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9973114921607055e-05, |
|
"loss": 0.4966, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9969412644681077e-05, |
|
"loss": 0.4935, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9965472013895685e-05, |
|
"loss": 0.4739, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.996129312346552e-05, |
|
"loss": 0.4913, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9956876073301645e-05, |
|
"loss": 0.4641, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9952220969009175e-05, |
|
"loss": 0.4691, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9947327921884746e-05, |
|
"loss": 0.4666, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.994219704891385e-05, |
|
"loss": 0.4501, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9936828472768043e-05, |
|
"loss": 0.4558, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9931222321802016e-05, |
|
"loss": 0.4712, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9925378730050518e-05, |
|
"loss": 0.4661, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9919297837225152e-05, |
|
"loss": 0.4735, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9912979788711042e-05, |
|
"loss": 0.4526, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.990642473556335e-05, |
|
"loss": 0.4453, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9899632834503662e-05, |
|
"loss": 0.4713, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.989260424791626e-05, |
|
"loss": 0.4622, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9885339143844217e-05, |
|
"loss": 0.4585, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.987783769598538e-05, |
|
"loss": 0.4576, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9870100083688242e-05, |
|
"loss": 0.4353, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9862126491947624e-05, |
|
"loss": 0.4509, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.985391711140027e-05, |
|
"loss": 0.4402, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9845472138320282e-05, |
|
"loss": 0.437, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9836791774614437e-05, |
|
"loss": 0.4613, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.982787622781735e-05, |
|
"loss": 0.4567, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9818725711086506e-05, |
|
"loss": 0.4541, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.980934044319718e-05, |
|
"loss": 0.4398, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9799720648537197e-05, |
|
"loss": 0.4283, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.978986655710157e-05, |
|
"loss": 0.4443, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9779778404487e-05, |
|
"loss": 0.4457, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9769456431886244e-05, |
|
"loss": 0.4326, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9758900886082343e-05, |
|
"loss": 0.4557, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9748112019442734e-05, |
|
"loss": 0.4402, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9737090089913205e-05, |
|
"loss": 0.465, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9725835361011726e-05, |
|
"loss": 0.4387, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.971434810182217e-05, |
|
"loss": 0.4479, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9702628586987846e-05, |
|
"loss": 0.4344, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9690677096704964e-05, |
|
"loss": 0.4302, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9678493916715914e-05, |
|
"loss": 0.4331, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.966607933830245e-05, |
|
"loss": 0.4224, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9653433658278717e-05, |
|
"loss": 0.4225, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9640557178984152e-05, |
|
"loss": 0.4177, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9627450208276265e-05, |
|
"loss": 0.4546, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9614113059523273e-05, |
|
"loss": 0.4257, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9600546051596604e-05, |
|
"loss": 0.4453, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9586749508863284e-05, |
|
"loss": 0.458, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9572723761178168e-05, |
|
"loss": 0.4287, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.955846914387607e-05, |
|
"loss": 0.4581, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.954398599776373e-05, |
|
"loss": 0.4343, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.952927466911168e-05, |
|
"loss": 0.4431, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9514335509645948e-05, |
|
"loss": 0.4332, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9499168876539666e-05, |
|
"loss": 0.4315, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9483775132404517e-05, |
|
"loss": 0.4403, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.946815464528208e-05, |
|
"loss": 0.4618, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9452307788635015e-05, |
|
"loss": 0.4292, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9436234941338145e-05, |
|
"loss": 0.4333, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9419936487669396e-05, |
|
"loss": 0.4557, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.94034128173006e-05, |
|
"loss": 0.4575, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.938666432528819e-05, |
|
"loss": 0.4012, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9369691412063755e-05, |
|
"loss": 0.4579, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9352494483424456e-05, |
|
"loss": 0.4337, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9335073950523335e-05, |
|
"loss": 0.4142, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9317430229859474e-05, |
|
"loss": 0.4545, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.929956374326805e-05, |
|
"loss": 0.4679, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.928147491791024e-05, |
|
"loss": 0.4178, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9263164186263003e-05, |
|
"loss": 0.4474, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9244631986108768e-05, |
|
"loss": 0.4237, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.922587876052492e-05, |
|
"loss": 0.4456, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.920690495787326e-05, |
|
"loss": 0.412, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.918771103178924e-05, |
|
"loss": 0.4279, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.916829744117115e-05, |
|
"loss": 0.413, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9148664650169128e-05, |
|
"loss": 0.4508, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9128813128174063e-05, |
|
"loss": 0.4054, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9108743349806382e-05, |
|
"loss": 0.4021, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.90884557949047e-05, |
|
"loss": 0.4392, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9067950948514343e-05, |
|
"loss": 0.4414, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate&qu |