|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 10075, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.8014888337469e-06, |
|
"loss": 0.2058, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.602977667493797e-06, |
|
"loss": 0.1586, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.404466501240696e-06, |
|
"loss": 0.1283, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.205955334987593e-06, |
|
"loss": 0.1288, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.007444168734492e-06, |
|
"loss": 0.1259, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.80893300248139e-06, |
|
"loss": 0.1196, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.610421836228289e-06, |
|
"loss": 0.125, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.411910669975186e-06, |
|
"loss": 0.1152, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.213399503722085e-06, |
|
"loss": 0.1141, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.014888337468984e-06, |
|
"loss": 0.1088, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.816377171215881e-06, |
|
"loss": 0.0938, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.6178660049627794e-06, |
|
"loss": 0.0882, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.4193548387096784e-06, |
|
"loss": 0.0908, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 7.220843672456577e-06, |
|
"loss": 0.0982, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.022332506203474e-06, |
|
"loss": 0.0938, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.823821339950373e-06, |
|
"loss": 0.0997, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.625310173697271e-06, |
|
"loss": 0.0903, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.426799007444169e-06, |
|
"loss": 0.1015, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.228287841191067e-06, |
|
"loss": 0.0719, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.029776674937966e-06, |
|
"loss": 0.0874, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.831265508684864e-06, |
|
"loss": 0.0783, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.632754342431762e-06, |
|
"loss": 0.0737, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 5.434243176178661e-06, |
|
"loss": 0.0657, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.235732009925558e-06, |
|
"loss": 0.0754, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.037220843672457e-06, |
|
"loss": 0.0734, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 0.081, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 4.640198511166253e-06, |
|
"loss": 0.0701, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.4416873449131515e-06, |
|
"loss": 0.0781, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.24317617866005e-06, |
|
"loss": 0.074, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.044665012406948e-06, |
|
"loss": 0.0722, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 0.0551, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.6476426799007445e-06, |
|
"loss": 0.0526, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.4491315136476427e-06, |
|
"loss": 0.0645, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.2506203473945412e-06, |
|
"loss": 0.064, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.0521091811414394e-06, |
|
"loss": 0.0569, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.8535980148883375e-06, |
|
"loss": 0.066, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.655086848635236e-06, |
|
"loss": 0.0616, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.4565756823821343e-06, |
|
"loss": 0.0622, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"loss": 0.0561, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.0595533498759305e-06, |
|
"loss": 0.0544, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.861042183622829e-06, |
|
"loss": 0.0541, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.662531017369727e-06, |
|
"loss": 0.0532, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.4640198511166254e-06, |
|
"loss": 0.046, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.2655086848635238e-06, |
|
"loss": 0.0522, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.066997518610422e-06, |
|
"loss": 0.0575, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.684863523573202e-07, |
|
"loss": 0.0534, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.699751861042183e-07, |
|
"loss": 0.0466, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.714640198511167e-07, |
|
"loss": 0.0485, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.7295285359801494e-07, |
|
"loss": 0.0624, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.444168734491315e-08, |
|
"loss": 0.0436, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 10075, |
|
"total_flos": 66843438723916800, |
|
"train_runtime": 8275.4395, |
|
"train_samples_per_second": 1.217 |
|
} |
|
], |
|
"max_steps": 10075, |
|
"num_train_epochs": 5, |
|
"total_flos": 66843438723916800, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|