Cherran's picture
Upload folder using huggingface_hub
929cbcc verified
raw
history blame
8.72 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9890909090909092,
"eval_steps": 5,
"global_step": 136,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07272727272727272,
"eval_loss": 0.8767483830451965,
"eval_runtime": 30.8146,
"eval_samples_per_second": 15.285,
"eval_steps_per_second": 3.829,
"step": 5
},
{
"epoch": 0.14545454545454545,
"grad_norm": 3.115849256515503,
"learning_rate": 3.571428571428572e-05,
"loss": 3.2126,
"step": 10
},
{
"epoch": 0.14545454545454545,
"eval_loss": 0.779396116733551,
"eval_runtime": 27.2217,
"eval_samples_per_second": 17.302,
"eval_steps_per_second": 4.335,
"step": 10
},
{
"epoch": 0.21818181818181817,
"eval_loss": 0.7435232996940613,
"eval_runtime": 27.2836,
"eval_samples_per_second": 17.263,
"eval_steps_per_second": 4.325,
"step": 15
},
{
"epoch": 0.2909090909090909,
"grad_norm": 2.84679913520813,
"learning_rate": 7.142857142857143e-05,
"loss": 2.9019,
"step": 20
},
{
"epoch": 0.2909090909090909,
"eval_loss": 0.6930269598960876,
"eval_runtime": 27.2747,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.326,
"step": 20
},
{
"epoch": 0.36363636363636365,
"eval_loss": 0.6732496619224548,
"eval_runtime": 27.2843,
"eval_samples_per_second": 17.263,
"eval_steps_per_second": 4.325,
"step": 25
},
{
"epoch": 0.43636363636363634,
"grad_norm": 2.5042567253112793,
"learning_rate": 9.998342337571565e-05,
"loss": 2.9314,
"step": 30
},
{
"epoch": 0.43636363636363634,
"eval_loss": 0.6518906950950623,
"eval_runtime": 27.2739,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.326,
"step": 30
},
{
"epoch": 0.509090909090909,
"eval_loss": 0.6448661088943481,
"eval_runtime": 27.279,
"eval_samples_per_second": 17.266,
"eval_steps_per_second": 4.326,
"step": 35
},
{
"epoch": 0.5818181818181818,
"grad_norm": 2.148580312728882,
"learning_rate": 9.940439480455386e-05,
"loss": 2.3866,
"step": 40
},
{
"epoch": 0.5818181818181818,
"eval_loss": 0.6359825730323792,
"eval_runtime": 27.2872,
"eval_samples_per_second": 17.261,
"eval_steps_per_second": 4.324,
"step": 40
},
{
"epoch": 0.6545454545454545,
"eval_loss": 0.6275559663772583,
"eval_runtime": 27.2773,
"eval_samples_per_second": 17.267,
"eval_steps_per_second": 4.326,
"step": 45
},
{
"epoch": 0.7272727272727273,
"grad_norm": 3.116907835006714,
"learning_rate": 9.800749368358009e-05,
"loss": 2.7002,
"step": 50
},
{
"epoch": 0.7272727272727273,
"eval_loss": 0.6227777004241943,
"eval_runtime": 27.2702,
"eval_samples_per_second": 17.272,
"eval_steps_per_second": 4.327,
"step": 50
},
{
"epoch": 0.8,
"eval_loss": 0.6195926070213318,
"eval_runtime": 27.263,
"eval_samples_per_second": 17.276,
"eval_steps_per_second": 4.328,
"step": 55
},
{
"epoch": 0.8727272727272727,
"grad_norm": 4.004441738128662,
"learning_rate": 9.581584522435024e-05,
"loss": 2.5389,
"step": 60
},
{
"epoch": 0.8727272727272727,
"eval_loss": 0.6146515011787415,
"eval_runtime": 27.269,
"eval_samples_per_second": 17.272,
"eval_steps_per_second": 4.327,
"step": 60
},
{
"epoch": 0.9454545454545454,
"eval_loss": 0.610045850276947,
"eval_runtime": 27.3064,
"eval_samples_per_second": 17.249,
"eval_steps_per_second": 4.321,
"step": 65
},
{
"epoch": 1.029090909090909,
"grad_norm": 2.843431234359741,
"learning_rate": 9.286573140381662e-05,
"loss": 2.4095,
"step": 70
},
{
"epoch": 1.029090909090909,
"eval_loss": 0.6076003909111023,
"eval_runtime": 27.2492,
"eval_samples_per_second": 17.285,
"eval_steps_per_second": 4.33,
"step": 70
},
{
"epoch": 1.1018181818181818,
"eval_loss": 0.6160494089126587,
"eval_runtime": 27.2598,
"eval_samples_per_second": 17.278,
"eval_steps_per_second": 4.329,
"step": 75
},
{
"epoch": 1.1745454545454546,
"grad_norm": 4.093331336975098,
"learning_rate": 8.920599032883554e-05,
"loss": 2.0692,
"step": 80
},
{
"epoch": 1.1745454545454546,
"eval_loss": 0.6218172311782837,
"eval_runtime": 27.2772,
"eval_samples_per_second": 17.267,
"eval_steps_per_second": 4.326,
"step": 80
},
{
"epoch": 1.2472727272727273,
"eval_loss": 0.6184111833572388,
"eval_runtime": 27.3075,
"eval_samples_per_second": 17.248,
"eval_steps_per_second": 4.321,
"step": 85
},
{
"epoch": 1.32,
"grad_norm": 2.453794240951538,
"learning_rate": 8.489720773831717e-05,
"loss": 1.8616,
"step": 90
},
{
"epoch": 1.32,
"eval_loss": 0.6163813471794128,
"eval_runtime": 27.2607,
"eval_samples_per_second": 17.278,
"eval_steps_per_second": 4.329,
"step": 90
},
{
"epoch": 1.3927272727272726,
"eval_loss": 0.6139249801635742,
"eval_runtime": 27.2587,
"eval_samples_per_second": 17.279,
"eval_steps_per_second": 4.329,
"step": 95
},
{
"epoch": 1.4654545454545453,
"grad_norm": 3.4409964084625244,
"learning_rate": 8.001071402741842e-05,
"loss": 2.1618,
"step": 100
},
{
"epoch": 1.4654545454545453,
"eval_loss": 0.6118062734603882,
"eval_runtime": 27.245,
"eval_samples_per_second": 17.288,
"eval_steps_per_second": 4.331,
"step": 100
},
{
"epoch": 1.538181818181818,
"eval_loss": 0.610755205154419,
"eval_runtime": 27.2505,
"eval_samples_per_second": 17.284,
"eval_steps_per_second": 4.33,
"step": 105
},
{
"epoch": 1.6109090909090908,
"grad_norm": 3.2959372997283936,
"learning_rate": 7.462740339769324e-05,
"loss": 2.0259,
"step": 110
},
{
"epoch": 1.6109090909090908,
"eval_loss": 0.6130332350730896,
"eval_runtime": 27.2607,
"eval_samples_per_second": 17.278,
"eval_steps_per_second": 4.329,
"step": 110
},
{
"epoch": 1.6836363636363636,
"eval_loss": 0.6104254722595215,
"eval_runtime": 27.2481,
"eval_samples_per_second": 17.286,
"eval_steps_per_second": 4.331,
"step": 115
},
{
"epoch": 1.7563636363636363,
"grad_norm": 2.7095448970794678,
"learning_rate": 6.883639468175927e-05,
"loss": 1.8146,
"step": 120
},
{
"epoch": 1.7563636363636363,
"eval_loss": 0.6095255613327026,
"eval_runtime": 27.2419,
"eval_samples_per_second": 17.29,
"eval_steps_per_second": 4.332,
"step": 120
},
{
"epoch": 1.829090909090909,
"eval_loss": 0.6098220944404602,
"eval_runtime": 27.2413,
"eval_samples_per_second": 17.29,
"eval_steps_per_second": 4.332,
"step": 125
},
{
"epoch": 1.9018181818181819,
"grad_norm": 3.7072970867156982,
"learning_rate": 6.273355601206144e-05,
"loss": 1.6977,
"step": 130
},
{
"epoch": 1.9018181818181819,
"eval_loss": 0.6083605289459229,
"eval_runtime": 27.262,
"eval_samples_per_second": 17.277,
"eval_steps_per_second": 4.328,
"step": 130
},
{
"epoch": 1.9745454545454546,
"eval_loss": 0.6045976281166077,
"eval_runtime": 27.2423,
"eval_samples_per_second": 17.289,
"eval_steps_per_second": 4.332,
"step": 135
}
],
"logging_steps": 10,
"max_steps": 272,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.413948656205824e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}