Rakhman16's picture
Training in progress, step 1500, checkpoint
19062b2 verified
raw
history blame
9.18 kB
{
"best_metric": 0.11999432742595673,
"best_model_checkpoint": "./fine-tuned/checkpoint-1500",
"epoch": 0.26345832967418986,
"eval_steps": 100,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008781944322472996,
"grad_norm": 212427.96875,
"learning_rate": 2.9934129632882487e-05,
"loss": 0.5421,
"step": 50
},
{
"epoch": 0.017563888644945992,
"grad_norm": 15316.291015625,
"learning_rate": 2.9868259265764974e-05,
"loss": 0.1903,
"step": 100
},
{
"epoch": 0.017563888644945992,
"eval_loss": 0.16095133125782013,
"eval_runtime": 175.5949,
"eval_samples_per_second": 25.399,
"eval_steps_per_second": 3.178,
"step": 100
},
{
"epoch": 0.026345832967418988,
"grad_norm": 21344.13671875,
"learning_rate": 2.980238889864746e-05,
"loss": 0.1742,
"step": 150
},
{
"epoch": 0.035127777289891984,
"grad_norm": 26603.357421875,
"learning_rate": 2.973651853152995e-05,
"loss": 0.164,
"step": 200
},
{
"epoch": 0.035127777289891984,
"eval_loss": 0.14671418070793152,
"eval_runtime": 175.3478,
"eval_samples_per_second": 25.435,
"eval_steps_per_second": 3.182,
"step": 200
},
{
"epoch": 0.04390972161236498,
"grad_norm": 18468.01953125,
"learning_rate": 2.9670648164412437e-05,
"loss": 0.1697,
"step": 250
},
{
"epoch": 0.052691665934837977,
"grad_norm": 15799.6875,
"learning_rate": 2.9604777797294924e-05,
"loss": 0.161,
"step": 300
},
{
"epoch": 0.052691665934837977,
"eval_loss": 0.14008501172065735,
"eval_runtime": 175.2345,
"eval_samples_per_second": 25.452,
"eval_steps_per_second": 3.184,
"step": 300
},
{
"epoch": 0.06147361025731097,
"grad_norm": 17163.763671875,
"learning_rate": 2.953890743017741e-05,
"loss": 0.1634,
"step": 350
},
{
"epoch": 0.07025555457978397,
"grad_norm": 17603.025390625,
"learning_rate": 2.94730370630599e-05,
"loss": 0.1543,
"step": 400
},
{
"epoch": 0.07025555457978397,
"eval_loss": 0.13591521978378296,
"eval_runtime": 175.0506,
"eval_samples_per_second": 25.478,
"eval_steps_per_second": 3.188,
"step": 400
},
{
"epoch": 0.07903749890225696,
"grad_norm": 12623.9189453125,
"learning_rate": 2.9407166695942387e-05,
"loss": 0.1417,
"step": 450
},
{
"epoch": 0.08781944322472995,
"grad_norm": 14828.5,
"learning_rate": 2.9341296328824874e-05,
"loss": 0.1403,
"step": 500
},
{
"epoch": 0.08781944322472995,
"eval_loss": 0.13329531252384186,
"eval_runtime": 175.1721,
"eval_samples_per_second": 25.461,
"eval_steps_per_second": 3.185,
"step": 500
},
{
"epoch": 0.09660138754720295,
"grad_norm": 16192.8515625,
"learning_rate": 2.927542596170736e-05,
"loss": 0.1444,
"step": 550
},
{
"epoch": 0.10538333186967595,
"grad_norm": 20510.47265625,
"learning_rate": 2.9209555594589847e-05,
"loss": 0.1466,
"step": 600
},
{
"epoch": 0.10538333186967595,
"eval_loss": 0.1307835429906845,
"eval_runtime": 175.06,
"eval_samples_per_second": 25.477,
"eval_steps_per_second": 3.187,
"step": 600
},
{
"epoch": 0.11416527619214895,
"grad_norm": 10555.8408203125,
"learning_rate": 2.9143685227472337e-05,
"loss": 0.1472,
"step": 650
},
{
"epoch": 0.12294722051462194,
"grad_norm": 12451.990234375,
"learning_rate": 2.907781486035482e-05,
"loss": 0.1415,
"step": 700
},
{
"epoch": 0.12294722051462194,
"eval_loss": 0.1288571059703827,
"eval_runtime": 175.1799,
"eval_samples_per_second": 25.46,
"eval_steps_per_second": 3.185,
"step": 700
},
{
"epoch": 0.13172916483709493,
"grad_norm": 11173.96875,
"learning_rate": 2.901194449323731e-05,
"loss": 0.1368,
"step": 750
},
{
"epoch": 0.14051110915956794,
"grad_norm": 47561.75,
"learning_rate": 2.8946074126119797e-05,
"loss": 0.1399,
"step": 800
},
{
"epoch": 0.14051110915956794,
"eval_loss": 0.12726937234401703,
"eval_runtime": 175.2229,
"eval_samples_per_second": 25.453,
"eval_steps_per_second": 3.185,
"step": 800
},
{
"epoch": 0.14929305348204092,
"grad_norm": 11766.6767578125,
"learning_rate": 2.8880203759002283e-05,
"loss": 0.1433,
"step": 850
},
{
"epoch": 0.15807499780451392,
"grad_norm": 14977.416015625,
"learning_rate": 2.881433339188477e-05,
"loss": 0.1371,
"step": 900
},
{
"epoch": 0.15807499780451392,
"eval_loss": 0.12529444694519043,
"eval_runtime": 174.8253,
"eval_samples_per_second": 25.511,
"eval_steps_per_second": 3.192,
"step": 900
},
{
"epoch": 0.1668569421269869,
"grad_norm": 11109.173828125,
"learning_rate": 2.874846302476726e-05,
"loss": 0.1292,
"step": 950
},
{
"epoch": 0.1756388864494599,
"grad_norm": 9897.7958984375,
"learning_rate": 2.8682592657649747e-05,
"loss": 0.1351,
"step": 1000
},
{
"epoch": 0.1756388864494599,
"eval_loss": 0.12485189735889435,
"eval_runtime": 174.8115,
"eval_samples_per_second": 25.513,
"eval_steps_per_second": 3.192,
"step": 1000
},
{
"epoch": 0.18442083077193291,
"grad_norm": 20060.55859375,
"learning_rate": 2.8616722290532233e-05,
"loss": 0.1303,
"step": 1050
},
{
"epoch": 0.1932027750944059,
"grad_norm": 10244.4052734375,
"learning_rate": 2.855085192341472e-05,
"loss": 0.1413,
"step": 1100
},
{
"epoch": 0.1932027750944059,
"eval_loss": 0.12359971553087234,
"eval_runtime": 175.122,
"eval_samples_per_second": 25.468,
"eval_steps_per_second": 3.186,
"step": 1100
},
{
"epoch": 0.2019847194168789,
"grad_norm": 36993.25,
"learning_rate": 2.848498155629721e-05,
"loss": 0.1275,
"step": 1150
},
{
"epoch": 0.2107666637393519,
"grad_norm": 11102.2646484375,
"learning_rate": 2.8419111189179697e-05,
"loss": 0.1377,
"step": 1200
},
{
"epoch": 0.2107666637393519,
"eval_loss": 0.12276890873908997,
"eval_runtime": 175.1309,
"eval_samples_per_second": 25.467,
"eval_steps_per_second": 3.186,
"step": 1200
},
{
"epoch": 0.21954860806182488,
"grad_norm": 10398.369140625,
"learning_rate": 2.835324082206218e-05,
"loss": 0.1356,
"step": 1250
},
{
"epoch": 0.2283305523842979,
"grad_norm": 14664.177734375,
"learning_rate": 2.828737045494467e-05,
"loss": 0.1309,
"step": 1300
},
{
"epoch": 0.2283305523842979,
"eval_loss": 0.1219501867890358,
"eval_runtime": 174.8703,
"eval_samples_per_second": 25.505,
"eval_steps_per_second": 3.191,
"step": 1300
},
{
"epoch": 0.23711249670677087,
"grad_norm": 9694.1875,
"learning_rate": 2.8221500087827156e-05,
"loss": 0.1271,
"step": 1350
},
{
"epoch": 0.24589444102924388,
"grad_norm": 17376.810546875,
"learning_rate": 2.8155629720709643e-05,
"loss": 0.1434,
"step": 1400
},
{
"epoch": 0.24589444102924388,
"eval_loss": 0.12065327912569046,
"eval_runtime": 174.9734,
"eval_samples_per_second": 25.49,
"eval_steps_per_second": 3.189,
"step": 1400
},
{
"epoch": 0.2546763853517169,
"grad_norm": 13443.2255859375,
"learning_rate": 2.808975935359213e-05,
"loss": 0.1383,
"step": 1450
},
{
"epoch": 0.26345832967418986,
"grad_norm": 10927.8994140625,
"learning_rate": 2.802388898647462e-05,
"loss": 0.125,
"step": 1500
},
{
"epoch": 0.26345832967418986,
"eval_loss": 0.11999432742595673,
"eval_runtime": 174.9084,
"eval_samples_per_second": 25.499,
"eval_steps_per_second": 3.19,
"step": 1500
}
],
"logging_steps": 50,
"max_steps": 22772,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7307494686720000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}