|
{ |
|
"best_metric": 0.11999432742595673, |
|
"best_model_checkpoint": "./fine-tuned/checkpoint-1500", |
|
"epoch": 0.26345832967418986, |
|
"eval_steps": 100, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008781944322472996, |
|
"grad_norm": 212427.96875, |
|
"learning_rate": 2.9934129632882487e-05, |
|
"loss": 0.5421, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.017563888644945992, |
|
"grad_norm": 15316.291015625, |
|
"learning_rate": 2.9868259265764974e-05, |
|
"loss": 0.1903, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.017563888644945992, |
|
"eval_loss": 0.16095133125782013, |
|
"eval_runtime": 175.5949, |
|
"eval_samples_per_second": 25.399, |
|
"eval_steps_per_second": 3.178, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026345832967418988, |
|
"grad_norm": 21344.13671875, |
|
"learning_rate": 2.980238889864746e-05, |
|
"loss": 0.1742, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.035127777289891984, |
|
"grad_norm": 26603.357421875, |
|
"learning_rate": 2.973651853152995e-05, |
|
"loss": 0.164, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.035127777289891984, |
|
"eval_loss": 0.14671418070793152, |
|
"eval_runtime": 175.3478, |
|
"eval_samples_per_second": 25.435, |
|
"eval_steps_per_second": 3.182, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04390972161236498, |
|
"grad_norm": 18468.01953125, |
|
"learning_rate": 2.9670648164412437e-05, |
|
"loss": 0.1697, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.052691665934837977, |
|
"grad_norm": 15799.6875, |
|
"learning_rate": 2.9604777797294924e-05, |
|
"loss": 0.161, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.052691665934837977, |
|
"eval_loss": 0.14008501172065735, |
|
"eval_runtime": 175.2345, |
|
"eval_samples_per_second": 25.452, |
|
"eval_steps_per_second": 3.184, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06147361025731097, |
|
"grad_norm": 17163.763671875, |
|
"learning_rate": 2.953890743017741e-05, |
|
"loss": 0.1634, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07025555457978397, |
|
"grad_norm": 17603.025390625, |
|
"learning_rate": 2.94730370630599e-05, |
|
"loss": 0.1543, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07025555457978397, |
|
"eval_loss": 0.13591521978378296, |
|
"eval_runtime": 175.0506, |
|
"eval_samples_per_second": 25.478, |
|
"eval_steps_per_second": 3.188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07903749890225696, |
|
"grad_norm": 12623.9189453125, |
|
"learning_rate": 2.9407166695942387e-05, |
|
"loss": 0.1417, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08781944322472995, |
|
"grad_norm": 14828.5, |
|
"learning_rate": 2.9341296328824874e-05, |
|
"loss": 0.1403, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08781944322472995, |
|
"eval_loss": 0.13329531252384186, |
|
"eval_runtime": 175.1721, |
|
"eval_samples_per_second": 25.461, |
|
"eval_steps_per_second": 3.185, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09660138754720295, |
|
"grad_norm": 16192.8515625, |
|
"learning_rate": 2.927542596170736e-05, |
|
"loss": 0.1444, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10538333186967595, |
|
"grad_norm": 20510.47265625, |
|
"learning_rate": 2.9209555594589847e-05, |
|
"loss": 0.1466, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10538333186967595, |
|
"eval_loss": 0.1307835429906845, |
|
"eval_runtime": 175.06, |
|
"eval_samples_per_second": 25.477, |
|
"eval_steps_per_second": 3.187, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11416527619214895, |
|
"grad_norm": 10555.8408203125, |
|
"learning_rate": 2.9143685227472337e-05, |
|
"loss": 0.1472, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12294722051462194, |
|
"grad_norm": 12451.990234375, |
|
"learning_rate": 2.907781486035482e-05, |
|
"loss": 0.1415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12294722051462194, |
|
"eval_loss": 0.1288571059703827, |
|
"eval_runtime": 175.1799, |
|
"eval_samples_per_second": 25.46, |
|
"eval_steps_per_second": 3.185, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13172916483709493, |
|
"grad_norm": 11173.96875, |
|
"learning_rate": 2.901194449323731e-05, |
|
"loss": 0.1368, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14051110915956794, |
|
"grad_norm": 47561.75, |
|
"learning_rate": 2.8946074126119797e-05, |
|
"loss": 0.1399, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14051110915956794, |
|
"eval_loss": 0.12726937234401703, |
|
"eval_runtime": 175.2229, |
|
"eval_samples_per_second": 25.453, |
|
"eval_steps_per_second": 3.185, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14929305348204092, |
|
"grad_norm": 11766.6767578125, |
|
"learning_rate": 2.8880203759002283e-05, |
|
"loss": 0.1433, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.15807499780451392, |
|
"grad_norm": 14977.416015625, |
|
"learning_rate": 2.881433339188477e-05, |
|
"loss": 0.1371, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15807499780451392, |
|
"eval_loss": 0.12529444694519043, |
|
"eval_runtime": 174.8253, |
|
"eval_samples_per_second": 25.511, |
|
"eval_steps_per_second": 3.192, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1668569421269869, |
|
"grad_norm": 11109.173828125, |
|
"learning_rate": 2.874846302476726e-05, |
|
"loss": 0.1292, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1756388864494599, |
|
"grad_norm": 9897.7958984375, |
|
"learning_rate": 2.8682592657649747e-05, |
|
"loss": 0.1351, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1756388864494599, |
|
"eval_loss": 0.12485189735889435, |
|
"eval_runtime": 174.8115, |
|
"eval_samples_per_second": 25.513, |
|
"eval_steps_per_second": 3.192, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18442083077193291, |
|
"grad_norm": 20060.55859375, |
|
"learning_rate": 2.8616722290532233e-05, |
|
"loss": 0.1303, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.1932027750944059, |
|
"grad_norm": 10244.4052734375, |
|
"learning_rate": 2.855085192341472e-05, |
|
"loss": 0.1413, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.1932027750944059, |
|
"eval_loss": 0.12359971553087234, |
|
"eval_runtime": 175.122, |
|
"eval_samples_per_second": 25.468, |
|
"eval_steps_per_second": 3.186, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2019847194168789, |
|
"grad_norm": 36993.25, |
|
"learning_rate": 2.848498155629721e-05, |
|
"loss": 0.1275, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2107666637393519, |
|
"grad_norm": 11102.2646484375, |
|
"learning_rate": 2.8419111189179697e-05, |
|
"loss": 0.1377, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2107666637393519, |
|
"eval_loss": 0.12276890873908997, |
|
"eval_runtime": 175.1309, |
|
"eval_samples_per_second": 25.467, |
|
"eval_steps_per_second": 3.186, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21954860806182488, |
|
"grad_norm": 10398.369140625, |
|
"learning_rate": 2.835324082206218e-05, |
|
"loss": 0.1356, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2283305523842979, |
|
"grad_norm": 14664.177734375, |
|
"learning_rate": 2.828737045494467e-05, |
|
"loss": 0.1309, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2283305523842979, |
|
"eval_loss": 0.1219501867890358, |
|
"eval_runtime": 174.8703, |
|
"eval_samples_per_second": 25.505, |
|
"eval_steps_per_second": 3.191, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.23711249670677087, |
|
"grad_norm": 9694.1875, |
|
"learning_rate": 2.8221500087827156e-05, |
|
"loss": 0.1271, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.24589444102924388, |
|
"grad_norm": 17376.810546875, |
|
"learning_rate": 2.8155629720709643e-05, |
|
"loss": 0.1434, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.24589444102924388, |
|
"eval_loss": 0.12065327912569046, |
|
"eval_runtime": 174.9734, |
|
"eval_samples_per_second": 25.49, |
|
"eval_steps_per_second": 3.189, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.2546763853517169, |
|
"grad_norm": 13443.2255859375, |
|
"learning_rate": 2.808975935359213e-05, |
|
"loss": 0.1383, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.26345832967418986, |
|
"grad_norm": 10927.8994140625, |
|
"learning_rate": 2.802388898647462e-05, |
|
"loss": 0.125, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26345832967418986, |
|
"eval_loss": 0.11999432742595673, |
|
"eval_runtime": 174.9084, |
|
"eval_samples_per_second": 25.499, |
|
"eval_steps_per_second": 3.19, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 22772, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7307494686720000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|