|
{ |
|
"best_metric": 2.126384735107422, |
|
"best_model_checkpoint": "saves/Gemma-2B/lora/train_2024-03-01-04-36-32/checkpoint-100", |
|
"epoch": 0.17777777777777778, |
|
"eval_steps": 100, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.6079277992248535, |
|
"learning_rate": 4.999960939662063e-05, |
|
"loss": 3.747, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.2283411026000977, |
|
"learning_rate": 4.999843759868819e-05, |
|
"loss": 3.5789, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 41.573001861572266, |
|
"learning_rate": 4.999648464281934e-05, |
|
"loss": 3.1683, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.080965518951416, |
|
"learning_rate": 4.9993750590040575e-05, |
|
"loss": 2.8275, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.576275825500488, |
|
"learning_rate": 4.999023552578632e-05, |
|
"loss": 2.6758, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 18.012842178344727, |
|
"learning_rate": 4.998593955989626e-05, |
|
"loss": 2.6287, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.738934516906738, |
|
"learning_rate": 4.9980862826611875e-05, |
|
"loss": 2.5284, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.353776216506958, |
|
"learning_rate": 4.9975005484572305e-05, |
|
"loss": 2.2608, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.6298699378967285, |
|
"learning_rate": 4.9968367716809374e-05, |
|
"loss": 2.2475, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 50.594207763671875, |
|
"learning_rate": 4.996094973074183e-05, |
|
"loss": 2.2007, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.126384735107422, |
|
"eval_runtime": 124.9221, |
|
"eval_samples_per_second": 8.005, |
|
"eval_steps_per_second": 2.001, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 8.247898064093184e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|