File size: 2,678 Bytes
e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 c665e99 e3b7b94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
{
"best_metric": 0.6066474914550781,
"best_model_checkpoint": "mikhail-panzo/zlm_b64_le5_s12000/checkpoint-500",
"epoch": 0.4187604690117253,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04187604690117253,
"grad_norm": 4.788218975067139,
"learning_rate": 2.4500000000000004e-07,
"loss": 1.1819,
"step": 50
},
{
"epoch": 0.08375209380234507,
"grad_norm": 15.618935585021973,
"learning_rate": 4.95e-07,
"loss": 1.0042,
"step": 100
},
{
"epoch": 0.12562814070351758,
"grad_norm": 2.7821555137634277,
"learning_rate": 7.4e-07,
"loss": 0.9676,
"step": 150
},
{
"epoch": 0.16750418760469013,
"grad_norm": 3.759263038635254,
"learning_rate": 9.85e-07,
"loss": 0.9362,
"step": 200
},
{
"epoch": 0.20938023450586266,
"grad_norm": 3.906439781188965,
"learning_rate": 1.235e-06,
"loss": 0.8667,
"step": 250
},
{
"epoch": 0.25125628140703515,
"grad_norm": 4.397643566131592,
"learning_rate": 1.485e-06,
"loss": 0.8455,
"step": 300
},
{
"epoch": 0.2931323283082077,
"grad_norm": 2.94077467918396,
"learning_rate": 1.7350000000000001e-06,
"loss": 0.8376,
"step": 350
},
{
"epoch": 0.33500837520938026,
"grad_norm": 4.161074161529541,
"learning_rate": 1.985e-06,
"loss": 0.7521,
"step": 400
},
{
"epoch": 0.3768844221105528,
"grad_norm": 2.8088552951812744,
"learning_rate": 2.235e-06,
"loss": 0.7444,
"step": 450
},
{
"epoch": 0.4187604690117253,
"grad_norm": 2.62283992767334,
"learning_rate": 2.4850000000000003e-06,
"loss": 0.7129,
"step": 500
},
{
"epoch": 0.4187604690117253,
"eval_loss": 0.6066474914550781,
"eval_runtime": 213.4911,
"eval_samples_per_second": 39.763,
"eval_steps_per_second": 4.974,
"step": 500
}
],
"logging_steps": 50,
"max_steps": 12000,
"num_input_tokens_seen": 0,
"num_train_epochs": 11,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4878617293034496.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|