|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.10425240054869685, |
|
"eval_steps": 500, |
|
"global_step": 38, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0027434842249657062, |
|
"grad_norm": 0.3216973543167114, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.039, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0054869684499314125, |
|
"grad_norm": 0.3218615651130676, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.063, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00823045267489712, |
|
"grad_norm": 0.3071180582046509, |
|
"learning_rate": 6e-06, |
|
"loss": 2.0298, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010973936899862825, |
|
"grad_norm": 0.30708086490631104, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.9489, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.013717421124828532, |
|
"grad_norm": 0.31064078211784363, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0401, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01646090534979424, |
|
"grad_norm": 0.35925883054733276, |
|
"learning_rate": 1.2e-05, |
|
"loss": 2.0963, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.019204389574759947, |
|
"grad_norm": 0.3267571032047272, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 2.0252, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02194787379972565, |
|
"grad_norm": 0.30728623270988464, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 2.0108, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.024691358024691357, |
|
"grad_norm": 0.2928607761859894, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.9542, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.027434842249657063, |
|
"grad_norm": 0.30577352643013, |
|
"learning_rate": 2e-05, |
|
"loss": 2.017, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03017832647462277, |
|
"grad_norm": 0.3024803102016449, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 1.982, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03292181069958848, |
|
"grad_norm": 0.28839072585105896, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.9217, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03566529492455418, |
|
"grad_norm": 0.2843893766403198, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 1.9608, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.038408779149519894, |
|
"grad_norm": 0.2703002095222473, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 1.9696, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0411522633744856, |
|
"grad_norm": 0.24636265635490417, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8818, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0438957475994513, |
|
"grad_norm": 0.2405432015657425, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.975, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04663923182441701, |
|
"grad_norm": 0.24582137167453766, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 1.923, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04938271604938271, |
|
"grad_norm": 0.2504767179489136, |
|
"learning_rate": 3.6e-05, |
|
"loss": 1.9781, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05212620027434842, |
|
"grad_norm": 0.2394665777683258, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.8959, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05486968449931413, |
|
"grad_norm": 0.24969030916690826, |
|
"learning_rate": 4e-05, |
|
"loss": 1.855, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05761316872427984, |
|
"grad_norm": 0.2694351077079773, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.9602, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06035665294924554, |
|
"grad_norm": 0.25622957944869995, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.8208, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06310013717421124, |
|
"grad_norm": 0.24535588920116425, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.7967, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06584362139917696, |
|
"grad_norm": 0.2737885117530823, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.7841, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06858710562414266, |
|
"grad_norm": 0.2646300196647644, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7744, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07133058984910837, |
|
"grad_norm": 0.2676407992839813, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 1.7359, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07407407407407407, |
|
"grad_norm": 0.2649776041507721, |
|
"learning_rate": 5.4000000000000005e-05, |
|
"loss": 1.7205, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07681755829903979, |
|
"grad_norm": 0.296818345785141, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 1.696, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07956104252400549, |
|
"grad_norm": 0.31905728578567505, |
|
"learning_rate": 5.8e-05, |
|
"loss": 1.7261, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0823045267489712, |
|
"grad_norm": 0.4174517095088959, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6451, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0850480109739369, |
|
"grad_norm": 0.4545894265174866, |
|
"learning_rate": 6.2e-05, |
|
"loss": 1.5867, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0877914951989026, |
|
"grad_norm": 0.45722702145576477, |
|
"learning_rate": 6.400000000000001e-05, |
|
"loss": 1.5184, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09053497942386832, |
|
"grad_norm": 0.4953472316265106, |
|
"learning_rate": 6.6e-05, |
|
"loss": 1.4793, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09327846364883402, |
|
"grad_norm": 0.5516601800918579, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 1.4967, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09602194787379972, |
|
"grad_norm": 0.5295405983924866, |
|
"learning_rate": 7e-05, |
|
"loss": 1.4445, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09876543209876543, |
|
"grad_norm": 0.3918333351612091, |
|
"learning_rate": 7.2e-05, |
|
"loss": 1.3956, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10150891632373114, |
|
"grad_norm": 0.4032560884952545, |
|
"learning_rate": 7.4e-05, |
|
"loss": 1.3773, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10425240054869685, |
|
"grad_norm": 0.30622419714927673, |
|
"learning_rate": 7.6e-05, |
|
"loss": 1.2721, |
|
"step": 38 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 364, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0847572478513971e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|