File size: 2,518 Bytes
6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f 0243293 6696c3f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 35,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02857142857142857,
"grad_norm": 1.1929763555526733,
"learning_rate": 5e-05,
"loss": 0.842,
"step": 1
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.5904757380485535,
"learning_rate": 0.00019948693233918952,
"loss": 0.7602,
"step": 5
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.35242587327957153,
"learning_rate": 0.00018207634412072764,
"loss": 0.5166,
"step": 10
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.23204153776168823,
"learning_rate": 0.00014403941515576344,
"loss": 0.3699,
"step": 15
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.21575891971588135,
"learning_rate": 9.493508311612874e-05,
"loss": 0.2784,
"step": 20
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.27512410283088684,
"learning_rate": 4.710359896730379e-05,
"loss": 0.222,
"step": 25
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.3192328214645386,
"learning_rate": 1.2565338385541792e-05,
"loss": 0.1813,
"step": 30
},
{
"epoch": 1.0,
"grad_norm": 0.11489653587341309,
"learning_rate": 0.0,
"loss": 0.1727,
"step": 35
},
{
"epoch": 1.0,
"eval_loss": 0.1722353994846344,
"eval_runtime": 11.0271,
"eval_samples_per_second": 5.623,
"eval_steps_per_second": 0.363,
"step": 35
},
{
"epoch": 1.0,
"step": 35,
"total_flos": 5.19321114598441e+16,
"train_loss": 0.3596490706716265,
"train_runtime": 336.8523,
"train_samples_per_second": 1.657,
"train_steps_per_second": 0.104
}
],
"logging_steps": 5,
"max_steps": 35,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.19321114598441e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|