File size: 3,025 Bytes
0c3eda2 ae00ba9 0c3eda2 5fdc735 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 ae00ba9 0c3eda2 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 ae00ba9 dd06f81 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 ae00ba9 5fdc735 0c3eda2 ae00ba9 0c3eda2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04864864864864865,
"eval_steps": 3,
"global_step": 9,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005405405405405406,
"grad_norm": 0.2000586837530136,
"learning_rate": 2e-05,
"loss": 2.3204,
"step": 1
},
{
"epoch": 0.005405405405405406,
"eval_loss": 2.2008719444274902,
"eval_runtime": 9.7979,
"eval_samples_per_second": 7.961,
"eval_steps_per_second": 3.98,
"step": 1
},
{
"epoch": 0.010810810810810811,
"grad_norm": 0.17591507732868195,
"learning_rate": 4e-05,
"loss": 2.6394,
"step": 2
},
{
"epoch": 0.016216216216216217,
"grad_norm": 0.19049879908561707,
"learning_rate": 6e-05,
"loss": 2.2241,
"step": 3
},
{
"epoch": 0.016216216216216217,
"eval_loss": 2.1992404460906982,
"eval_runtime": 9.8906,
"eval_samples_per_second": 7.886,
"eval_steps_per_second": 3.943,
"step": 3
},
{
"epoch": 0.021621621621621623,
"grad_norm": 0.21317099034786224,
"learning_rate": 8e-05,
"loss": 2.0634,
"step": 4
},
{
"epoch": 0.02702702702702703,
"grad_norm": 0.16591224074363708,
"learning_rate": 0.0001,
"loss": 2.2234,
"step": 5
},
{
"epoch": 0.032432432432432434,
"grad_norm": 0.22719058394432068,
"learning_rate": 0.00012,
"loss": 2.4093,
"step": 6
},
{
"epoch": 0.032432432432432434,
"eval_loss": 2.1915857791900635,
"eval_runtime": 9.9284,
"eval_samples_per_second": 7.856,
"eval_steps_per_second": 3.928,
"step": 6
},
{
"epoch": 0.03783783783783784,
"grad_norm": 0.24013416469097137,
"learning_rate": 0.00014,
"loss": 2.034,
"step": 7
},
{
"epoch": 0.043243243243243246,
"grad_norm": 0.19061008095741272,
"learning_rate": 0.00016,
"loss": 2.0807,
"step": 8
},
{
"epoch": 0.04864864864864865,
"grad_norm": 0.22366337478160858,
"learning_rate": 0.00018,
"loss": 2.2467,
"step": 9
},
{
"epoch": 0.04864864864864865,
"eval_loss": 2.179177761077881,
"eval_runtime": 9.9529,
"eval_samples_per_second": 7.837,
"eval_steps_per_second": 3.918,
"step": 9
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6057323004952576.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|