File size: 4,293 Bytes
55e4d10 0b8b0b6 55e4d10 e765e19 2c49107 e9385a1 b5a2a1c 462d731 0b8b0b6 55e4d10 0b8b0b6 55e4d10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
{
"best_metric": 0.5598750842730023,
"best_model_checkpoint": "results/checkpoint-3500",
"epoch": 0.7,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 6.6e-05,
"loss": 2.0145,
"step": 500
},
{
"epoch": 0.1,
"eval_bleu": 0.1150567678154011,
"eval_exact_match": 0.132,
"eval_loss": 0.9738793969154358,
"eval_rouge1": 0.5382577445488361,
"eval_rouge2": 0.3545728830144713,
"eval_rougeL": 0.5187026156468759,
"eval_rougeLsum": 0.5259109026044895,
"eval_runtime": 59.0107,
"eval_samples_per_second": 33.892,
"eval_steps_per_second": 1.695,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 9.828070175438598e-05,
"loss": 0.8761,
"step": 1000
},
{
"epoch": 0.2,
"eval_bleu": 0.11272954562081616,
"eval_exact_match": 0.1475,
"eval_loss": 0.901577353477478,
"eval_rouge1": 0.5359795989196405,
"eval_rouge2": 0.3649742450836393,
"eval_rougeL": 0.5187273882783475,
"eval_rougeLsum": 0.5253745777261007,
"eval_runtime": 55.4587,
"eval_samples_per_second": 36.063,
"eval_steps_per_second": 1.803,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 9.477192982456141e-05,
"loss": 0.8139,
"step": 1500
},
{
"epoch": 0.3,
"eval_bleu": 0.11562105292517781,
"eval_exact_match": 0.1425,
"eval_loss": 0.8843896389007568,
"eval_rouge1": 0.5385440798738839,
"eval_rouge2": 0.3630877932160531,
"eval_rougeL": 0.520238931080043,
"eval_rougeLsum": 0.5274836357515698,
"eval_runtime": 55.2097,
"eval_samples_per_second": 36.226,
"eval_steps_per_second": 1.811,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 9.126315789473685e-05,
"loss": 0.7745,
"step": 2000
},
{
"epoch": 0.4,
"eval_bleu": 0.11999069408174769,
"eval_exact_match": 0.152,
"eval_loss": 0.8501775860786438,
"eval_rouge1": 0.5495869647995661,
"eval_rouge2": 0.37385624817390106,
"eval_rougeL": 0.5307934771920211,
"eval_rougeLsum": 0.5391651788581161,
"eval_runtime": 56.8684,
"eval_samples_per_second": 35.169,
"eval_steps_per_second": 1.758,
"step": 2000
},
{
"epoch": 0.5,
"learning_rate": 8.77543859649123e-05,
"loss": 0.7416,
"step": 2500
},
{
"epoch": 0.5,
"eval_bleu": 0.12262269562147479,
"eval_exact_match": 0.148,
"eval_loss": 0.8417201042175293,
"eval_rouge1": 0.5524556612739725,
"eval_rouge2": 0.37743352139147385,
"eval_rougeL": 0.5333960691072436,
"eval_rougeLsum": 0.5408174364513855,
"eval_runtime": 55.5707,
"eval_samples_per_second": 35.99,
"eval_steps_per_second": 1.8,
"step": 2500
},
{
"epoch": 0.6,
"learning_rate": 8.424561403508773e-05,
"loss": 0.7097,
"step": 3000
},
{
"epoch": 0.6,
"eval_bleu": 0.12307160579590622,
"eval_exact_match": 0.155,
"eval_loss": 0.8275034427642822,
"eval_rouge1": 0.5571591325212851,
"eval_rouge2": 0.3846841785043018,
"eval_rougeL": 0.5390722174679519,
"eval_rougeLsum": 0.5456202385734745,
"eval_runtime": 56.9486,
"eval_samples_per_second": 35.119,
"eval_steps_per_second": 1.756,
"step": 3000
},
{
"epoch": 0.7,
"learning_rate": 8.073684210526316e-05,
"loss": 0.6923,
"step": 3500
},
{
"epoch": 0.7,
"eval_bleu": 0.1269094917224047,
"eval_exact_match": 0.156,
"eval_loss": 0.8161399960517883,
"eval_rouge1": 0.5598750842730023,
"eval_rouge2": 0.38392351740583797,
"eval_rougeL": 0.5409288136858783,
"eval_rougeLsum": 0.5484962105303921,
"eval_runtime": 55.0573,
"eval_samples_per_second": 36.326,
"eval_steps_per_second": 1.816,
"step": 3500
}
],
"max_steps": 15000,
"num_train_epochs": 3,
"total_flos": 8985722848542720.0,
"trial_name": null,
"trial_params": null
}
|