|
{ |
|
"best_metric": 0.8072579503059387, |
|
"best_model_checkpoint": "/tmp/model/checkpoint-190", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 190, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.263157894736843e-06, |
|
"loss": 1.6094, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.0526315789473686e-05, |
|
"loss": 1.6094, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 1.6096, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.1052631578947372e-05, |
|
"loss": 1.6089, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 1.6085, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 1.6068, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.68421052631579e-05, |
|
"loss": 1.6105, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2105263157894745e-05, |
|
"loss": 1.6058, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 1.6108, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.2631578947368424e-05, |
|
"loss": 1.6079, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5.7894736842105274e-05, |
|
"loss": 1.6044, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.31578947368421e-05, |
|
"loss": 1.6069, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.842105263157895e-05, |
|
"loss": 1.6007, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.36842105263158e-05, |
|
"loss": 1.6083, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.894736842105263e-05, |
|
"loss": 1.6077, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.421052631578949e-05, |
|
"loss": 1.5968, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.947368421052632e-05, |
|
"loss": 1.5903, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.473684210526316e-05, |
|
"loss": 1.6045, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5974, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.09411764705882353, |
|
"eval_f1_micro": 0.3076923076923077, |
|
"eval_f1_weighted": 0.14479638009049772, |
|
"eval_loss": 1.5974059104919434, |
|
"eval_precision_macro": 0.06153846153846154, |
|
"eval_precision_micro": 0.3076923076923077, |
|
"eval_precision_weighted": 0.09467455621301776, |
|
"eval_recall_macro": 0.2, |
|
"eval_recall_micro": 0.3076923076923077, |
|
"eval_recall_weighted": 0.3076923076923077, |
|
"eval_runtime": 0.2784, |
|
"eval_samples_per_second": 140.073, |
|
"eval_steps_per_second": 10.775, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00010526315789473685, |
|
"loss": 1.5782, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00011052631578947368, |
|
"loss": 1.5568, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00011578947368421055, |
|
"loss": 1.603, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00011578947368421055, |
|
"loss": 1.5393, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00012105263157894738, |
|
"loss": 1.5972, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001263157894736842, |
|
"loss": 1.6077, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00013157894736842105, |
|
"loss": 1.5841, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001368421052631579, |
|
"loss": 1.5846, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00014210526315789476, |
|
"loss": 1.5916, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001473684210526316, |
|
"loss": 1.5476, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00015263157894736842, |
|
"loss": 1.6294, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00015789473684210527, |
|
"loss": 1.571, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001631578947368421, |
|
"loss": 1.6475, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00016842105263157898, |
|
"loss": 1.5652, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001736842105263158, |
|
"loss": 1.5198, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00017894736842105264, |
|
"loss": 1.5658, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00018421052631578948, |
|
"loss": 1.5823, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00018947368421052632, |
|
"loss": 1.5813, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00019473684210526317, |
|
"loss": 1.5338, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.09411764705882353, |
|
"eval_f1_micro": 0.3076923076923077, |
|
"eval_f1_weighted": 0.14479638009049772, |
|
"eval_loss": 1.571889877319336, |
|
"eval_precision_macro": 0.06153846153846154, |
|
"eval_precision_micro": 0.3076923076923077, |
|
"eval_precision_weighted": 0.09467455621301776, |
|
"eval_recall_macro": 0.2, |
|
"eval_recall_micro": 0.3076923076923077, |
|
"eval_recall_weighted": 0.3076923076923077, |
|
"eval_runtime": 0.1048, |
|
"eval_samples_per_second": 372.122, |
|
"eval_steps_per_second": 28.625, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5117, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00020526315789473685, |
|
"loss": 1.4995, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0002105263157894737, |
|
"loss": 1.6392, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00021578947368421054, |
|
"loss": 1.4529, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00022105263157894735, |
|
"loss": 1.5848, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002263157894736842, |
|
"loss": 1.5591, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0002315789473684211, |
|
"loss": 1.5641, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0002368421052631579, |
|
"loss": 1.499, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00024210526315789475, |
|
"loss": 1.6476, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0002473684210526316, |
|
"loss": 1.4727, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002526315789473684, |
|
"loss": 1.5519, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0002578947368421053, |
|
"loss": 1.5057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0002631578947368421, |
|
"loss": 1.6801, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00026842105263157897, |
|
"loss": 1.5518, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002736842105263158, |
|
"loss": 1.4608, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002789473684210526, |
|
"loss": 1.5961, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0002842105263157895, |
|
"loss": 1.3564, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00028947368421052634, |
|
"loss": 1.5493, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0002947368421052632, |
|
"loss": 1.5723, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.09411764705882353, |
|
"eval_f1_micro": 0.3076923076923077, |
|
"eval_f1_weighted": 0.14479638009049772, |
|
"eval_loss": 1.5472002029418945, |
|
"eval_precision_macro": 0.06153846153846154, |
|
"eval_precision_micro": 0.3076923076923077, |
|
"eval_precision_weighted": 0.09467455621301776, |
|
"eval_recall_macro": 0.2, |
|
"eval_recall_micro": 0.3076923076923077, |
|
"eval_recall_weighted": 0.3076923076923077, |
|
"eval_runtime": 0.1034, |
|
"eval_samples_per_second": 377.243, |
|
"eval_steps_per_second": 29.019, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00030000000000000003, |
|
"loss": 1.3807, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00030526315789473684, |
|
"loss": 1.4009, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0003105263157894737, |
|
"loss": 1.3475, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00031578947368421053, |
|
"loss": 1.4923, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0003210526315789474, |
|
"loss": 1.3802, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0003263157894736842, |
|
"loss": 1.4757, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00033157894736842103, |
|
"loss": 1.4177, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00033684210526315796, |
|
"loss": 1.7288, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00034210526315789477, |
|
"loss": 1.4219, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0003473684210526316, |
|
"loss": 1.6314, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00035263157894736846, |
|
"loss": 1.5195, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0003578947368421053, |
|
"loss": 1.4379, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.00036315789473684214, |
|
"loss": 1.6594, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.00036842105263157896, |
|
"loss": 1.5372, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0003736842105263158, |
|
"loss": 1.6277, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.00037894736842105265, |
|
"loss": 1.3156, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00038421052631578946, |
|
"loss": 1.7242, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00038947368421052633, |
|
"loss": 1.4844, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003947368421052632, |
|
"loss": 1.4152, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.09411764705882353, |
|
"eval_f1_micro": 0.3076923076923077, |
|
"eval_f1_weighted": 0.14479638009049772, |
|
"eval_loss": 1.4960436820983887, |
|
"eval_precision_macro": 0.06153846153846154, |
|
"eval_precision_micro": 0.3076923076923077, |
|
"eval_precision_weighted": 0.09467455621301776, |
|
"eval_recall_macro": 0.2, |
|
"eval_recall_micro": 0.3076923076923077, |
|
"eval_recall_weighted": 0.3076923076923077, |
|
"eval_runtime": 0.1073, |
|
"eval_samples_per_second": 363.387, |
|
"eval_steps_per_second": 27.953, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0004, |
|
"loss": 1.2319, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0004052631578947369, |
|
"loss": 1.5189, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0004105263157894737, |
|
"loss": 1.5146, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0004157894736842105, |
|
"loss": 1.4695, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0004210526315789474, |
|
"loss": 1.343, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0004263157894736842, |
|
"loss": 1.4702, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0004315789473684211, |
|
"loss": 1.6262, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0004368421052631579, |
|
"loss": 1.5298, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0004421052631578947, |
|
"loss": 1.2922, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0004473684210526316, |
|
"loss": 1.2764, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0004526315789473684, |
|
"loss": 1.2834, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00045789473684210527, |
|
"loss": 1.3713, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0004631578947368422, |
|
"loss": 1.5645, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.000468421052631579, |
|
"loss": 1.3019, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0004736842105263158, |
|
"loss": 1.3534, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0004789473684210527, |
|
"loss": 1.4572, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0004842105263157895, |
|
"loss": 1.5372, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0004894736842105264, |
|
"loss": 1.4614, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0004947368421052632, |
|
"loss": 1.214, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.41025641025641024, |
|
"eval_f1_macro": 0.21165501165501163, |
|
"eval_f1_micro": 0.41025641025641024, |
|
"eval_f1_weighted": 0.29765106688183607, |
|
"eval_loss": 1.392277717590332, |
|
"eval_precision_macro": 0.17407407407407408, |
|
"eval_precision_micro": 0.41025641025641024, |
|
"eval_precision_weighted": 0.24216524216524218, |
|
"eval_recall_macro": 0.2866666666666667, |
|
"eval_recall_micro": 0.41025641025641024, |
|
"eval_recall_weighted": 0.41025641025641024, |
|
"eval_runtime": 0.1067, |
|
"eval_samples_per_second": 365.557, |
|
"eval_steps_per_second": 28.12, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0005, |
|
"loss": 1.1431, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0005052631578947368, |
|
"loss": 1.1986, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0005105263157894738, |
|
"loss": 1.3605, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0005157894736842106, |
|
"loss": 1.4709, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.0005210526315789474, |
|
"loss": 1.4103, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0005263157894736842, |
|
"loss": 1.5161, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.000531578947368421, |
|
"loss": 1.4339, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0005368421052631579, |
|
"loss": 1.2471, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0005421052631578948, |
|
"loss": 1.3335, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.0005473684210526316, |
|
"loss": 1.1644, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.0005526315789473684, |
|
"loss": 1.5695, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0005578947368421052, |
|
"loss": 1.099, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.0005631578947368421, |
|
"loss": 1.3556, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.000568421052631579, |
|
"loss": 1.4156, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.0005736842105263159, |
|
"loss": 1.3777, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0005789473684210527, |
|
"loss": 1.8594, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.0005842105263157895, |
|
"loss": 0.995, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0005894736842105264, |
|
"loss": 1.2018, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0005947368421052632, |
|
"loss": 0.9971, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.48717948717948717, |
|
"eval_f1_macro": 0.33415204678362576, |
|
"eval_f1_micro": 0.48717948717948717, |
|
"eval_f1_weighted": 0.4098065677013045, |
|
"eval_loss": 1.2535957098007202, |
|
"eval_precision_macro": 0.35025641025641024, |
|
"eval_precision_micro": 0.48717948717948717, |
|
"eval_precision_weighted": 0.4003944773175543, |
|
"eval_recall_macro": 0.3733333333333333, |
|
"eval_recall_micro": 0.48717948717948717, |
|
"eval_recall_weighted": 0.48717948717948717, |
|
"eval_runtime": 0.1096, |
|
"eval_samples_per_second": 355.926, |
|
"eval_steps_per_second": 27.379, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0006000000000000001, |
|
"loss": 1.1295, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.0006052631578947369, |
|
"loss": 1.4946, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0006105263157894737, |
|
"loss": 1.8178, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0006157894736842106, |
|
"loss": 1.1574, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0006210526315789474, |
|
"loss": 0.9329, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0006263157894736842, |
|
"loss": 1.2374, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0006315789473684211, |
|
"loss": 1.5585, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.0006368421052631579, |
|
"loss": 1.0872, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.0006421052631578948, |
|
"loss": 0.9432, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0006473684210526316, |
|
"loss": 1.2214, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.0006526315789473684, |
|
"loss": 1.0156, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.0006578947368421052, |
|
"loss": 1.2343, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0006631578947368421, |
|
"loss": 1.2906, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.000668421052631579, |
|
"loss": 1.5074, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.0006736842105263159, |
|
"loss": 1.0378, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0006789473684210527, |
|
"loss": 1.3764, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0006842105263157895, |
|
"loss": 0.9798, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0006894736842105264, |
|
"loss": 1.0204, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0006947368421052632, |
|
"loss": 1.0225, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_f1_macro": 0.5685314685314686, |
|
"eval_f1_micro": 0.6923076923076923, |
|
"eval_f1_weighted": 0.6504691889307274, |
|
"eval_loss": 1.1684703826904297, |
|
"eval_precision_macro": 0.5473015873015872, |
|
"eval_precision_micro": 0.6923076923076923, |
|
"eval_precision_weighted": 0.6344322344322344, |
|
"eval_recall_macro": 0.6142857142857142, |
|
"eval_recall_micro": 0.6923076923076923, |
|
"eval_recall_weighted": 0.6923076923076923, |
|
"eval_runtime": 0.0989, |
|
"eval_samples_per_second": 394.464, |
|
"eval_steps_per_second": 30.343, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0007000000000000001, |
|
"loss": 0.8319, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0007052631578947369, |
|
"loss": 1.0276, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.0007105263157894737, |
|
"loss": 0.9607, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.0007157894736842105, |
|
"loss": 1.2387, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0007210526315789474, |
|
"loss": 1.2796, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0007263157894736843, |
|
"loss": 0.7215, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0007315789473684211, |
|
"loss": 1.0518, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.0007368421052631579, |
|
"loss": 0.7813, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0007421052631578947, |
|
"loss": 1.0502, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.0007473684210526316, |
|
"loss": 1.378, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.0007526315789473685, |
|
"loss": 1.2363, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0007526315789473685, |
|
"loss": 1.4102, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0007578947368421053, |
|
"loss": 0.8184, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.0007631578947368421, |
|
"loss": 1.0921, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.0007684210526315789, |
|
"loss": 0.7126, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.0007736842105263159, |
|
"loss": 1.0646, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.0007789473684210527, |
|
"loss": 0.9532, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.0007842105263157896, |
|
"loss": 0.7724, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0007894736842105264, |
|
"loss": 0.8753, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_f1_macro": 0.5992857142857144, |
|
"eval_f1_micro": 0.717948717948718, |
|
"eval_f1_weighted": 0.6808608058608059, |
|
"eval_loss": 1.0412697792053223, |
|
"eval_precision_macro": 0.5655555555555555, |
|
"eval_precision_micro": 0.717948717948718, |
|
"eval_precision_weighted": 0.6524216524216524, |
|
"eval_recall_macro": 0.6433333333333333, |
|
"eval_recall_micro": 0.717948717948718, |
|
"eval_recall_weighted": 0.717948717948718, |
|
"eval_runtime": 0.1034, |
|
"eval_samples_per_second": 377.218, |
|
"eval_steps_per_second": 29.017, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0007947368421052632, |
|
"loss": 0.6541, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0008, |
|
"loss": 1.2863, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0008052631578947369, |
|
"loss": 1.0602, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0008105263157894738, |
|
"loss": 1.2225, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0008157894736842106, |
|
"loss": 0.9046, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0008210526315789474, |
|
"loss": 0.6939, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0008263157894736842, |
|
"loss": 0.644, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 0.000831578947368421, |
|
"loss": 0.9403, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.000836842105263158, |
|
"loss": 0.5107, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0008421052631578948, |
|
"loss": 1.0886, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.0008473684210526316, |
|
"loss": 1.3385, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0008526315789473684, |
|
"loss": 1.2766, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.0008578947368421052, |
|
"loss": 0.6848, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.0008631578947368422, |
|
"loss": 0.9881, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.000868421052631579, |
|
"loss": 0.6309, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.0008736842105263158, |
|
"loss": 0.7585, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.0008789473684210526, |
|
"loss": 0.6653, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0008842105263157894, |
|
"loss": 1.0764, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0008894736842105263, |
|
"loss": 0.6241, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_f1_macro": 0.6221158958001063, |
|
"eval_f1_micro": 0.7435897435897437, |
|
"eval_f1_weighted": 0.7144862934336619, |
|
"eval_loss": 0.9631038308143616, |
|
"eval_precision_macro": 0.6077777777777779, |
|
"eval_precision_micro": 0.7435897435897436, |
|
"eval_precision_weighted": 0.7235042735042736, |
|
"eval_recall_macro": 0.6766666666666666, |
|
"eval_recall_micro": 0.7435897435897436, |
|
"eval_recall_weighted": 0.7435897435897436, |
|
"eval_runtime": 0.1019, |
|
"eval_samples_per_second": 382.713, |
|
"eval_steps_per_second": 29.439, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.0008947368421052632, |
|
"loss": 0.6013, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.0009, |
|
"loss": 0.8411, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.0009052631578947368, |
|
"loss": 0.9775, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 0.0009105263157894736, |
|
"loss": 0.9363, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.0009157894736842105, |
|
"loss": 0.348, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.0009210526315789473, |
|
"loss": 0.7705, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0009263157894736844, |
|
"loss": 0.7397, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0009315789473684212, |
|
"loss": 0.3971, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.000936842105263158, |
|
"loss": 1.4934, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 0.0009421052631578948, |
|
"loss": 1.2191, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.0009473684210526316, |
|
"loss": 1.3495, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.0009526315789473686, |
|
"loss": 1.2079, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 0.0009578947368421054, |
|
"loss": 0.9686, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.0009631578947368422, |
|
"loss": 1.6675, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.000968421052631579, |
|
"loss": 0.7272, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.0009736842105263158, |
|
"loss": 0.563, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.0009789473684210528, |
|
"loss": 1.2433, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.0009842105263157895, |
|
"loss": 1.353, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0009894736842105264, |
|
"loss": 0.5994, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_f1_macro": 0.608552036199095, |
|
"eval_f1_micro": 0.717948717948718, |
|
"eval_f1_weighted": 0.6738484743009631, |
|
"eval_loss": 0.8072579503059387, |
|
"eval_precision_macro": 0.5864285714285714, |
|
"eval_precision_micro": 0.717948717948718, |
|
"eval_precision_weighted": 0.6695970695970695, |
|
"eval_recall_macro": 0.6633333333333333, |
|
"eval_recall_micro": 0.717948717948718, |
|
"eval_recall_weighted": 0.717948717948718, |
|
"eval_runtime": 0.1002, |
|
"eval_samples_per_second": 389.316, |
|
"eval_steps_per_second": 29.947, |
|
"step": 190 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 95000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5000, |
|
"save_steps": 500, |
|
"total_flos": 3.228625190633472e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|