{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 0.0, "loss": 83.3056, "step": 1},
    {"epoch": 0.0, "learning_rate": 0.0, "loss": 95.8665, "step": 2},
    {"epoch": 0.01, "learning_rate": 0.0, "loss": 97.972, "step": 3},
    {"epoch": 0.01, "learning_rate": 0.0, "loss": 76.0463, "step": 4},
    {"epoch": 0.01, "learning_rate": 0.0, "loss": 85.1404, "step": 5},
    {"epoch": 0.01, "learning_rate": 5.000000000000001e-07, "loss": 75.9646, "step": 6},
    {"epoch": 0.01, "learning_rate": 5.000000000000001e-07, "loss": 86.754, "step": 7},
    {"epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "loss": 87.0865, "step": 8},
    {"epoch": 0.02, "learning_rate": 1.5e-06, "loss": 81.6646, "step": 9},
    {"epoch": 0.02, "learning_rate": 2.0000000000000003e-06, "loss": 69.9096, "step": 10},
    {"epoch": 0.02, "learning_rate": 2.5e-06, "loss": 79.4672, "step": 11},
    {"epoch": 0.02, "learning_rate": 2.5e-06, "loss": 87.2991, "step": 12},
    {"epoch": 0.03, "learning_rate": 3e-06, "loss": 86.586, "step": 13},
    {"epoch": 0.03, "learning_rate": 3.5000000000000004e-06, "loss": 68.1378, "step": 14},
    {"epoch": 0.03, "learning_rate": 4.000000000000001e-06, "loss": 56.5916, "step": 15},
    {"epoch": 0.03, "learning_rate": 4.5e-06, "loss": 73.8732, "step": 16},
    {"epoch": 0.03, "learning_rate": 5e-06, "loss": 53.5963, "step": 17},
    {"epoch": 0.04, "learning_rate": 5.500000000000001e-06, "loss": 37.9982, "step": 18},
    {"epoch": 0.04, "learning_rate": 6e-06, "loss": 45.8435, "step": 19},
    {"epoch": 0.04, "learning_rate": 6.5000000000000004e-06, "loss": 40.7565, "step": 20},
    {"epoch": 0.04, "learning_rate": 7.000000000000001e-06, "loss": 37.2394, "step": 21},
    {"epoch": 0.04, "learning_rate": 7.5e-06, "loss": 35.8049, "step": 22},
    {"epoch": 0.05, "learning_rate": 8.000000000000001e-06, "loss": 35.8737, "step": 23},
    {"epoch": 0.05, "learning_rate": 8.500000000000002e-06, "loss": 28.8236, "step": 24},
    {"epoch": 0.05, "learning_rate": 9e-06, "loss": 26.5792, "step": 25},
    {"epoch": 0.05, "learning_rate": 9.5e-06, "loss": 20.3758, "step": 26},
    {"epoch": 0.05, "learning_rate": 1e-05, "loss": 18.2907, "step": 27},
    {"epoch": 0.06, "learning_rate": 1.05e-05, "loss": 14.3071, "step": 28},
    {"epoch": 0.06, "learning_rate": 1.1000000000000001e-05, "loss": 9.7062, "step": 29},
    {"epoch": 0.06, "learning_rate": 1.1500000000000002e-05, "loss": 8.8146, "step": 30},
    {"epoch": 0.06, "learning_rate": 1.2e-05, "loss": 5.3928, "step": 31},
    {"epoch": 0.06, "learning_rate": 1.25e-05, "loss": 5.5117, "step": 32},
    {"epoch": 0.07, "learning_rate": 1.3000000000000001e-05, "loss": 4.8147, "step": 33},
    {"epoch": 0.07, "learning_rate": 1.3500000000000001e-05, "loss": 3.1978, "step": 34},
    {"epoch": 0.07, "learning_rate": 1.4000000000000001e-05, "loss": 3.1655, "step": 35},
    {"epoch": 0.07, "learning_rate": 1.45e-05, "loss": 3.0614, "step": 36},
    {"epoch": 0.07, "learning_rate": 1.5e-05, "loss": 2.5187, "step": 37},
    {"epoch": 0.08, "learning_rate": 1.55e-05, "loss": 3.2278, "step": 38},
    {"epoch": 0.08, "learning_rate": 1.6000000000000003e-05, "loss": 2.8507, "step": 39},
    {"epoch": 0.08, "learning_rate": 1.65e-05, "loss": 2.275, "step": 40},
    {"epoch": 0.08, "learning_rate": 1.7000000000000003e-05, "loss": 2.0627, "step": 41},
    {"epoch": 0.08, "learning_rate": 1.75e-05, "loss": 2.8432, "step": 42},
    {"epoch": 0.09, "learning_rate": 1.8e-05, "loss": 2.2149, "step": 43},
    {"epoch": 0.09, "learning_rate": 1.85e-05, "loss": 2.0645, "step": 44},
    {"epoch": 0.09, "learning_rate": 1.9e-05, "loss": 2.2229, "step": 45},
    {"epoch": 0.09, "learning_rate": 1.9500000000000003e-05, "loss": 3.0316, "step": 46},
    {"epoch": 0.09, "learning_rate": 2e-05, "loss": 2.3195, "step": 47},
    {"epoch": 0.1, "learning_rate": 2.05e-05, "loss": 1.7498, "step": 48},
    {"epoch": 0.1, "learning_rate": 2.1e-05, "loss": 2.5683, "step": 49},
    {"epoch": 0.1, "learning_rate": 2.15e-05, "loss": 2.3456, "step": 50},
    {"epoch": 0.1, "learning_rate": 2.2000000000000003e-05, "loss": 2.0581, "step": 51},
    {"epoch": 0.1, "learning_rate": 2.25e-05, "loss": 2.2809, "step": 52},
    {"epoch": 0.11, "learning_rate": 2.3000000000000003e-05, "loss": 2.3892, "step": 53},
    {"epoch": 0.11, "learning_rate": 2.35e-05, "loss": 2.0544, "step": 54},
    {"epoch": 0.11, "learning_rate": 2.4e-05, "loss": 2.1116, "step": 55},
    {"epoch": 0.11, "learning_rate": 2.45e-05, "loss": 2.5165, "step": 56},
    {"epoch": 0.11, "learning_rate": 2.5e-05, "loss": 2.3629, "step": 57},
    {"epoch": 0.12, "learning_rate": 2.5500000000000003e-05, "loss": 2.0548, "step": 58},
    {"epoch": 0.12, "learning_rate": 2.6000000000000002e-05, "loss": 1.9614, "step": 59},
    {"epoch": 0.12, "learning_rate": 2.6500000000000004e-05, "loss": 2.0164, "step": 60},
    {"epoch": 0.12, "learning_rate": 2.7000000000000002e-05, "loss": 1.9013, "step": 61},
    {"epoch": 0.12, "learning_rate": 2.7500000000000004e-05, "loss": 2.0752, "step": 62},
    {"epoch": 0.13, "learning_rate": 2.8000000000000003e-05, "loss": 2.136, "step": 63},
    {"epoch": 0.13, "learning_rate": 2.8499999999999998e-05, "loss": 2.0806, "step": 64},
    {"epoch": 0.13, "learning_rate": 2.9e-05, "loss": 1.736, "step": 65},
    {"epoch": 0.13, "learning_rate": 2.95e-05, "loss": 1.9597, "step": 66},
    {"epoch": 0.13, "learning_rate": 3e-05, "loss": 2.0509, "step": 67},
    {"epoch": 0.14, "learning_rate": 3.05e-05, "loss": 1.938, "step": 68},
    {"epoch": 0.14, "learning_rate": 3.1e-05, "loss": 2.0082, "step": 69},
    {"epoch": 0.14, "learning_rate": 3.15e-05, "loss": 2.4926, "step": 70},
    {"epoch": 0.14, "learning_rate": 3.2000000000000005e-05, "loss": 1.6101, "step": 71},
    {"epoch": 0.14, "learning_rate": 3.2500000000000004e-05, "loss": 1.7914, "step": 72},
    {"epoch": 0.15, "learning_rate": 3.3e-05, "loss": 2.0132, "step": 73},
    {"epoch": 0.15, "learning_rate": 3.35e-05, "loss": 1.6407, "step": 74},
    {"epoch": 0.15, "learning_rate": 3.4000000000000007e-05, "loss": 2.1127, "step": 75},
    {"epoch": 0.15, "learning_rate": 3.45e-05, "loss": 2.3478, "step": 76},
    {"epoch": 0.15, "learning_rate": 3.5e-05, "loss": 2.3729, "step": 77},
    {"epoch": 0.16, "learning_rate": 3.55e-05, "loss": 1.7706, "step": 78},
    {"epoch": 0.16, "learning_rate": 3.6e-05, "loss": 1.8671, "step": 79},
    {"epoch": 0.16, "learning_rate": 3.65e-05, "loss": 1.7771, "step": 80},
    {"epoch": 0.16, "learning_rate": 3.7e-05, "loss": 1.9566, "step": 81},
    {"epoch": 0.16, "learning_rate": 3.7500000000000003e-05, "loss": 1.9079, "step": 82},
    {"epoch": 0.17, "learning_rate": 3.8e-05, "loss": 2.0504, "step": 83},
    {"epoch": 0.17, "learning_rate": 3.85e-05, "loss": 2.2557, "step": 84},
    {"epoch": 0.17, "learning_rate": 3.9000000000000006e-05, "loss": 2.054, "step": 85},
    {"epoch": 0.17, "learning_rate": 3.9500000000000005e-05, "loss": 2.0602, "step": 86},
    {"epoch": 0.17, "learning_rate": 4e-05, "loss": 1.9849, "step": 87},
    {"epoch": 0.18, "learning_rate": 4.05e-05, "loss": 2.0352, "step": 88},
    {"epoch": 0.18, "learning_rate": 4.1e-05, "loss": 2.0, "step": 89},
    {"epoch": 0.18, "learning_rate": 4.15e-05, "loss": 1.9662, "step": 90},
    {"epoch": 0.18, "learning_rate": 4.2e-05, "loss": 1.7563, "step": 91},
    {"epoch": 0.18, "learning_rate": 4.25e-05, "loss": 1.8681, "step": 92},
    {"epoch": 0.19, "learning_rate": 4.3e-05, "loss": 1.6307, "step": 93},
    {"epoch": 0.19, "learning_rate": 4.35e-05, "loss": 1.7752, "step": 94},
    {"epoch": 0.19, "learning_rate": 4.4000000000000006e-05, "loss": 1.9107, "step": 95},
    {"epoch": 0.19, "learning_rate": 4.4500000000000004e-05, "loss": 2.1152, "step": 96},
    {"epoch": 0.19, "learning_rate": 4.5e-05, "loss": 1.8788, "step": 97},
    {"epoch": 0.2, "learning_rate": 4.55e-05, "loss": 1.9173, "step": 98},
    {"epoch": 0.2, "learning_rate": 4.600000000000001e-05, "loss": 1.6978, "step": 99},
    {"epoch": 0.2, "learning_rate": 4.6500000000000005e-05, "loss": 1.9182, "step": 100},
    {"epoch": 0.2, "eval_loss": 1.551979660987854, "eval_runtime": 4178.1095, "eval_samples_per_second": 27.385, "eval_steps_per_second": 13.693, "step": 100},
    {"epoch": 0.2, "learning_rate": 4.7e-05, "loss": 1.6893, "step": 101},
    {"epoch": 0.2, "learning_rate": 4.75e-05, "loss": 1.6763, "step": 102},
    {"epoch": 0.21, "learning_rate": 4.8e-05, "loss": 1.7825, "step": 103},
    {"epoch": 0.21, "learning_rate": 4.85e-05, "loss": 1.5596, "step": 104},
    {"epoch": 0.21, "learning_rate": 4.9e-05, "loss": 1.7855, "step": 105},
    {"epoch": 0.21, "learning_rate": 4.9500000000000004e-05, "loss": 1.6738, "step": 106},
    {"epoch": 0.21, "learning_rate": 5e-05, "loss": 1.8853, "step": 107},
    {"epoch": 0.22, "learning_rate": 4.9999228941119745e-05, "loss": 1.5495, "step": 108},
    {"epoch": 0.22, "learning_rate": 4.999691581204152e-05, "loss": 1.74, "step": 109},
    {"epoch": 0.22, "learning_rate": 4.9993060755450015e-05, "loss": 1.6969, "step": 110},
    {"epoch": 0.22, "learning_rate": 4.998766400914329e-05, "loss": 1.6482, "step": 111},
    {"epoch": 0.22, "learning_rate": 4.9980725906018074e-05, "loss": 1.7053, "step": 112},
    {"epoch": 0.23, "learning_rate": 4.9972246874049254e-05, "loss": 1.6929, "step": 113},
    {"epoch": 0.23, "learning_rate": 4.9962227436263453e-05, "loss": 1.7185, "step": 114},
    {"epoch": 0.23, "learning_rate": 4.995066821070679e-05, "loss": 1.5287, "step": 115},
    {"epoch": 0.23, "learning_rate": 4.9937569910406756e-05, "loss": 1.8321, "step": 116},
    {"epoch": 0.23, "learning_rate": 4.99229333433282e-05, "loss": 1.7286, "step": 117},
    {"epoch": 0.24, "learning_rate": 4.990675941232353e-05, "loss": 1.3939, "step": 118},
    {"epoch": 0.24, "learning_rate": 4.9889049115077005e-05, "loss": 1.7136, "step": 119},
    {"epoch": 0.24, "learning_rate": 4.9869803544043166e-05, "loss": 1.6449, "step": 120},
    {"epoch": 0.24, "learning_rate": 4.98490238863795e-05, "loss": 1.4612, "step": 121},
    {"epoch": 0.24, "learning_rate": 4.982671142387316e-05, "loss": 1.4071, "step": 122},
    {"epoch": 0.25, "learning_rate": 4.980286753286195e-05, "loss": 1.4904, "step": 123},
    {"epoch": 0.25, "learning_rate": 4.9777493684149375e-05, "loss": 1.7834, "step": 124},
    {"epoch": 0.25, "learning_rate": 4.975059144291394e-05, "loss": 1.7078, "step": 125},
    {"epoch": 0.25, "learning_rate": 4.972216246861262e-05, "loss": 1.4912, "step": 126},
    {"epoch": 0.25, "learning_rate": 4.9692208514878444e-05, "loss": 1.8625, "step": 127},
    {"epoch": 0.26, "learning_rate": 4.966073142941239e-05, "loss": 1.5348, "step": 128},
    {"epoch": 0.26, "learning_rate": 4.962773315386935e-05, "loss": 1.4056, "step": 129},
    {"epoch": 0.26, "learning_rate": 4.9593215723738404e-05, "loss": 1.1799, "step": 130},
    {"epoch": 0.26, "learning_rate": 4.9557181268217227e-05, "loss": 1.2923, "step": 131},
    {"epoch": 0.26, "learning_rate": 4.951963201008076e-05, "loss": 1.3528, "step": 132},
    {"epoch": 0.27, "learning_rate": 4.9480570265544144e-05, "loss": 1.2402, "step": 133},
    {"epoch": 0.27, "learning_rate": 4.943999844411977e-05, "loss": 1.3492, "step": 134},
    {"epoch": 0.27, "learning_rate": 4.939791904846869e-05, "loss": 1.329, "step": 135},
    {"epoch": 0.27, "learning_rate": 4.935433467424624e-05, "loss": 1.3137, "step": 136},
    {"epoch": 0.27, "learning_rate": 4.9309248009941914e-05, "loss": 1.1167, "step": 137},
    {"epoch": 0.28, "learning_rate": 4.9262661836713564e-05, "loss": 1.0645, "step": 138},
    {"epoch": 0.28, "learning_rate": 4.9214579028215776e-05, "loss": 1.3611, "step": 139},
    {"epoch": 0.28, "learning_rate": 4.916500255042268e-05, "loss": 1.2777, "step": 140},
    {"epoch": 0.28, "learning_rate": 4.9113935461444955e-05, "loss": 1.2108, "step": 141},
    {"epoch": 0.28, "learning_rate": 4.906138091134118e-05, "loss": 1.1025, "step": 142},
    {"epoch": 0.29, "learning_rate": 4.900734214192358e-05, "loss": 1.0768, "step": 143},
    {"epoch": 0.29, "learning_rate": 4.8951822486557986e-05, "loss": 1.3846, "step": 144},
    {"epoch": 0.29, "learning_rate": 4.8894825369958255e-05, "loss": 1.1072, "step": 145},
    {"epoch": 0.29, "learning_rate": 4.8836354307975026e-05, "loss": 1.2182, "step": 146},
    {"epoch": 0.29, "learning_rate": 4.877641290737884e-05, "loss": 1.2538, "step": 147},
    {"epoch": 0.3, "learning_rate": 4.8715004865637614e-05, "loss": 1.1044, "step": 148},
    {"epoch": 0.3, "learning_rate": 4.8652133970688636e-05, "loss": 1.0941, "step": 149},
    {"epoch": 0.3, "learning_rate": 4.8587804100704845e-05, "loss": 0.9369, "step": 150},
    {"epoch": 0.3, "learning_rate": 4.852201922385564e-05, "loss": 0.9077, "step": 151},
    {"epoch": 0.3, "learning_rate": 4.8454783398062106e-05, "loss": 1.0606, "step": 152},
    {"epoch": 0.31, "learning_rate": 4.838610077074669e-05, "loss": 1.2465, "step": 153},
    {"epoch": 0.31, "learning_rate": 4.8315975578577355e-05, "loss": 1.16, "step": 154},
    {"epoch": 0.31, "learning_rate": 4.8244412147206284e-05, "loss": 1.2837, "step": 155},
    {"epoch": 0.31, "learning_rate": 4.817141489100302e-05, "loss": 1.2553, "step": 156},
    {"epoch": 0.31, "learning_rate": 4.8096988312782174e-05, "loss": 1.0804, "step": 157},
    {"epoch": 0.32, "learning_rate": 4.8021137003525664e-05, "loss": 0.9331, "step": 158},
    {"epoch": 0.32, "learning_rate": 4.794386564209953e-05, "loss": 0.952, "step": 159},
    {"epoch": 0.32, "learning_rate": 4.7865178994965344e-05, "loss": 0.9735, "step": 160},
    {"epoch": 0.32, "learning_rate": 4.7785081915886134e-05, "loss": 0.8972, "step": 161},
    {"epoch": 0.32, "learning_rate": 4.7703579345627035e-05, "loss": 1.2156, "step": 162},
    {"epoch": 0.33, "learning_rate": 4.762067631165049e-05, "loss": 1.2272, "step": 163},
    {"epoch": 0.33, "learning_rate": 4.753637792780614e-05, "loss": 0.8432, "step": 164},
    {"epoch": 0.33, "learning_rate": 4.745068939401539e-05, "loss": 0.9018, "step": 165},
    {"epoch": 0.33, "learning_rate": 4.7363615995950626e-05, "loss": 0.8768, "step": 166},
    {"epoch": 0.33, "learning_rate": 4.72751631047092e-05, "loss": 0.8826, "step": 167},
    {"epoch": 0.34, "learning_rate": 4.718533617648209e-05, "loss": 0.907, "step": 168},
    {"epoch": 0.34, "learning_rate": 4.709414075221734e-05, "loss": 0.9951, "step": 169},
    {"epoch": 0.34, "learning_rate": 4.7001582457278304e-05, "loss": 0.9882, "step": 170},
    {"epoch": 0.34, "learning_rate": 4.690766700109659e-05, "loss": 1.0546, "step": 171},
    {"epoch": 0.34, "learning_rate": 4.681240017681993e-05, "loss": 0.7883, "step": 172},
    {"epoch": 0.35, "learning_rate": 4.671578786095478e-05, "loss": 1.1422, "step": 173},
    {"epoch": 0.35, "learning_rate": 4.661783601300388e-05, "loss": 0.9893, "step": 174},
    {"epoch": 0.35, "learning_rate": 4.65185506750986e-05, "loss": 1.0088, "step": 175},
    {"epoch": 0.35, "learning_rate": 4.6417937971626245e-05, "loss": 0.8233, "step": 176},
    {"epoch": 0.35, "learning_rate": 4.6316004108852305e-05, "loss": 1.0708, "step": 177},
    {"epoch": 0.36, "learning_rate": 4.6212755374537596e-05, "loss": 1.0227, "step": 178},
    {"epoch": 0.36, "learning_rate": 4.610819813755038e-05, "loss": 0.9609, "step": 179},
    {"epoch": 0.36, "learning_rate": 4.600233884747355e-05, "loss": 0.8691, "step": 180},
    {"epoch": 0.36, "learning_rate": 4.5895184034206765e-05, "loss": 1.0931, "step": 181},
    {"epoch": 0.36, "learning_rate": 4.5786740307563636e-05, "loss": 0.8013, "step": 182},
    {"epoch": 0.37, "learning_rate": 4.567701435686404e-05, "loss": 0.817, "step": 183},
    {"epoch": 0.37, "learning_rate": 4.55660129505215e-05, "loss": 1.0999, "step": 184},
    {"epoch": 0.37, "learning_rate": 4.545374293562559e-05, "loss": 1.0032, "step": 185},
    {"epoch": 0.37, "learning_rate": 4.534021123751968e-05, "loss": 0.9359, "step": 186},
    {"epoch": 0.37, "learning_rate": 4.522542485937369e-05, "loss": 0.92, "step": 187},
    {"epoch": 0.38, "learning_rate": 4.5109390881752114e-05, "loss": 0.8375, "step": 188},
    {"epoch": 0.38, "learning_rate": 4.499211646217727e-05, "loss": 1.1646, "step": 189},
    {"epoch": 0.38, "learning_rate": 4.487360883468775e-05, "loss": 0.9532, "step": 190},
    {"epoch": 0.38, "learning_rate": 4.4753875309392266e-05, "loss": 1.2272, "step": 191},
    {"epoch": 0.38, "learning_rate": 4.463292327201862e-05, "loss": 1.0792, "step": 192},
    {"epoch": 0.39, "learning_rate": 4.451076018345825e-05, "loss": 0.9567, "step": 193},
    {"epoch": 0.39, "learning_rate": 4.4387393579305865e-05, "loss": 0.6387, "step": 194},
    {"epoch": 0.39, "learning_rate": 4.426283106939474e-05, "loss": 1.1238, "step": 195},
    {"epoch": 0.39, "learning_rate": 4.4137080337327205e-05, "loss": 0.8722, "step": 196},
    {"epoch": 0.39, "learning_rate": 4.401014914000078e-05, "loss": 0.8679, "step": 197},
    {"epoch": 0.4, "learning_rate": 4.3882045307129594e-05, "loss": 0.9063, "step": 198},
    {"epoch": 0.4, "learning_rate": 4.375277674076149e-05, "loss": 1.0135, "step": 199},
    {"epoch": 0.4, "learning_rate": 4.3622351414790554e-05, "loss": 1.0026, "step": 200},
    {"epoch": 0.4, "eval_loss": 1.3784873485565186, "eval_runtime": 4792.6678, "eval_samples_per_second": 23.873, "eval_steps_per_second": 11.937, "step": 200},
    {"epoch": 0.4, "learning_rate": 4.349077737446525e-05, "loss": 1.4974, "step": 201},
    {"epoch": 0.4, "learning_rate": 4.335806273589214e-05, "loss": 1.3078, "step": 202},
    {"epoch": 0.41, "learning_rate": 4.3224215685535294e-05, "loss": 1.6193, "step": 203},
    {"epoch": 0.41, "learning_rate": 4.3089244479711236e-05, "loss": 1.3764, "step": 204},
    {"epoch": 0.41, "learning_rate": 4.295315744407972e-05, "loss": 1.5635, "step": 205},
    {"epoch": 0.41, "learning_rate": 4.281596297313013e-05, "loss": 1.4103, "step": 206},
    {"epoch": 0.41, "learning_rate": 4.267766952966369e-05, "loss": 1.3984, "step": 207},
    {"epoch": 0.42, "learning_rate": 4.25382856442714e-05, "loss": 1.5663, "step": 208},
    {"epoch": 0.42, "learning_rate": 4.2397819914807856e-05, "loss": 1.348, "step": 209},
    {"epoch": 0.42, "learning_rate": 4.225628100586093e-05, "loss": 1.4036, "step": 210},
    {"epoch": 0.42, "learning_rate": 4.211367764821722e-05, "loss": 1.4676, "step": 211},
    {"epoch": 0.42, "learning_rate": 4.197001863832355e-05, "loss": 1.2909, "step": 212},
    {"epoch": 0.43, "learning_rate": 4.182531283774434e-05, "loss": 1.3502, "step": 213},
    {"epoch": 0.43, "learning_rate": 4.1679569172614996e-05, "loss": 1.3483, "step": 214},
    {"epoch": 0.43, "learning_rate": 4.1532796633091296e-05, "loss": 1.4727, "step": 215},
    {"epoch": 0.43, "learning_rate": 4.138500427279485e-05, "loss": 1.216, "step": 216},
    {"epoch": 0.43, "learning_rate": 4.123620120825459e-05, "loss": 1.2367, "step": 217},
    {"epoch": 0.44, "learning_rate": 4.1086396618344476e-05, "loss": 1.4552, "step": 218},
    {"epoch": 0.44, "learning_rate": 4.093559974371725e-05, "loss": 1.6128, "step": 219},
    {"epoch": 0.44, "learning_rate": 4.0783819886234445e-05, "loss": 1.2725, "step": 220},
    {"epoch": 0.44, "learning_rate": 4.063106640839264e-05, "loss": 1.3104, "step": 221},
    {"epoch": 0.44, "learning_rate": 4.047734873274586e-05, "loss": 1.4107, "step": 222},
    {"epoch": 0.45, "learning_rate": 4.0322676341324415e-05, "loss": 1.2777, "step": 223},
    {"epoch": 0.45, "learning_rate": 4.0167058775049996e-05, "loss": 1.2636, "step": 224},
    {"epoch": 0.45, "learning_rate": 4.0010505633147106e-05, "loss": 1.2956, "step": 225},
    {"epoch": 0.45, "learning_rate": 3.985302657255097e-05, "loss": 1.1578, "step": 226},
    {"epoch": 0.45, "learning_rate": 3.969463130731183e-05, "loss": 1.3663, "step": 227},
    {"epoch": 0.46, "learning_rate": 3.953532960799577e-05, "loss": 1.3132, "step": 228},
    {"epoch": 0.46, "learning_rate": 3.937513130108197e-05, "loss": 1.114, "step": 229},
    {"epoch": 0.46, "learning_rate": 3.92140462683566e-05, "loss": 1.266, "step": 230},
    {"epoch": 0.46, "learning_rate": 3.905208444630327e-05, "loss": 1.3218, "step": 231},
    {"epoch": 0.46, "learning_rate": 3.888925582549006e-05, "loss": 1.2201, "step": 232},
    {"epoch": 0.47, "learning_rate": 3.87255704499533e-05, "loss": 1.3779, "step": 233},
    {"epoch": 0.47, "learning_rate": 3.856103841657797e-05, "loss": 1.1852, "step": 234},
    {"epoch": 0.47, "learning_rate": 3.8395669874474915e-05, "loss": 1.254, "step": 235},
    {"epoch": 0.47, "learning_rate": 3.822947502435477e-05, "loss": 1.5058, "step": 236},
    {"epoch": 0.47, "learning_rate": 3.8062464117898724e-05, "loss": 1.5119, "step": 237},
    {"epoch": 0.48, "learning_rate": 3.789464745712619e-05, "loss": 1.1567, "step": 238},
    {"epoch": 0.48, "learning_rate": 3.7726035393759285e-05, "loss": 1.2842, "step": 239},
    {"epoch": 0.48, "learning_rate": 3.755663832858432e-05, "loss": 1.3541, "step": 240},
    {"epoch": 0.48, "learning_rate": 3.7386466710810194e-05, "loss": 1.2379, "step": 241},
    {"epoch": 0.48, "learning_rate": 3.721553103742388e-05, "loss": 1.2396, "step": 242},
    {"epoch": 0.49, "learning_rate": 3.704384185254288e-05, "loss": 1.2146, "step": 243},
    {"epoch": 0.49, "learning_rate": 3.6871409746764865e-05, "loss": 1.2261, "step": 244},
    {"epoch": 0.49, "learning_rate": 3.6698245356514335e-05, "loss": 1.4124, "step": 245},
    {"epoch": 0.49, "learning_rate": 3.652435936338656e-05, "loss": 0.9498, "step": 246},
    {"epoch": 0.49, "learning_rate": 3.634976249348867e-05, "loss": 1.147, "step": 247},
    {"epoch": 0.5, "learning_rate": 3.6174465516778035e-05, "loss": 1.2871, "step": 248},
    {"epoch": 0.5, "learning_rate": 3.599847924639788e-05, "loss": 1.3978, "step": 249},
    {"epoch": 0.5, "learning_rate": 3.582181453801036e-05, "loss": 1.2626, "step": 250},
    {"epoch": 0.5, "learning_rate": 3.564448228912682e-05, "loss": 1.2497, "step": 251},
    {"epoch": 0.5, "learning_rate": 3.54664934384357e-05, "loss": 1.1724, "step": 252},
    {"epoch": 0.51, "learning_rate": 3.528785896512772e-05, "loss": 1.2546, "step": 253},
    {"epoch": 0.51, "learning_rate": 3.510858988821863e-05, "loss": 1.668, "step": 254},
    {"epoch": 0.51, "learning_rate": 3.4928697265869515e-05, "loss": 1.2541, "step": 255},
    {"epoch": 0.51, "learning_rate": 3.474819219470471e-05, "loss": 1.0438, "step": 256},
    {"epoch": 0.51, "learning_rate": 3.456708580912725e-05, "loss": 1.2363, "step": 257},
    {"epoch": 0.52, "learning_rate": 3.438538928063208e-05, "loss": 1.1372, "step": 258},
    {"epoch": 0.52, "learning_rate": 3.4203113817116957e-05, "loss": 1.0645, "step": 259},
    {"epoch": 0.52, "learning_rate": 3.402027066219105e-05, "loss": 1.077, "step": 260},
    {"epoch": 0.52, "learning_rate": 3.383687109448143e-05, "loss": 1.2661, "step": 261},
    {"epoch": 0.52, "learning_rate": 3.365292642693732e-05, "loss": 1.2931, "step": 262},
    {"epoch": 0.53, "learning_rate": 3.346844800613229e-05, "loss": 1.4293, "step": 263},
    {"epoch": 0.53, "learning_rate": 3.3283447211564276e-05, "loss": 1.4949, "step": 264},
    {"epoch": 0.53, "learning_rate": 3.309793545495374e-05, "loss": 1.456, "step": 265},
    {"epoch": 0.53, "learning_rate": 3.2911924179539656e-05, "loss": 1.4819, "step": 266},
    {"epoch": 0.53, "learning_rate": 3.272542485937369e-05, "loss": 1.3641, "step": 267},
    {"epoch": 0.54, "learning_rate": 3.253844899861239e-05, "loss": 1.2427, "step": 268},
    {"epoch": 0.54, "learning_rate": 3.23510081308076e-05, "loss": 1.3287, "step": 269},
    {"epoch": 0.54, "learning_rate": 3.2163113818194964e-05, "loss": 1.4616, "step": 270},
    {"epoch": 0.54, "learning_rate": 3.1974777650980735e-05, "loss": 1.502, "step": 271},
    {"epoch": 0.54, "learning_rate": 3.178601124662686e-05, "loss": 1.2544, "step": 272},
    {"epoch": 0.55, "learning_rate": 3.1596826249134324e-05, "loss": 1.2749, "step": 273},
    {"epoch": 0.55, "learning_rate": 3.140723432832492e-05, "loss": 1.3278, "step": 274},
    {"epoch": 0.55, "learning_rate": 3.121724717912138e-05, "loss": 1.2634, "step": 275},
    {"epoch": 0.55, "learning_rate": 3.102687652082597e-05, "loss": 1.4866, "step": 276},
    {"epoch": 0.55, "learning_rate": 3.083613409639764e-05, "loss": 1.2986, "step": 277},
    {"epoch": 0.56, "learning_rate": 3.06450316717276e-05, "loss": 1.2379, "step": 278},
    {"epoch": 0.56, "learning_rate": 3.045358103491357e-05, "loss": 1.2092, "step": 279},
    {"epoch": 0.56, "learning_rate": 3.026179399553264e-05, "loss": 1.3209, "step": 280},
    {"epoch": 0.56, "learning_rate": 3.0069682383912813e-05, "loss": 1.4146, "step": 281},
    {"epoch": 0.56, "learning_rate": 2.9877258050403212e-05, "loss": 1.2452, "step": 282},
    {"epoch": 0.57, "learning_rate": 2.9684532864643122e-05, "loss": 1.3337, "step": 283},
    {"epoch": 0.57, "learning_rate": 2.949151871482982e-05, "loss": 1.3038, "step": 284},
    {"epoch": 0.57, "learning_rate": 2.929822750698524e-05, "loss": 1.3654, "step": 285},
    {"epoch": 0.57, "learning_rate": 2.9104671164221576e-05, "loss": 1.4753, "step": 286},
    {"epoch": 0.57, "learning_rate": 2.8910861626005776e-05, "loss": 1.2995, "step": 287},
    {"epoch": 0.58, "learning_rate": 2.871681084742308e-05, "loss": 1.2287, "step": 288},
    {"epoch": 0.58, "learning_rate": 2.8522530798439567e-05, "loss": 1.1932, "step": 289},
    {"epoch": 0.58, "learning_rate": 2.832803346316381e-05, "loss": 1.2312, "step": 290},
    {"epoch": 0.58, "learning_rate": 2.8133330839107608e-05, "loss": 1.4553, "step": 291},
    {"epoch": 0.58, "learning_rate": 2.7938434936445945e-05, "loss": 1.1896, "step": 292},
    {"epoch": 0.59, "learning_rate": 2.774335777727613e-05, "loss": 1.2998, "step": 293},
    {"epoch": 0.59, "learning_rate": 2.754811139487625e-05, "loss": 1.2403, "step": 294},
    {"epoch": 0.59, "learning_rate": 2.7352707832962865e-05, "loss": 1.5125, "step": 295},
    {"epoch": 0.59, "learning_rate": 2.7157159144948092e-05, "loss": 1.2591, "step": 296},
    {"epoch": 0.59, "learning_rate": 2.6961477393196126e-05, "loss": 1.2907, "step": 297},
    {"epoch": 0.6, "learning_rate": 2.6765674648279172e-05, "loss": 1.3376, "step": 298},
    {"epoch": 0.6, "learning_rate": 2.656976298823284e-05, "loss": 1.1303, "step": 299},
    {"epoch": 0.6, "learning_rate": 2.637375449781115e-05, "loss": 1.3983, "step": 300},
    {"epoch": 0.6, "eval_loss": 1.3045283555984497, "eval_runtime": 4572.2865, "eval_samples_per_second": 25.024, "eval_steps_per_second": 12.512, "step": 300},
    {"epoch": 0.6, "learning_rate": 2.6177661267741065e-05, "loss": 1.2213, "step": 301},
    {"epoch": 0.6, "learning_rate": 2.598149539397672e-05, "loss": 1.3029, "step": 302},
    {"epoch": 0.61, "learning_rate": 2.578526897695321e-05, "loss": 1.4005, "step": 303},
    {"epoch": 0.61, "learning_rate": 2.558899412084026e-05, "loss": 1.4929, "step": 304},
    {"epoch": 0.61, "learning_rate": 2.539268293279552e-05, "loss": 1.2246, "step": 305},
    {"epoch": 0.61, "learning_rate": 2.5196347522217784e-05, "loss": 1.2211, "step": 306},
    {"epoch": 0.61, "learning_rate": 2.5e-05, "loss": 1.3308, "step": 307},
    {"epoch": 0.62, "learning_rate": 2.480365247778223e-05, "loss": 1.471, "step": 308},
    {"epoch": 0.62, "learning_rate": 2.460731706720449e-05, "loss": 1.2822, "step": 309},
    {"epoch": 0.62, "learning_rate": 2.4411005879159753e-05, "loss": 1.1612, "step": 310},
    {"epoch": 0.62, "learning_rate": 2.4214731023046793e-05, "loss": 1.4232, "step": 311},
    {"epoch": 0.62, "learning_rate": 2.4018504606023293e-05, "loss": 1.1328, "step": 312},
    {"epoch": 0.63, "learning_rate": 2.3822338732258937e-05, "loss": 1.3512, "step": 313},
    {"epoch": 0.63, "learning_rate": 2.3626245502188864e-05, "loss": 1.2963, "step": 314},
    {"epoch": 0.63, "learning_rate": 2.3430237011767167e-05, "loss": 1.453, "step": 315},
    {"epoch": 0.63, "learning_rate": 2.323432535172084e-05, "loss": 1.4939, "step": 316},
    {"epoch": 0.63, "learning_rate": 2.303852260680388e-05, "loss": 1.2504, "step": 317},
    {"epoch": 0.64, "learning_rate": 2.284284085505192e-05, "loss": 1.2362, "step": 318},
    {"epoch": 0.64, "learning_rate": 2.2647292167037144e-05, "loss": 1.3078, "step": 319},
    {"epoch": 0.64, "learning_rate": 2.2451888605123754e-05, "loss": 1.442, "step": 320},
    {"epoch": 0.64, "learning_rate": 2.225664222272387e-05, "loss": 1.4557, "step": 321},
    {"epoch": 0.64, "learning_rate": 2.2061565063554064e-05, "loss": 1.472, "step": 322},
    {"epoch": 0.65, "learning_rate": 2.186666916089239e-05, "loss": 1.292, "step": 323},
    {"epoch": 0.65, "learning_rate": 2.1671966536836196e-05, "loss": 1.4428, "step": 324},
    {"epoch": 0.65, "learning_rate": 2.1477469201560435e-05, "loss": 1.2264, "step": 325},
    {"epoch": 0.65, "learning_rate": 2.1283189152576925e-05, "loss": 1.3822, "step": 326},
    {"epoch": 0.65, "learning_rate": 2.1089138373994223e-05, "loss": 1.4051, "step": 327},
    {"epoch": 0.66, "learning_rate": 2.089532883577843e-05, "loss": 1.2993, "step": 328},
    {"epoch": 0.66, "learning_rate": 2.070177249301476e-05, "loss": 1.3713, "step": 329},
    {"epoch": 0.66, "learning_rate": 2.0508481285170186e-05, "loss": 1.297, "step": 330},
    {"epoch": 0.66, "learning_rate": 2.031546713535688e-05, "loss": 1.4, "step": 331},
    {"epoch": 0.66, "learning_rate": 2.0122741949596797e-05, "loss": 1.3429, "step": 332},
    {"epoch": 0.67, "learning_rate": 1.9930317616087196e-05, "loss": 1.3994, "step": 333},
    {"epoch": 0.67, "learning_rate": 1.9738206004467363e-05, "loss": 1.5288, "step": 334},
    {"epoch": 0.67, "learning_rate": 1.9546418965086442e-05, "loss": 1.2621, "step": 335},
    {"epoch": 0.67, "learning_rate": 1.935496832827241e-05, "loss": 1.4992, "step": 336},
    {"epoch": 0.67, "learning_rate": 1.9163865903602374e-05, "loss": 1.2588, "step": 337},
    {"epoch": 0.68, "learning_rate": 1.897312347917404e-05, "loss": 1.5055, "step": 338},
    {"epoch": 0.68, "learning_rate": 1.8782752820878634e-05, "loss": 1.2727, "step": 339},
    {"epoch": 0.68, "learning_rate": 1.8592765671675084e-05, "loss": 0.9645, "step": 340},
    {"epoch": 0.68, "learning_rate": 1.8403173750865685e-05, "loss": 1.2442, "step": 341},
    {"epoch": 0.68, "learning_rate": 1.8213988753373146e-05, "loss": 1.3001, "step": 342},
    {"epoch": 0.69, "learning_rate": 1.802522234901927e-05, "loss": 1.3376, "step": 343},
    {"epoch": 0.69, "learning_rate": 1.783688618180504e-05, "loss": 1.3671, "step": 344},
    {"epoch": 0.69, "learning_rate": 1.7648991869192405e-05, "loss": 1.2345, "step": 345},
    {"epoch": 0.69, "learning_rate": 1.746155100138761e-05, "loss": 1.151, "step": 346},
    {"epoch": 0.69, "learning_rate": 1.7274575140626318e-05, "loss": 1.3213, "step": 347},
    {"epoch": 0.7, "learning_rate": 1.7088075820460346e-05, "loss": 1.2663, "step": 348},
    {"epoch": 0.7, "learning_rate": 1.690206454504627e-05, "loss": 1.5264, "step": 349},
    {"epoch": 0.7, "learning_rate": 1.6716552788435724e-05, "loss": 1.3686, "step": 350},
    {"epoch": 0.7, "learning_rate": 1.6531551993867717e-05, "loss": 1.3005, "step": 351},
    {"epoch": 0.7, "learning_rate": 1.6347073573062672e-05, "loss": 1.2474, "step": 352},
    {"epoch": 0.71, "learning_rate": 1.6163128905518578e-05, "loss": 1.3205, "step": 353},
    {"epoch": 0.71, "learning_rate": 1.5979729337808955e-05, "loss": 1.4708, "step": 354},
    {"epoch": 0.71, "learning_rate": 1.5796886182883053e-05, "loss": 0.9178, "step": 355},
    {"epoch": 0.71, "learning_rate": 1.561461071936792e-05, "loss": 1.1726, "step": 356},
    {"epoch": 0.71, "learning_rate": 1.5432914190872757e-05, "loss": 1.4034, "step": 357},
    {"epoch": 0.72, "learning_rate": 1.5251807805295302e-05, "loss": 1.2072, "step": 358},
    {"epoch": 0.72, "learning_rate": 1.5071302734130489e-05, "loss": 1.0984, "step": 359},
    {"epoch": 0.72, "learning_rate": 1.4891410111781378e-05, "loss": 1.29, "step": 360},
    {"epoch": 0.72, "learning_rate": 1.4712141034872282e-05, "loss": 1.1494, "step": 361},
    {"epoch": 0.72, "learning_rate": 1.4533506561564306e-05, "loss": 1.3977, "step": 362},
    {"epoch": 0.73, "learning_rate": 1.4355517710873184e-05, "loss": 1.2525, "step": 363},
    {"epoch": 0.73, "learning_rate": 1.4178185461989662e-05, "loss": 1.1666, "step": 364},
    {"epoch": 0.73, "learning_rate": 1.4001520753602121e-05, "loss": 1.3341, "step": 365},
    {"epoch": 0.73, "learning_rate": 1.3825534483221974e-05, "loss": 1.2377, "step": 366},
    {"epoch": 0.73, "learning_rate": 1.3650237506511331e-05, "loss": 1.3557, "step": 367},
    {"epoch": 0.74, "learning_rate": 1.3475640636613446e-05, "loss": 1.1775, "step": 368},
    {"epoch": 0.74, "learning_rate": 1.330175464348567e-05, "loss": 1.3356, "step": 369},
    {"epoch": 0.74, "learning_rate": 1.312859025323514e-05, "loss": 1.4126, "step": 370},
    {"epoch": 0.74, "learning_rate": 1.2956158147457115e-05, "loss": 1.4473, "step": 371},
    {"epoch": 0.74, "learning_rate": 1.2784468962576136e-05, "loss": 1.1903, "step": 372},
    {"epoch": 0.75, "learning_rate": 1.261353328918981e-05, "loss": 1.357, "step": 373},
    {"epoch": 0.75, "learning_rate": 1.2443361671415687e-05, "loss": 1.1659, "step": 374},
    {"epoch": 0.75, "learning_rate": 1.2273964606240718e-05, "loss": 1.3215, "step": 375},
    {"epoch": 0.75, "learning_rate": 1.2105352542873815e-05, "loss": 1.3099, "step": 376},
    {"epoch": 0.75, "learning_rate": 1.1937535882101281e-05, "loss": 1.2418, "step": 377},
    {"epoch": 0.76, "learning_rate": 1.1770524975645238e-05, "loss": 1.4179, "step": 378},
    {"epoch": 0.76, "learning_rate": 1.1604330125525079e-05, "loss": 1.1883, "step": 379},
    {"epoch": 0.76, "learning_rate": 1.1438961583422037e-05, "loss": 1.0938, "step": 380},
    {"epoch": 0.76, "learning_rate": 1.1274429550046704e-05, "loss": 1.2686, "step": 381},
    {"epoch": 0.76, "learning_rate": 1.1110744174509952e-05, "loss": 1.2447, "step": 382},
    {"epoch": 0.77, "learning_rate": 1.0947915553696742e-05, "loss": 1.3854, "step": 383},
    {"epoch": 0.77, "learning_rate": 1.07859537316434e-05, "loss": 1.126, "step": 384},
    {"epoch": 0.77, "learning_rate": 1.0624868698918045e-05, "loss": 1.3622, "step": 385},
    {"epoch": 0.77, "learning_rate": 1.0464670392004235e-05, "loss": 1.4624, "step": 386},
    {"epoch": 0.77, "learning_rate": 1.0305368692688174e-05, "loss": 1.3589, "step": 387},
    {"epoch": 0.78, "learning_rate": 1.0146973427449038e-05, "loss": 1.3111, "step": 388},
    {"epoch": 0.78, "learning_rate": 9.989494366852904e-06, "loss": 1.3436, "step": 389},
    {"epoch": 0.78, "learning_rate": 9.832941224950012e-06, "loss": 1.4172, "step": 390},
    {"epoch": 0.78, "learning_rate": 9.677323658675594e-06, "loss": 1.587, "step": 391},
    {"epoch": 0.78, "learning_rate": 9.522651267254149e-06, "loss": 1.3277, "step": 392},
    {"epoch": 0.79, "learning_rate": 9.368933591607378e-06, "loss": 1.3433, "step": 393},
    {"epoch": 0.79, "learning_rate": 9.216180113765558e-06, "loss": 1.3489, "step": 394},
    {"epoch": 0.79, "learning_rate": 9.064400256282757e-06, "loss": 1.4577, "step": 395},
    {"epoch": 0.79, "learning_rate": 8.913603381655528e-06, "loss": 1.214, "step": 396},
    {"epoch": 0.79, "learning_rate": 8.763798791745411e-06, "loss": 1.5018, "step": 397},
    {"epoch": 0.8, "learning_rate": 8.614995727205156e-06, "loss": 1.2649, "step": 398},
    {"epoch": 0.8, "learning_rate": 8.467203366908707e-06, "loss": 1.3475, "step": 399},
    {"epoch": 0.8, "learning_rate": 8.320430827385003e-06, "loss": 1.4731, "step": 400},
    {"epoch": 0.8, "eval_loss": 1.2786192893981934, "eval_runtime": 4649.2009, "eval_samples_per_second": 24.61, "eval_steps_per_second": 12.305, "step": 400},
    {"epoch": 0.8, "learning_rate": 8.174687162255672e-06, "loss": 1.3442, "step": 401},
    {"epoch": 0.8, "learning_rate": 8.029981361676456e-06, "loss": 1.3979, "step": 402},
    {"epoch": 0.81, "learning_rate": 7.886322351782783e-06, "loss": 1.3131, "step": 403},
    {"epoch": 0.81, "learning_rate": 7.743718994139071e-06, "loss": 1.4709, "step": 404},
    {"epoch": 0.81, "learning_rate": 7.602180085192143e-06, "loss": 1.4698, "step": 405},
    {"epoch": 0.81, "learning_rate": 7.461714355728608e-06, "loss": 1.4866, "step": 406},
    {"epoch": 0.81, "learning_rate": 7.3223304703363135e-06, "loss": 1.3777, "step": 407},
    {"epoch": 0.82, "learning_rate": 7.184037026869867e-06, "loss": 1.4086, "step": 408},
    {"epoch": 0.82, "learning_rate": 7.046842555920283e-06, "loss": 1.2764, "step": 409},
    {"epoch": 0.82, "learning_rate": 6.91075552028877e-06, "loss": 1.3425, "step": 410},
    {"epoch": 0.82, "learning_rate": 6.775784314464717e-06, "loss": 1.2727, "step": 411},
    {"epoch": 0.82, "learning_rate": 6.641937264107867e-06, "loss": 1.3265, "step": 412},
    {"epoch": 0.83, "learning_rate": 6.509222625534755e-06, "loss": 1.5573, "step": 413},
    {"epoch": 0.83, "learning_rate": 6.377648585209456e-06, "loss": 1.2084, "step": 414},
    {"epoch": 0.83, "learning_rate": 6.247223259238511e-06, "loss": 1.3, "step": 415},
    {"epoch": 0.83, "learning_rate": 6.117954692870412e-06, "loss": 1.2939, "step": 416},
    {"epoch": 0.83, "learning_rate": 5.989850859999227e-06, "loss": 1.3688, "step": 417},
    {"epoch": 0.84, "learning_rate": 5.8629196626728e-06, "loss": 1.3784, "step": 418},
    {"epoch": 0.84, "learning_rate": 5.737168930605272e-06, "loss": 1.4403, "step": 419},
    {"epoch": 0.84, "learning_rate": 5.612606420694141e-06, "loss": 1.3014, "step": 420},
    {"epoch": 0.84, "learning_rate": 5.489239816541755e-06, "loss": 1.3343, "step": 421},
    {"epoch": 0.84, "learning_rate": 5.367076727981382e-06, "loss": 1.4141, "step": 422},
    {"epoch": 0.85, "learning_rate": 5.24612469060774e-06, "loss": 1.4151, "step": 423},
    {"epoch": 0.85, "learning_rate": 5.12639116531225e-06, "loss": 1.2296, "step": 424},
    {"epoch": 0.85, "learning_rate": 5.007883537822736e-06, "loss": 1.4797, "step": 425},
    {"epoch": 0.85, "learning_rate": 4.890609118247888e-06, "loss": 1.299, "step": 426},
    {"epoch": 0.85, "learning_rate": 4.7745751406263165e-06, "loss": 1.2994, "step": 427},
    {"epoch": 0.86, "learning_rate": 4.659788762480327e-06, "loss": 1.4391, "step": 428},
    {"epoch": 0.86, "learning_rate": 4.54625706437441e-06, "loss": 1.2876, "step": 429},
    {"epoch": 0.86, "learning_rate": 4.433987049478508e-06, "loss": 1.2708, "step": 430},
    {"epoch": 0.86, "learning_rate": 4.322985643135952e-06, "loss": 1.463, "step": 431},
    {"epoch": 0.86, "learning_rate": 4.213259692436367e-06, "loss": 1.2678, "step": 432},
    {"epoch": 0.87, "learning_rate": 4.104815965793249e-06, "loss": 1.2462, "step": 433},
    {"epoch": 0.87, "learning_rate": 3.9976611525264525e-06, "loss": 1.266, "step": 434},
    {"epoch": 0.87, "learning_rate": 3.891801862449629e-06, "loss": 1.3158, "step": 435},
    {"epoch": 0.87, "learning_rate": 3.7872446254624104e-06, "loss": 1.1578, "step": 436},
    {"epoch": 0.87, "learning_rate": 3.6839958911476957e-06, "loss": 1.0904, "step": 437},
    {"epoch": 0.88, "learning_rate": 3.5820620283737616e-06, "loss": 1.3362, "step": 438},
    {"epoch": 0.88, "learning_rate": 3.4814493249014116e-06, "loss": 1.2818, "step": 439},
    {"epoch": 0.88, "learning_rate": 3.382163986996126e-06, "loss": 1.326, "step": 440},
    {"epoch": 0.88, "learning_rate": 3.284212139045223e-06, "loss": 1.3425, "step": 441},
    {"epoch": 0.88, "learning_rate": 3.187599823180071e-06, "loss": 1.5054, "step": 442},
    {"epoch": 0.89, "learning_rate": 3.092332998903416e-06, "loss": 1.2616, "step": 443},
    {"epoch": 0.89, "learning_rate": 2.9984175427217016e-06, "loss": 1.394, "step": 444},
    {"epoch": 0.89, "learning_rate": 2.9058592477826636e-06, "loss": 1.4627, "step": 445},
    {"epoch": 0.89, "learning_rate": 2.8146638235179213e-06, "loss": 1.218, "step": 446},
    {"epoch": 0.89, "learning_rate": 2.7248368952908053e-06, "loss": 1.5982, "step": 447},
    {"epoch": 0.9, "learning_rate": 2.6363840040493747e-06, "loss": 1.2318, "step": 448},
    {"epoch": 0.9, "learning_rate": 2.5493106059846116e-06, "loss": 1.5119, "step": 449},
    {"epoch": 0.9, "learning_rate": 2.4636220721938554e-06, "loss": 1.4656, "step": 450},
    {"epoch": 0.9, "learning_rate": 2.379323688349516e-06, "loss": 1.4699, "step": 451},
    {"epoch": 0.9, "learning_rate": 2.296420654372966e-06, "loss": 1.3212, "step": 452},
    {"epoch": 0.91, "learning_rate": 2.2149180841138676e-06, "loss": 1.4466, "step": 453},
    {"epoch": 0.91, "learning_rate": 2.1348210050346595e-06, "loss": 1.3307, "step": 454},
    {"epoch": 0.91, "learning_rate": 2.0561343579004715e-06, "loss": 1.2307, "step": 455},
    {"epoch": 0.91, "learning_rate": 1.9788629964743455e-06, "loss": 1.3392, "step": 456},
    {"epoch": 0.91, "learning_rate": 1.9030116872178316e-06, "loss": 1.3646, "step": 457},
    {"epoch": 0.92, "learning_rate": 1.8285851089969802e-06, "loss": 1.4209, "step": 458},
    {"epoch": 0.92, "learning_rate": 1.7555878527937164e-06, "loss": 1.5344, "step": 459},
    {"epoch": 0.92, "learning_rate": 1.6840244214226502e-06, "loss": 1.3534, "step": 460},
    {"epoch": 0.92, "learning_rate": 1.6138992292533183e-06, "loss": 1.1762, "step": 461},
    {"epoch": 0.92, "learning_rate": 1.5452166019378989e-06, "loss": 1.6005, "step": 462},
    {"epoch": 0.93, "learning_rate": 1.4779807761443636e-06, "loss": 1.4277, "step": 463},
    {"epoch": 0.93, "learning_rate": 1.4121958992951629e-06, "loss": 1.5691, "step": 464},
    {"epoch": 0.93, "learning_rate": 1.3478660293113676e-06, "loss": 1.3057, "step": 465},
    {"epoch": 0.93, "learning_rate": 1.284995134362385e-06, "loss": 1.4196, "step": 466},
    {"epoch": 0.93, "learning_rate": 1.2235870926211619e-06, "loss": 1.3259, "step": 467},
    {"epoch": 0.94, "learning_rate": 1.16364569202497e-06, "loss": 1.4289, "step": 468},
    {"epoch": 0.94, "learning_rate": 1.105174630041747e-06, "loss": 1.4029, "step": 469},
    {"epoch": 0.94, "learning_rate": 1.0481775134420225e-06, "loss": 1.3532, "step": 470},
    {"epoch": 0.94, "learning_rate": 9.926578580764234e-07, "loss": 1.3901, "step": 471},
    {"epoch": 0.94, "learning_rate": 9.386190886588208e-07, "loss": 1.3493, "step": 472},
    {"epoch": 0.95, "learning_rate": 8.860645385550481e-07, "loss": 1.4783, "step": 473},
    {"epoch": 0.95, "learning_rate": 8.349974495773183e-07, "loss": 1.406, "step": 474},
    {"epoch": 0.95, "learning_rate": 7.854209717842231e-07, "loss": 1.1884, "step": 475},
    {"epoch": 0.95, "learning_rate": 7.373381632864384e-07, "loss": 1.4137, "step": 476},
    {"epoch": 0.95, "learning_rate": 6.907519900580861e-07, "loss": 1.4141, "step": 477},
    {"epoch": 0.96, "learning_rate": 6.456653257537665e-07, "loss": 1.4732, "step": 478},
    {"epoch": 0.96, "learning_rate": 6.020809515313142e-07, "loss": 1.6633, "step": 479},
    {"epoch": 0.96, "learning_rate": 5.600015558802352e-07, "loss": 1.2823, "step": 480},
    {"epoch": 0.96, "learning_rate": 5.194297344558536e-07, "loss": 1.2236, "step": 481},
    {"epoch": 0.96, "learning_rate": 4.803679899192392e-07, "loss": 1.5525, "step": 482},
    {"epoch": 0.97, "learning_rate": 4.4281873178278475e-07, "loss": 1.4783, "step": 483},
    {"epoch": 0.97, "learning_rate": 4.067842762616014e-07, "loss": 1.359, "step": 484},
    {"epoch": 0.97, "learning_rate": 3.7226684613065333e-07, "loss": 1.5369, "step": 485},
    {"epoch": 0.97, "learning_rate": 3.3926857058761417e-07, "loss": 1.2518, "step": 486},
    {"epoch": 0.97, "learning_rate": 3.077914851215585e-07, "loss": 1.3272, "step": 487},
    {"epoch": 0.98, "learning_rate": 2.778375313873871e-07, "loss": 1.3623, "step": 488},
    {"epoch": 0.98, "learning_rate": 2.494085570860616e-07, "loss": 1.5239, "step": 489},
    {"epoch": 0.98, "learning_rate": 2.2250631585063186e-07, "loss": 1.3619, "step": 490},
    {"epoch": 0.98, "learning_rate": 1.9713246713805588e-07, "loss": 1.1757, "step": 491},
    {"epoch": 0.98, "learning_rate": 1.732885761268427e-07, "loss": 1.193, "step": 492},
    {"epoch": 0.99, "learning_rate": 1.509761136205101e-07, "loss": 1.469, "step": 493},
    {"epoch": 0.99, "learning_rate": 1.3019645595683806e-07, "loss": 1.2552, "step": 494},
    {"epoch": 0.99, "learning_rate": 1.109508849230001e-07, "loss": 1.3539, "step": 495},
    {"epoch": 0.99, "learning_rate": 9.324058767646859e-08, "loss": 1.2407, "step": 496},
    {"epoch": 0.99, "learning_rate": 7.706665667180091e-08, "loss": 1.1192, "step": 497},
    {"epoch": 1.0, "learning_rate": 6.243008959324892e-08, "loss": 1.2434, "step": 498},
    {"epoch": 1.0, "learning_rate": 4.9331789293211026e-08, "loss": 1.4577, "step": 499},
    {"epoch": 1.0, "learning_rate": 3.7772563736551694e-08, "loss": 1.4546, "step": 500},
    {"epoch": 1.0, "eval_loss": 1.2736687660217285, "eval_runtime": 4627.6472, "eval_samples_per_second": 24.725, "eval_steps_per_second": 12.362, "step": 500}
  ],
  "max_steps": 500,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 1.4859211112448e+16,
  "trial_name": null,
  "trial_params": null
}