|
{ |
|
"best_metric": 0.753384912959381, |
|
"best_model_checkpoint": "../models/t5-picard/checkpoint-2368", |
|
"epoch": 99.98963557338682, |
|
"global_step": 7400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001, |
|
"loss": 3.3904, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8308, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8985, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7145, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4975, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4788, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4007, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3452, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3293, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3214, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3254, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2502, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.261, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2192, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2354, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2376, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1848, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_exec": 0.528046421663443, |
|
"eval_loss": 0.1530311554670334, |
|
"eval_runtime": 253.3827, |
|
"eval_samples_per_second": 4.081, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1615, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1756, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2114, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1703, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1485, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.119, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1273, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1625, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.13, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1265, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1397, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1404, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1136, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0919, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1114, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_exec": 0.5309477756286267, |
|
"eval_loss": 0.1567097008228302, |
|
"eval_runtime": 282.0248, |
|
"eval_samples_per_second": 3.666, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.131, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0972, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0924, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0915, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1105, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1299, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0861, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0779, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0677, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0879, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0953, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0766, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0621, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0793, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0929, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.074, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_exec": 0.6876208897485493, |
|
"eval_loss": 0.1364244967699051, |
|
"eval_runtime": 267.8144, |
|
"eval_samples_per_second": 3.861, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0661, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.059, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0857, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0743, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0755, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0572, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1111, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0819, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0695, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0553, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0473, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.051, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.065, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0554, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0494, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0468, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_exec": 0.6876208897485493, |
|
"eval_loss": 0.15044596791267395, |
|
"eval_runtime": 292.7353, |
|
"eval_samples_per_second": 3.532, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0588, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0591, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0498, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0414, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.048, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0612, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.051, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.042, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2851, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0504, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0575, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0389, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0352, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0335, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0431, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0417, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_exec": 0.706963249516441, |
|
"eval_loss": 0.15714390575885773, |
|
"eval_runtime": 277.5044, |
|
"eval_samples_per_second": 3.726, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0341, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0426, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0407, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0446, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0408, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0323, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.032, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0428, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0452, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.033, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0238, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0383, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0393, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.033, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0261, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0204, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.17861126363277435, |
|
"eval_runtime": 288.586, |
|
"eval_samples_per_second": 3.583, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0283, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0328, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0303, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0225, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0243, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0319, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0392, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0265, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0198, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0293, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0326, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0288, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.023, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0227, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0273, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.029, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_exec": 0.6924564796905223, |
|
"eval_loss": 0.17845195531845093, |
|
"eval_runtime": 299.4799, |
|
"eval_samples_per_second": 3.453, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0276, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0227, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0185, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0244, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0281, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0181, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0132, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0205, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0274, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0209, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0161, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0202, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0246, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0177, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0147, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_exec": 0.7214700193423598, |
|
"eval_loss": 0.1890781968832016, |
|
"eval_runtime": 285.738, |
|
"eval_samples_per_second": 3.619, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.023, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0231, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0169, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0161, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0115, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0273, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.022, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0154, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0123, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0159, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0198, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0213, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0181, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.22579778730869293, |
|
"eval_runtime": 297.1361, |
|
"eval_samples_per_second": 3.48, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0196, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0141, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0192, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0181, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0108, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0112, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0092, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0153, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0169, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.011, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0095, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0128, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0161, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0128, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0103, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_exec": 0.6895551257253385, |
|
"eval_loss": 0.2176068276166916, |
|
"eval_runtime": 294.0528, |
|
"eval_samples_per_second": 3.516, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0074, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0116, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.015, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0127, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0098, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.009, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0101, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0211, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0141, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0089, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.008, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_exec": 0.688588007736944, |
|
"eval_loss": 0.2283668965101242, |
|
"eval_runtime": 296.2258, |
|
"eval_samples_per_second": 3.491, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0156, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0082, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.01, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0111, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0103, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0063, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0091, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0119, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0089, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.008, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0103, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0088, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_exec": 0.7147001934235977, |
|
"eval_loss": 0.22069412469863892, |
|
"eval_runtime": 296.2474, |
|
"eval_samples_per_second": 3.49, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0126, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0122, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0076, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0071, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0107, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0146, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.008, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"eval_exec": 0.7098646034816247, |
|
"eval_loss": 0.25100022554397583, |
|
"eval_runtime": 292.4465, |
|
"eval_samples_per_second": 3.536, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0096, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0099, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0133, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0087, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0073, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.007, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0098, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0119, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0121, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0103, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0072, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0055, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"eval_exec": 0.7098646034816247, |
|
"eval_loss": 0.24743221700191498, |
|
"eval_runtime": 295.2581, |
|
"eval_samples_per_second": 3.502, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0068, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0081, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0095, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0069, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0091, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0062, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0064, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0091, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0078, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0061, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0098, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0072, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"eval_exec": 0.6924564796905223, |
|
"eval_loss": 0.25289151072502136, |
|
"eval_runtime": 288.8532, |
|
"eval_samples_per_second": 3.58, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0111, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0069, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0052, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0068, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0052, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0078, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"eval_exec": 0.7311411992263056, |
|
"eval_loss": 0.26111745834350586, |
|
"eval_runtime": 286.014, |
|
"eval_samples_per_second": 3.615, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.007, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0073, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0069, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"eval_exec": 0.7205029013539652, |
|
"eval_loss": 0.2667447030544281, |
|
"eval_runtime": 283.0779, |
|
"eval_samples_per_second": 3.653, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0076, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0064, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0053, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0064, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 15.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0078, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"eval_exec": 0.730174081237911, |
|
"eval_loss": 0.24596308171749115, |
|
"eval_runtime": 290.0696, |
|
"eval_samples_per_second": 3.565, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0082, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.28547874093055725, |
|
"eval_runtime": 289.8855, |
|
"eval_samples_per_second": 3.567, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0052, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 17.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"eval_exec": 0.695357833655706, |
|
"eval_loss": 0.2833567261695862, |
|
"eval_runtime": 296.5011, |
|
"eval_samples_per_second": 3.487, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.006, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0053, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0199, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.008, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"eval_exec": 0.7021276595744681, |
|
"eval_loss": 0.2530258595943451, |
|
"eval_runtime": 289.9942, |
|
"eval_samples_per_second": 3.566, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_exec": 0.7330754352030948, |
|
"eval_loss": 0.2696512043476105, |
|
"eval_runtime": 291.6539, |
|
"eval_samples_per_second": 3.545, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 19.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 19.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.01, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0051, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"eval_exec": 0.7224371373307543, |
|
"eval_loss": 0.2764066755771637, |
|
"eval_runtime": 304.8058, |
|
"eval_samples_per_second": 3.392, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 20.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0074, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 20.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 20.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 20.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_exec": 0.7156673114119922, |
|
"eval_loss": 0.2632952034473419, |
|
"eval_runtime": 295.0708, |
|
"eval_samples_per_second": 3.504, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0053, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 21.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 21.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"eval_exec": 0.7224371373307543, |
|
"eval_loss": 0.2584507465362549, |
|
"eval_runtime": 296.7113, |
|
"eval_samples_per_second": 3.485, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 21.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 21.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 22.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 22.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 22.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"eval_exec": 0.7263056092843327, |
|
"eval_loss": 0.262117475271225, |
|
"eval_runtime": 293.7879, |
|
"eval_samples_per_second": 3.52, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 22.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0178, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 22.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"eval_exec": 0.7205029013539652, |
|
"eval_loss": 0.2860746383666992, |
|
"eval_runtime": 301.1885, |
|
"eval_samples_per_second": 3.433, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 23.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 23.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 24.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 24.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.27234283089637756, |
|
"eval_runtime": 296.6131, |
|
"eval_samples_per_second": 3.486, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 24.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 24.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 25.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"eval_exec": 0.718568665377176, |
|
"eval_loss": 0.2882576584815979, |
|
"eval_runtime": 293.4007, |
|
"eval_samples_per_second": 3.524, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 25.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 25.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 25.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 25.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 25.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 25.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_exec": 0.6963249516441006, |
|
"eval_loss": 0.2879628837108612, |
|
"eval_runtime": 294.5537, |
|
"eval_samples_per_second": 3.51, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 26.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 26.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 26.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 26.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 26.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 26.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 26.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 26.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_exec": 0.7272727272727273, |
|
"eval_loss": 0.29901865124702454, |
|
"eval_runtime": 296.6574, |
|
"eval_samples_per_second": 3.486, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 27.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 27.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 27.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 27.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 27.67, |
|
"eval_exec": 0.7195357833655706, |
|
"eval_loss": 0.3057432770729065, |
|
"eval_runtime": 300.1856, |
|
"eval_samples_per_second": 3.445, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 27.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 27.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 27.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 28.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 28.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"eval_exec": 0.7282398452611218, |
|
"eval_loss": 0.28940409421920776, |
|
"eval_runtime": 294.4149, |
|
"eval_samples_per_second": 3.512, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 28.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 28.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 29.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 29.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 29.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 29.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"eval_exec": 0.7166344294003868, |
|
"eval_loss": 0.308432400226593, |
|
"eval_runtime": 289.3433, |
|
"eval_samples_per_second": 3.574, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 29.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 29.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 29.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 29.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 30.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 30.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 30.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 30.27, |
|
"eval_exec": 0.7176015473887815, |
|
"eval_loss": 0.28124359250068665, |
|
"eval_runtime": 292.5108, |
|
"eval_samples_per_second": 3.535, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 30.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 30.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 30.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 30.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 30.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 31.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 31.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_exec": 0.7214700193423598, |
|
"eval_loss": 0.30306151509284973, |
|
"eval_runtime": 285.8576, |
|
"eval_samples_per_second": 3.617, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 31.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 31.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 31.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 31.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 31.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 31.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 31.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 31.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 31.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 31.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 31.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 31.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_exec": 0.753384912959381, |
|
"eval_loss": 0.2954598069190979, |
|
"eval_runtime": 301.2457, |
|
"eval_samples_per_second": 3.432, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 32.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 32.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 32.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 32.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 32.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 32.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 32.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 32.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 32.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"eval_exec": 0.730174081237911, |
|
"eval_loss": 0.2934824824333191, |
|
"eval_runtime": 296.4371, |
|
"eval_samples_per_second": 3.488, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 32.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 33.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 33.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 33.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 33.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 33.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 33.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 33.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 33.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 33.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 33.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 33.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 33.72, |
|
"eval_exec": 0.695357833655706, |
|
"eval_loss": 0.28229427337646484, |
|
"eval_runtime": 288.5686, |
|
"eval_samples_per_second": 3.583, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 33.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 34.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 34.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 34.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 34.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 34.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 34.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 34.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 34.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 34.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 34.59, |
|
"eval_exec": 0.718568665377176, |
|
"eval_loss": 0.2955181896686554, |
|
"eval_runtime": 294.421, |
|
"eval_samples_per_second": 3.512, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 34.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 34.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 35.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 35.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 35.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.3102652430534363, |
|
"eval_runtime": 296.911, |
|
"eval_samples_per_second": 3.483, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 35.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 35.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 35.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 35.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 35.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 35.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 35.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 35.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 35.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 36.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"eval_exec": 0.7030947775628626, |
|
"eval_loss": 0.2786959409713745, |
|
"eval_runtime": 286.0516, |
|
"eval_samples_per_second": 3.615, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 36.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 36.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 36.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 36.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 36.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 36.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 36.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 36.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 37.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 37.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"eval_exec": 0.7195357833655706, |
|
"eval_loss": 0.3104759156703949, |
|
"eval_runtime": 302.4048, |
|
"eval_samples_per_second": 3.419, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 37.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 37.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 37.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 37.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 37.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 37.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 37.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 37.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 37.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 38.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 38.05, |
|
"eval_exec": 0.7011605415860735, |
|
"eval_loss": 0.3018852472305298, |
|
"eval_runtime": 286.3546, |
|
"eval_samples_per_second": 3.611, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 38.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 38.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 38.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 38.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 38.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 38.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 38.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 38.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 38.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 38.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 38.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 38.91, |
|
"eval_exec": 0.7098646034816247, |
|
"eval_loss": 0.295253187417984, |
|
"eval_runtime": 292.7859, |
|
"eval_samples_per_second": 3.532, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 39.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 39.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 39.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 39.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 39.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 39.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 39.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 39.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 39.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 39.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 39.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 39.78, |
|
"eval_exec": 0.7156673114119922, |
|
"eval_loss": 0.3224295973777771, |
|
"eval_runtime": 296.2398, |
|
"eval_samples_per_second": 3.49, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 39.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 39.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 40.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 40.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 2972 |
|
}, |
|
{ |
|
"epoch": 40.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 40.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 2984 |
|
}, |
|
{ |
|
"epoch": 40.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 3004 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.3050314486026764, |
|
"eval_runtime": 289.6078, |
|
"eval_samples_per_second": 3.57, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 40.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 40.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3028 |
|
}, |
|
{ |
|
"epoch": 40.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 41.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 41.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 41.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3044 |
|
}, |
|
{ |
|
"epoch": 41.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 41.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 41.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 41.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_exec": 0.7088974854932302, |
|
"eval_loss": 0.30448201298713684, |
|
"eval_runtime": 294.3306, |
|
"eval_samples_per_second": 3.513, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 41.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 3076 |
|
}, |
|
{ |
|
"epoch": 41.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 41.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3088 |
|
}, |
|
{ |
|
"epoch": 41.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 41.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 41.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 41.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 42.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3112 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 42.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 42.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 42.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 42.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.3050415515899658, |
|
"eval_runtime": 293.6495, |
|
"eval_samples_per_second": 3.521, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 42.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 42.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 42.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3148 |
|
}, |
|
{ |
|
"epoch": 42.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 3152 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 42.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3164 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 42.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 43.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 43.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3188 |
|
}, |
|
{ |
|
"epoch": 43.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 43.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.3184911012649536, |
|
"eval_runtime": 290.8614, |
|
"eval_samples_per_second": 3.555, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 43.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 3208 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 3212 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 43.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 43.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 43.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 43.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 43.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 3236 |
|
}, |
|
{ |
|
"epoch": 43.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 43.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3244 |
|
}, |
|
{ |
|
"epoch": 43.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 43.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 43.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 44.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 44.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 44.11, |
|
"eval_exec": 0.7108317214700194, |
|
"eval_loss": 0.2903190851211548, |
|
"eval_runtime": 296.1114, |
|
"eval_samples_per_second": 3.492, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 44.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 44.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3284 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3288 |
|
}, |
|
{ |
|
"epoch": 44.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3292 |
|
}, |
|
{ |
|
"epoch": 44.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 44.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 44.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 44.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3308 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3316 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3324 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"eval_exec": 0.7166344294003868, |
|
"eval_loss": 0.29221683740615845, |
|
"eval_runtime": 297.9619, |
|
"eval_samples_per_second": 3.47, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 45.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3332 |
|
}, |
|
{ |
|
"epoch": 45.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3336 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 45.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 45.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 45.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3356 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3364 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3368 |
|
}, |
|
{ |
|
"epoch": 45.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3376 |
|
}, |
|
{ |
|
"epoch": 45.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 45.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 45.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 45.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 45.83, |
|
"eval_exec": 0.7166344294003868, |
|
"eval_loss": 0.30226799845695496, |
|
"eval_runtime": 296.3235, |
|
"eval_samples_per_second": 3.489, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 45.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3396 |
|
}, |
|
{ |
|
"epoch": 45.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 45.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 46.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 46.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3412 |
|
}, |
|
{ |
|
"epoch": 46.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3416 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 46.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 46.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3428 |
|
}, |
|
{ |
|
"epoch": 46.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 46.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3436 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 46.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3444 |
|
}, |
|
{ |
|
"epoch": 46.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3448 |
|
}, |
|
{ |
|
"epoch": 46.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3452 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"eval_exec": 0.7437137330754352, |
|
"eval_loss": 0.30926698446273804, |
|
"eval_runtime": 297.8667, |
|
"eval_samples_per_second": 3.471, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 46.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3464 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 46.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3476 |
|
}, |
|
{ |
|
"epoch": 47.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 47.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 47.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 47.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 47.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 47.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3508 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3512 |
|
}, |
|
{ |
|
"epoch": 47.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 3516 |
|
}, |
|
{ |
|
"epoch": 47.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0054, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 47.56, |
|
"eval_exec": 0.7350096711798839, |
|
"eval_loss": 0.27321064472198486, |
|
"eval_runtime": 285.4659, |
|
"eval_samples_per_second": 3.622, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3524 |
|
}, |
|
{ |
|
"epoch": 47.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 47.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3532 |
|
}, |
|
{ |
|
"epoch": 47.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 47.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3544 |
|
}, |
|
{ |
|
"epoch": 47.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3548 |
|
}, |
|
{ |
|
"epoch": 47.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 48.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3556 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 48.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 48.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 48.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 48.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3576 |
|
}, |
|
{ |
|
"epoch": 48.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"eval_exec": 0.746615087040619, |
|
"eval_loss": 0.2905672490596771, |
|
"eval_runtime": 293.155, |
|
"eval_samples_per_second": 3.527, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3592 |
|
}, |
|
{ |
|
"epoch": 48.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 48.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 48.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3608 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 48.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3624 |
|
}, |
|
{ |
|
"epoch": 49.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 3628 |
|
}, |
|
{ |
|
"epoch": 49.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3632 |
|
}, |
|
{ |
|
"epoch": 49.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3636 |
|
}, |
|
{ |
|
"epoch": 49.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 49.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3644 |
|
}, |
|
{ |
|
"epoch": 49.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 49.29, |
|
"eval_exec": 0.7388781431334622, |
|
"eval_loss": 0.2956254184246063, |
|
"eval_runtime": 295.8069, |
|
"eval_samples_per_second": 3.496, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 49.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3652 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 49.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 49.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 3664 |
|
}, |
|
{ |
|
"epoch": 49.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3668 |
|
}, |
|
{ |
|
"epoch": 49.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3676 |
|
}, |
|
{ |
|
"epoch": 49.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 49.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3684 |
|
}, |
|
{ |
|
"epoch": 49.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3688 |
|
}, |
|
{ |
|
"epoch": 49.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 49.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 50.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3704 |
|
}, |
|
{ |
|
"epoch": 50.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 50.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 50.16, |
|
"eval_exec": 0.7437137330754352, |
|
"eval_loss": 0.30492928624153137, |
|
"eval_runtime": 291.3729, |
|
"eval_samples_per_second": 3.549, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 50.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3716 |
|
}, |
|
{ |
|
"epoch": 50.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 50.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 50.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3728 |
|
}, |
|
{ |
|
"epoch": 50.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3732 |
|
}, |
|
{ |
|
"epoch": 50.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3736 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 50.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 50.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3748 |
|
}, |
|
{ |
|
"epoch": 50.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 50.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 50.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3764 |
|
}, |
|
{ |
|
"epoch": 50.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3772 |
|
}, |
|
{ |
|
"epoch": 51.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 51.03, |
|
"eval_exec": 0.7495164410058027, |
|
"eval_loss": 0.32364675402641296, |
|
"eval_runtime": 300.6402, |
|
"eval_samples_per_second": 3.439, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 51.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 51.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3784 |
|
}, |
|
{ |
|
"epoch": 51.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3788 |
|
}, |
|
{ |
|
"epoch": 51.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 51.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3796 |
|
}, |
|
{ |
|
"epoch": 51.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 51.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 51.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 51.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 51.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 51.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3824 |
|
}, |
|
{ |
|
"epoch": 51.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 51.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3832 |
|
}, |
|
{ |
|
"epoch": 51.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 51.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 51.88, |
|
"eval_exec": 0.7253384912959381, |
|
"eval_loss": 0.30479735136032104, |
|
"eval_runtime": 283.847, |
|
"eval_samples_per_second": 3.643, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 51.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 51.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 52.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 52.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3856 |
|
}, |
|
{ |
|
"epoch": 52.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 52.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 52.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3868 |
|
}, |
|
{ |
|
"epoch": 52.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 52.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 52.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 52.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3884 |
|
}, |
|
{ |
|
"epoch": 52.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 52.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 3892 |
|
}, |
|
{ |
|
"epoch": 52.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3896 |
|
}, |
|
{ |
|
"epoch": 52.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 52.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 52.75, |
|
"eval_exec": 0.7214700193423598, |
|
"eval_loss": 0.3040502965450287, |
|
"eval_runtime": 296.1224, |
|
"eval_samples_per_second": 3.492, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 3908 |
|
}, |
|
{ |
|
"epoch": 52.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3912 |
|
}, |
|
{ |
|
"epoch": 52.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3916 |
|
}, |
|
{ |
|
"epoch": 52.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 53.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 53.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 3928 |
|
}, |
|
{ |
|
"epoch": 53.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 3932 |
|
}, |
|
{ |
|
"epoch": 53.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 53.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 53.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 53.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3948 |
|
}, |
|
{ |
|
"epoch": 53.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 53.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 53.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 53.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3964 |
|
}, |
|
{ |
|
"epoch": 53.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 53.62, |
|
"eval_exec": 0.7330754352030948, |
|
"eval_loss": 0.3174877464771271, |
|
"eval_runtime": 296.7562, |
|
"eval_samples_per_second": 3.484, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 53.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3972 |
|
}, |
|
{ |
|
"epoch": 53.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 53.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 53.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 53.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 3988 |
|
}, |
|
{ |
|
"epoch": 53.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 3992 |
|
}, |
|
{ |
|
"epoch": 53.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 54.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 54.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4008 |
|
}, |
|
{ |
|
"epoch": 54.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 54.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4016 |
|
}, |
|
{ |
|
"epoch": 54.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 54.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4024 |
|
}, |
|
{ |
|
"epoch": 54.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 54.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 54.48, |
|
"eval_exec": 0.7340425531914894, |
|
"eval_loss": 0.3035335838794708, |
|
"eval_runtime": 288.5745, |
|
"eval_samples_per_second": 3.583, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 4036 |
|
}, |
|
{ |
|
"epoch": 54.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 54.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 4044 |
|
}, |
|
{ |
|
"epoch": 54.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 54.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4052 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 54.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 54.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 55.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 4072 |
|
}, |
|
{ |
|
"epoch": 55.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4076 |
|
}, |
|
{ |
|
"epoch": 55.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 55.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4084 |
|
}, |
|
{ |
|
"epoch": 55.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 4088 |
|
}, |
|
{ |
|
"epoch": 55.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 55.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 55.35, |
|
"eval_exec": 0.7321083172147002, |
|
"eval_loss": 0.3005565404891968, |
|
"eval_runtime": 293.428, |
|
"eval_samples_per_second": 3.524, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 55.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 55.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4108 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 4112 |
|
}, |
|
{ |
|
"epoch": 55.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 55.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 55.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4124 |
|
}, |
|
{ |
|
"epoch": 55.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 55.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4132 |
|
}, |
|
{ |
|
"epoch": 55.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4136 |
|
}, |
|
{ |
|
"epoch": 55.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 55.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 56.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4148 |
|
}, |
|
{ |
|
"epoch": 56.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 56.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4156 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"eval_exec": 0.723404255319149, |
|
"eval_loss": 0.3162212371826172, |
|
"eval_runtime": 294.7911, |
|
"eval_samples_per_second": 3.508, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 56.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 56.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0055, |
|
"step": 4168 |
|
}, |
|
{ |
|
"epoch": 56.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4172 |
|
}, |
|
{ |
|
"epoch": 56.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 56.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 56.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4184 |
|
}, |
|
{ |
|
"epoch": 56.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4188 |
|
}, |
|
{ |
|
"epoch": 56.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 56.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 56.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4204 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4208 |
|
}, |
|
{ |
|
"epoch": 56.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 57.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 57.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 57.08, |
|
"eval_exec": 0.706963249516441, |
|
"eval_loss": 0.3172769248485565, |
|
"eval_runtime": 287.3743, |
|
"eval_samples_per_second": 3.598, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 57.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 57.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4232 |
|
}, |
|
{ |
|
"epoch": 57.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4236 |
|
}, |
|
{ |
|
"epoch": 57.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 57.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 4244 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 57.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 57.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 57.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 57.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 57.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4268 |
|
}, |
|
{ |
|
"epoch": 57.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 57.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4276 |
|
}, |
|
{ |
|
"epoch": 57.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 57.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4284 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"eval_exec": 0.7224371373307543, |
|
"eval_loss": 0.2908557057380676, |
|
"eval_runtime": 293.2594, |
|
"eval_samples_per_second": 3.526, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 57.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 58.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4296 |
|
}, |
|
{ |
|
"epoch": 58.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 58.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 4304 |
|
}, |
|
{ |
|
"epoch": 58.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4308 |
|
}, |
|
{ |
|
"epoch": 58.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 58.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4316 |
|
}, |
|
{ |
|
"epoch": 58.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 58.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4324 |
|
}, |
|
{ |
|
"epoch": 58.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4328 |
|
}, |
|
{ |
|
"epoch": 58.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 58.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4336 |
|
}, |
|
{ |
|
"epoch": 58.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 58.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 58.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4348 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"eval_exec": 0.7166344294003868, |
|
"eval_loss": 0.31116729974746704, |
|
"eval_runtime": 295.9051, |
|
"eval_samples_per_second": 3.494, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 58.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4356 |
|
}, |
|
{ |
|
"epoch": 58.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 58.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4364 |
|
}, |
|
{ |
|
"epoch": 59.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 59.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4372 |
|
}, |
|
{ |
|
"epoch": 59.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4376 |
|
}, |
|
{ |
|
"epoch": 59.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 59.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4388 |
|
}, |
|
{ |
|
"epoch": 59.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4392 |
|
}, |
|
{ |
|
"epoch": 59.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4396 |
|
}, |
|
{ |
|
"epoch": 59.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 59.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4404 |
|
}, |
|
{ |
|
"epoch": 59.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 59.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4412 |
|
}, |
|
{ |
|
"epoch": 59.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 59.67, |
|
"eval_exec": 0.7388781431334622, |
|
"eval_loss": 0.3091637194156647, |
|
"eval_runtime": 297.2444, |
|
"eval_samples_per_second": 3.479, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 59.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 59.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 4424 |
|
}, |
|
{ |
|
"epoch": 59.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 4428 |
|
}, |
|
{ |
|
"epoch": 59.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 4432 |
|
}, |
|
{ |
|
"epoch": 59.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4436 |
|
}, |
|
{ |
|
"epoch": 59.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 60.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4444 |
|
}, |
|
{ |
|
"epoch": 60.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 60.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 60.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4456 |
|
}, |
|
{ |
|
"epoch": 60.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 60.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 60.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4468 |
|
}, |
|
{ |
|
"epoch": 60.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4472 |
|
}, |
|
{ |
|
"epoch": 60.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4476 |
|
}, |
|
{ |
|
"epoch": 60.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 60.53, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.3012126684188843, |
|
"eval_runtime": 289.2665, |
|
"eval_samples_per_second": 3.575, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 60.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 60.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4488 |
|
}, |
|
{ |
|
"epoch": 60.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4492 |
|
}, |
|
{ |
|
"epoch": 60.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4496 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 60.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 4504 |
|
}, |
|
{ |
|
"epoch": 60.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 61.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4516 |
|
}, |
|
{ |
|
"epoch": 61.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 61.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 61.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4528 |
|
}, |
|
{ |
|
"epoch": 61.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4532 |
|
}, |
|
{ |
|
"epoch": 61.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 61.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"eval_exec": 0.7272727272727273, |
|
"eval_loss": 0.30137544870376587, |
|
"eval_runtime": 286.7428, |
|
"eval_samples_per_second": 3.606, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 61.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 61.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 4552 |
|
}, |
|
{ |
|
"epoch": 61.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4556 |
|
}, |
|
{ |
|
"epoch": 61.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 61.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4564 |
|
}, |
|
{ |
|
"epoch": 61.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4568 |
|
}, |
|
{ |
|
"epoch": 61.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4572 |
|
}, |
|
{ |
|
"epoch": 61.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 61.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 61.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4584 |
|
}, |
|
{ |
|
"epoch": 61.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 62.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 4592 |
|
}, |
|
{ |
|
"epoch": 62.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 62.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 62.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4604 |
|
}, |
|
{ |
|
"epoch": 62.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 62.27, |
|
"eval_exec": 0.723404255319149, |
|
"eval_loss": 0.3098083734512329, |
|
"eval_runtime": 291.4042, |
|
"eval_samples_per_second": 3.548, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 62.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 62.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4616 |
|
}, |
|
{ |
|
"epoch": 62.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 62.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4624 |
|
}, |
|
{ |
|
"epoch": 62.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 4628 |
|
}, |
|
{ |
|
"epoch": 62.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 4632 |
|
}, |
|
{ |
|
"epoch": 62.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 62.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 62.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4648 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4652 |
|
}, |
|
{ |
|
"epoch": 62.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 63.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4664 |
|
}, |
|
{ |
|
"epoch": 63.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 63.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 63.13, |
|
"eval_exec": 0.7437137330754352, |
|
"eval_loss": 0.32493457198143005, |
|
"eval_runtime": 288.524, |
|
"eval_samples_per_second": 3.584, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 63.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4676 |
|
}, |
|
{ |
|
"epoch": 63.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 63.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4684 |
|
}, |
|
{ |
|
"epoch": 63.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4688 |
|
}, |
|
{ |
|
"epoch": 63.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4692 |
|
}, |
|
{ |
|
"epoch": 63.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4696 |
|
}, |
|
{ |
|
"epoch": 63.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 63.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 63.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4708 |
|
}, |
|
{ |
|
"epoch": 63.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 63.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4716 |
|
}, |
|
{ |
|
"epoch": 63.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 63.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4724 |
|
}, |
|
{ |
|
"epoch": 63.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4728 |
|
}, |
|
{ |
|
"epoch": 63.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4732 |
|
}, |
|
{ |
|
"epoch": 63.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 63.99, |
|
"eval_exec": 0.7427466150870407, |
|
"eval_loss": 0.3356438875198364, |
|
"eval_runtime": 295.1379, |
|
"eval_samples_per_second": 3.503, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 64.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 64.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4744 |
|
}, |
|
{ |
|
"epoch": 64.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4748 |
|
}, |
|
{ |
|
"epoch": 64.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 64.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4756 |
|
}, |
|
{ |
|
"epoch": 64.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 64.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 64.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4772 |
|
}, |
|
{ |
|
"epoch": 64.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4776 |
|
}, |
|
{ |
|
"epoch": 64.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 64.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4784 |
|
}, |
|
{ |
|
"epoch": 64.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 64.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4792 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4796 |
|
}, |
|
{ |
|
"epoch": 64.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 64.86, |
|
"eval_exec": 0.7398452611218569, |
|
"eval_loss": 0.3491382598876953, |
|
"eval_runtime": 296.7752, |
|
"eval_samples_per_second": 3.484, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 64.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4804 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4808 |
|
}, |
|
{ |
|
"epoch": 65.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 65.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4816 |
|
}, |
|
{ |
|
"epoch": 65.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 65.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4824 |
|
}, |
|
{ |
|
"epoch": 65.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 4828 |
|
}, |
|
{ |
|
"epoch": 65.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 65.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 65.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4844 |
|
}, |
|
{ |
|
"epoch": 65.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4848 |
|
}, |
|
{ |
|
"epoch": 65.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4852 |
|
}, |
|
{ |
|
"epoch": 65.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4856 |
|
}, |
|
{ |
|
"epoch": 65.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 65.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 65.72, |
|
"eval_exec": 0.7272727272727273, |
|
"eval_loss": 0.34306177496910095, |
|
"eval_runtime": 286.5304, |
|
"eval_samples_per_second": 3.609, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 65.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4868 |
|
}, |
|
{ |
|
"epoch": 65.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 65.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4876 |
|
}, |
|
{ |
|
"epoch": 65.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 65.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4884 |
|
}, |
|
{ |
|
"epoch": 66.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4888 |
|
}, |
|
{ |
|
"epoch": 66.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4892 |
|
}, |
|
{ |
|
"epoch": 66.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 66.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 66.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4904 |
|
}, |
|
{ |
|
"epoch": 66.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4908 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 4912 |
|
}, |
|
{ |
|
"epoch": 66.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 4916 |
|
}, |
|
{ |
|
"epoch": 66.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 66.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 4924 |
|
}, |
|
{ |
|
"epoch": 66.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 66.59, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.3411843478679657, |
|
"eval_runtime": 292.9024, |
|
"eval_samples_per_second": 3.53, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 66.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 4932 |
|
}, |
|
{ |
|
"epoch": 66.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4936 |
|
}, |
|
{ |
|
"epoch": 66.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4944 |
|
}, |
|
{ |
|
"epoch": 66.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 4948 |
|
}, |
|
{ |
|
"epoch": 66.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4952 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4956 |
|
}, |
|
{ |
|
"epoch": 67.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 67.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 4964 |
|
}, |
|
{ |
|
"epoch": 67.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 67.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4972 |
|
}, |
|
{ |
|
"epoch": 67.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4976 |
|
}, |
|
{ |
|
"epoch": 67.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 67.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 4984 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 4988 |
|
}, |
|
{ |
|
"epoch": 67.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 67.45, |
|
"eval_exec": 0.7224371373307543, |
|
"eval_loss": 0.33966416120529175, |
|
"eval_runtime": 289.6629, |
|
"eval_samples_per_second": 3.57, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 67.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 4996 |
|
}, |
|
{ |
|
"epoch": 67.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 67.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5004 |
|
}, |
|
{ |
|
"epoch": 67.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5008 |
|
}, |
|
{ |
|
"epoch": 67.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5012 |
|
}, |
|
{ |
|
"epoch": 67.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 67.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 67.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 67.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 67.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5032 |
|
}, |
|
{ |
|
"epoch": 68.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5036 |
|
}, |
|
{ |
|
"epoch": 68.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 68.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5044 |
|
}, |
|
{ |
|
"epoch": 68.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5048 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5052 |
|
}, |
|
{ |
|
"epoch": 68.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 68.32, |
|
"eval_exec": 0.723404255319149, |
|
"eval_loss": 0.31566286087036133, |
|
"eval_runtime": 285.389, |
|
"eval_samples_per_second": 3.623, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 68.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 68.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5064 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 68.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5072 |
|
}, |
|
{ |
|
"epoch": 68.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5076 |
|
}, |
|
{ |
|
"epoch": 68.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 68.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 68.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 5088 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5092 |
|
}, |
|
{ |
|
"epoch": 68.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 68.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 5104 |
|
}, |
|
{ |
|
"epoch": 69.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5108 |
|
}, |
|
{ |
|
"epoch": 69.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5112 |
|
}, |
|
{ |
|
"epoch": 69.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5116 |
|
}, |
|
{ |
|
"epoch": 69.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 69.19, |
|
"eval_exec": 0.7108317214700194, |
|
"eval_loss": 0.3202356994152069, |
|
"eval_runtime": 289.377, |
|
"eval_samples_per_second": 3.573, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 69.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5124 |
|
}, |
|
{ |
|
"epoch": 69.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5128 |
|
}, |
|
{ |
|
"epoch": 69.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5132 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5136 |
|
}, |
|
{ |
|
"epoch": 69.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 69.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5144 |
|
}, |
|
{ |
|
"epoch": 69.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 69.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 69.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5156 |
|
}, |
|
{ |
|
"epoch": 69.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0071, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 69.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5164 |
|
}, |
|
{ |
|
"epoch": 69.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5168 |
|
}, |
|
{ |
|
"epoch": 69.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5172 |
|
}, |
|
{ |
|
"epoch": 69.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5176 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 70.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 70.05, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.31820809841156006, |
|
"eval_runtime": 291.8188, |
|
"eval_samples_per_second": 3.543, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 70.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5188 |
|
}, |
|
{ |
|
"epoch": 70.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5192 |
|
}, |
|
{ |
|
"epoch": 70.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 5196 |
|
}, |
|
{ |
|
"epoch": 70.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 70.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5204 |
|
}, |
|
{ |
|
"epoch": 70.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 70.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5212 |
|
}, |
|
{ |
|
"epoch": 70.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 70.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 70.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5224 |
|
}, |
|
{ |
|
"epoch": 70.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5228 |
|
}, |
|
{ |
|
"epoch": 70.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5232 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5236 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 70.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 70.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 70.91, |
|
"eval_exec": 0.7176015473887815, |
|
"eval_loss": 0.35949328541755676, |
|
"eval_runtime": 287.4088, |
|
"eval_samples_per_second": 3.598, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 71.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5256 |
|
}, |
|
{ |
|
"epoch": 71.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 71.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5264 |
|
}, |
|
{ |
|
"epoch": 71.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5268 |
|
}, |
|
{ |
|
"epoch": 71.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5272 |
|
}, |
|
{ |
|
"epoch": 71.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5276 |
|
}, |
|
{ |
|
"epoch": 71.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5284 |
|
}, |
|
{ |
|
"epoch": 71.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 5288 |
|
}, |
|
{ |
|
"epoch": 71.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5292 |
|
}, |
|
{ |
|
"epoch": 71.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5296 |
|
}, |
|
{ |
|
"epoch": 71.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 71.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 71.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 5308 |
|
}, |
|
{ |
|
"epoch": 71.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 71.78, |
|
"eval_exec": 0.7205029013539652, |
|
"eval_loss": 0.304627001285553, |
|
"eval_runtime": 285.1203, |
|
"eval_samples_per_second": 3.627, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 71.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5316 |
|
}, |
|
{ |
|
"epoch": 71.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 71.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5324 |
|
}, |
|
{ |
|
"epoch": 71.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 5328 |
|
}, |
|
{ |
|
"epoch": 72.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 72.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5336 |
|
}, |
|
{ |
|
"epoch": 72.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 72.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5344 |
|
}, |
|
{ |
|
"epoch": 72.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5348 |
|
}, |
|
{ |
|
"epoch": 72.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5352 |
|
}, |
|
{ |
|
"epoch": 72.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5356 |
|
}, |
|
{ |
|
"epoch": 72.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 72.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5364 |
|
}, |
|
{ |
|
"epoch": 72.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5368 |
|
}, |
|
{ |
|
"epoch": 72.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5372 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"eval_exec": 0.7321083172147002, |
|
"eval_loss": 0.3293524980545044, |
|
"eval_runtime": 290.4949, |
|
"eval_samples_per_second": 3.559, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 72.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 72.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5384 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5388 |
|
}, |
|
{ |
|
"epoch": 72.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 5392 |
|
}, |
|
{ |
|
"epoch": 72.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 72.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 73.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5404 |
|
}, |
|
{ |
|
"epoch": 73.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 73.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5412 |
|
}, |
|
{ |
|
"epoch": 73.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5416 |
|
}, |
|
{ |
|
"epoch": 73.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 73.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5424 |
|
}, |
|
{ |
|
"epoch": 73.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 73.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5432 |
|
}, |
|
{ |
|
"epoch": 73.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5436 |
|
}, |
|
{ |
|
"epoch": 73.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 73.51, |
|
"eval_exec": 0.7350096711798839, |
|
"eval_loss": 0.31576061248779297, |
|
"eval_runtime": 291.4555, |
|
"eval_samples_per_second": 3.548, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 73.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5444 |
|
}, |
|
{ |
|
"epoch": 73.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5448 |
|
}, |
|
{ |
|
"epoch": 73.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5452 |
|
}, |
|
{ |
|
"epoch": 73.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 73.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 73.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 73.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5468 |
|
}, |
|
{ |
|
"epoch": 73.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 73.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5476 |
|
}, |
|
{ |
|
"epoch": 74.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 74.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5484 |
|
}, |
|
{ |
|
"epoch": 74.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 5488 |
|
}, |
|
{ |
|
"epoch": 74.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5492 |
|
}, |
|
{ |
|
"epoch": 74.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5496 |
|
}, |
|
{ |
|
"epoch": 74.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 74.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 74.37, |
|
"eval_exec": 0.7021276595744681, |
|
"eval_loss": 0.3458137512207031, |
|
"eval_runtime": 285.7195, |
|
"eval_samples_per_second": 3.619, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 74.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5508 |
|
}, |
|
{ |
|
"epoch": 74.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 5512 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5516 |
|
}, |
|
{ |
|
"epoch": 74.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 74.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5524 |
|
}, |
|
{ |
|
"epoch": 74.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5528 |
|
}, |
|
{ |
|
"epoch": 74.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 5532 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 74.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 74.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5544 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 5548 |
|
}, |
|
{ |
|
"epoch": 75.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5552 |
|
}, |
|
{ |
|
"epoch": 75.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5556 |
|
}, |
|
{ |
|
"epoch": 75.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 75.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 5564 |
|
}, |
|
{ |
|
"epoch": 75.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 75.24, |
|
"eval_exec": 0.7108317214700194, |
|
"eval_loss": 0.3360079526901245, |
|
"eval_runtime": 289.9247, |
|
"eval_samples_per_second": 3.566, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 75.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5572 |
|
}, |
|
{ |
|
"epoch": 75.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5576 |
|
}, |
|
{ |
|
"epoch": 75.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 75.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5584 |
|
}, |
|
{ |
|
"epoch": 75.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5588 |
|
}, |
|
{ |
|
"epoch": 75.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 5592 |
|
}, |
|
{ |
|
"epoch": 75.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5596 |
|
}, |
|
{ |
|
"epoch": 75.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 75.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5604 |
|
}, |
|
{ |
|
"epoch": 75.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5608 |
|
}, |
|
{ |
|
"epoch": 75.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 5612 |
|
}, |
|
{ |
|
"epoch": 75.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 75.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 75.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 76.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5628 |
|
}, |
|
{ |
|
"epoch": 76.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 76.11, |
|
"eval_exec": 0.706963249516441, |
|
"eval_loss": 0.3310911953449249, |
|
"eval_runtime": 288.5245, |
|
"eval_samples_per_second": 3.584, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 76.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5636 |
|
}, |
|
{ |
|
"epoch": 76.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5644 |
|
}, |
|
{ |
|
"epoch": 76.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5648 |
|
}, |
|
{ |
|
"epoch": 76.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0036, |
|
"step": 5652 |
|
}, |
|
{ |
|
"epoch": 76.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5656 |
|
}, |
|
{ |
|
"epoch": 76.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 76.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 76.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5668 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5672 |
|
}, |
|
{ |
|
"epoch": 76.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 5676 |
|
}, |
|
{ |
|
"epoch": 76.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5684 |
|
}, |
|
{ |
|
"epoch": 76.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 76.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5692 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"eval_exec": 0.706963249516441, |
|
"eval_loss": 0.3491285741329193, |
|
"eval_runtime": 290.4295, |
|
"eval_samples_per_second": 3.56, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 77.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 77.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 77.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 5708 |
|
}, |
|
{ |
|
"epoch": 77.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5712 |
|
}, |
|
{ |
|
"epoch": 77.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5716 |
|
}, |
|
{ |
|
"epoch": 77.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 77.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 5724 |
|
}, |
|
{ |
|
"epoch": 77.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 5728 |
|
}, |
|
{ |
|
"epoch": 77.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5732 |
|
}, |
|
{ |
|
"epoch": 77.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5736 |
|
}, |
|
{ |
|
"epoch": 77.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 77.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5744 |
|
}, |
|
{ |
|
"epoch": 77.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5748 |
|
}, |
|
{ |
|
"epoch": 77.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5752 |
|
}, |
|
{ |
|
"epoch": 77.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5756 |
|
}, |
|
{ |
|
"epoch": 77.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 77.83, |
|
"eval_exec": 0.7108317214700194, |
|
"eval_loss": 0.33614587783813477, |
|
"eval_runtime": 284.0797, |
|
"eval_samples_per_second": 3.64, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 77.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5764 |
|
}, |
|
{ |
|
"epoch": 77.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5768 |
|
}, |
|
{ |
|
"epoch": 77.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 78.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5776 |
|
}, |
|
{ |
|
"epoch": 78.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 78.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5784 |
|
}, |
|
{ |
|
"epoch": 78.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5788 |
|
}, |
|
{ |
|
"epoch": 78.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 78.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5796 |
|
}, |
|
{ |
|
"epoch": 78.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 78.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5804 |
|
}, |
|
{ |
|
"epoch": 78.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 78.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 78.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5816 |
|
}, |
|
{ |
|
"epoch": 78.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 78.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 78.7, |
|
"eval_exec": 0.7117988394584139, |
|
"eval_loss": 0.3427754342556, |
|
"eval_runtime": 290.913, |
|
"eval_samples_per_second": 3.554, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 78.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 78.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 5832 |
|
}, |
|
{ |
|
"epoch": 78.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5836 |
|
}, |
|
{ |
|
"epoch": 78.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 78.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5844 |
|
}, |
|
{ |
|
"epoch": 79.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5848 |
|
}, |
|
{ |
|
"epoch": 79.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 79.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 79.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 79.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5864 |
|
}, |
|
{ |
|
"epoch": 79.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 5868 |
|
}, |
|
{ |
|
"epoch": 79.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 5872 |
|
}, |
|
{ |
|
"epoch": 79.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5876 |
|
}, |
|
{ |
|
"epoch": 79.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 79.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5884 |
|
}, |
|
{ |
|
"epoch": 79.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 79.56, |
|
"eval_exec": 0.7011605415860735, |
|
"eval_loss": 0.34577926993370056, |
|
"eval_runtime": 287.6027, |
|
"eval_samples_per_second": 3.595, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 79.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5892 |
|
}, |
|
{ |
|
"epoch": 79.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5896 |
|
}, |
|
{ |
|
"epoch": 79.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 79.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5904 |
|
}, |
|
{ |
|
"epoch": 79.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5908 |
|
}, |
|
{ |
|
"epoch": 79.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5912 |
|
}, |
|
{ |
|
"epoch": 79.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 5916 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 80.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5924 |
|
}, |
|
{ |
|
"epoch": 80.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 80.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5932 |
|
}, |
|
{ |
|
"epoch": 80.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5936 |
|
}, |
|
{ |
|
"epoch": 80.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 80.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 5944 |
|
}, |
|
{ |
|
"epoch": 80.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5948 |
|
}, |
|
{ |
|
"epoch": 80.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 80.43, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.32949239015579224, |
|
"eval_runtime": 280.7542, |
|
"eval_samples_per_second": 3.683, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 80.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5956 |
|
}, |
|
{ |
|
"epoch": 80.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 80.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5964 |
|
}, |
|
{ |
|
"epoch": 80.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 5968 |
|
}, |
|
{ |
|
"epoch": 80.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 5972 |
|
}, |
|
{ |
|
"epoch": 80.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5976 |
|
}, |
|
{ |
|
"epoch": 80.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 80.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 80.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 5988 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 5992 |
|
}, |
|
{ |
|
"epoch": 81.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 5996 |
|
}, |
|
{ |
|
"epoch": 81.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 81.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6008 |
|
}, |
|
{ |
|
"epoch": 81.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6012 |
|
}, |
|
{ |
|
"epoch": 81.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 81.29, |
|
"eval_exec": 0.7040618955512572, |
|
"eval_loss": 0.3443618714809418, |
|
"eval_runtime": 284.4305, |
|
"eval_samples_per_second": 3.635, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 81.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 81.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6024 |
|
}, |
|
{ |
|
"epoch": 81.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6028 |
|
}, |
|
{ |
|
"epoch": 81.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6032 |
|
}, |
|
{ |
|
"epoch": 81.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 6036 |
|
}, |
|
{ |
|
"epoch": 81.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 81.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6044 |
|
}, |
|
{ |
|
"epoch": 81.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 81.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6052 |
|
}, |
|
{ |
|
"epoch": 81.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6056 |
|
}, |
|
{ |
|
"epoch": 81.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 81.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6064 |
|
}, |
|
{ |
|
"epoch": 81.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6068 |
|
}, |
|
{ |
|
"epoch": 82.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6072 |
|
}, |
|
{ |
|
"epoch": 82.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 82.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 82.16, |
|
"eval_exec": 0.7253384912959381, |
|
"eval_loss": 0.35400378704071045, |
|
"eval_runtime": 287.759, |
|
"eval_samples_per_second": 3.593, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 82.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 82.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6088 |
|
}, |
|
{ |
|
"epoch": 82.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6092 |
|
}, |
|
{ |
|
"epoch": 82.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6096 |
|
}, |
|
{ |
|
"epoch": 82.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 82.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 6104 |
|
}, |
|
{ |
|
"epoch": 82.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6108 |
|
}, |
|
{ |
|
"epoch": 82.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 82.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6116 |
|
}, |
|
{ |
|
"epoch": 82.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 82.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6124 |
|
}, |
|
{ |
|
"epoch": 82.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6128 |
|
}, |
|
{ |
|
"epoch": 82.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6132 |
|
}, |
|
{ |
|
"epoch": 82.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 83.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 83.03, |
|
"eval_exec": 0.690522243713733, |
|
"eval_loss": 0.3448183834552765, |
|
"eval_runtime": 284.9854, |
|
"eval_samples_per_second": 3.628, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 83.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6148 |
|
}, |
|
{ |
|
"epoch": 83.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6152 |
|
}, |
|
{ |
|
"epoch": 83.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6156 |
|
}, |
|
{ |
|
"epoch": 83.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 83.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6164 |
|
}, |
|
{ |
|
"epoch": 83.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6168 |
|
}, |
|
{ |
|
"epoch": 83.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6172 |
|
}, |
|
{ |
|
"epoch": 83.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 83.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 83.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6184 |
|
}, |
|
{ |
|
"epoch": 83.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6188 |
|
}, |
|
{ |
|
"epoch": 83.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0, |
|
"step": 6192 |
|
}, |
|
{ |
|
"epoch": 83.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6196 |
|
}, |
|
{ |
|
"epoch": 83.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 83.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6204 |
|
}, |
|
{ |
|
"epoch": 83.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 83.88, |
|
"eval_exec": 0.7156673114119922, |
|
"eval_loss": 0.3485656976699829, |
|
"eval_runtime": 290.1762, |
|
"eval_samples_per_second": 3.563, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 83.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6212 |
|
}, |
|
{ |
|
"epoch": 83.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6216 |
|
}, |
|
{ |
|
"epoch": 84.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 84.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 6224 |
|
}, |
|
{ |
|
"epoch": 84.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6228 |
|
}, |
|
{ |
|
"epoch": 84.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6232 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6236 |
|
}, |
|
{ |
|
"epoch": 84.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 84.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6244 |
|
}, |
|
{ |
|
"epoch": 84.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6248 |
|
}, |
|
{ |
|
"epoch": 84.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 6252 |
|
}, |
|
{ |
|
"epoch": 84.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6256 |
|
}, |
|
{ |
|
"epoch": 84.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 84.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 84.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6268 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.33773335814476013, |
|
"eval_runtime": 286.9564, |
|
"eval_samples_per_second": 3.603, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6276 |
|
}, |
|
{ |
|
"epoch": 84.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6284 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6288 |
|
}, |
|
{ |
|
"epoch": 85.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6292 |
|
}, |
|
{ |
|
"epoch": 85.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 6296 |
|
}, |
|
{ |
|
"epoch": 85.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 85.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6304 |
|
}, |
|
{ |
|
"epoch": 85.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6308 |
|
}, |
|
{ |
|
"epoch": 85.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6312 |
|
}, |
|
{ |
|
"epoch": 85.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6316 |
|
}, |
|
{ |
|
"epoch": 85.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 85.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 85.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6328 |
|
}, |
|
{ |
|
"epoch": 85.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6332 |
|
}, |
|
{ |
|
"epoch": 85.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 85.62, |
|
"eval_exec": 0.7243713733075435, |
|
"eval_loss": 0.33152279257774353, |
|
"eval_runtime": 290.7991, |
|
"eval_samples_per_second": 3.556, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 85.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 85.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 6344 |
|
}, |
|
{ |
|
"epoch": 85.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0027, |
|
"step": 6348 |
|
}, |
|
{ |
|
"epoch": 85.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 85.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6356 |
|
}, |
|
{ |
|
"epoch": 85.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 85.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6364 |
|
}, |
|
{ |
|
"epoch": 86.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6372 |
|
}, |
|
{ |
|
"epoch": 86.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6376 |
|
}, |
|
{ |
|
"epoch": 86.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 86.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 86.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6388 |
|
}, |
|
{ |
|
"epoch": 86.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 6392 |
|
}, |
|
{ |
|
"epoch": 86.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 86.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 86.48, |
|
"eval_exec": 0.7147001934235977, |
|
"eval_loss": 0.33703288435935974, |
|
"eval_runtime": 284.2076, |
|
"eval_samples_per_second": 3.638, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 86.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6404 |
|
}, |
|
{ |
|
"epoch": 86.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6408 |
|
}, |
|
{ |
|
"epoch": 86.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6412 |
|
}, |
|
{ |
|
"epoch": 86.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6416 |
|
}, |
|
{ |
|
"epoch": 86.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 86.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6424 |
|
}, |
|
{ |
|
"epoch": 86.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6428 |
|
}, |
|
{ |
|
"epoch": 86.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6436 |
|
}, |
|
{ |
|
"epoch": 87.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 87.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6444 |
|
}, |
|
{ |
|
"epoch": 87.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 87.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6452 |
|
}, |
|
{ |
|
"epoch": 87.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6456 |
|
}, |
|
{ |
|
"epoch": 87.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 87.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 87.35, |
|
"eval_exec": 0.7253384912959381, |
|
"eval_loss": 0.3449646830558777, |
|
"eval_runtime": 294.4474, |
|
"eval_samples_per_second": 3.512, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 87.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6468 |
|
}, |
|
{ |
|
"epoch": 87.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6472 |
|
}, |
|
{ |
|
"epoch": 87.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6476 |
|
}, |
|
{ |
|
"epoch": 87.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 87.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6484 |
|
}, |
|
{ |
|
"epoch": 87.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 87.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6492 |
|
}, |
|
{ |
|
"epoch": 87.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 87.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 87.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6504 |
|
}, |
|
{ |
|
"epoch": 87.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6508 |
|
}, |
|
{ |
|
"epoch": 87.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 88.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6516 |
|
}, |
|
{ |
|
"epoch": 88.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 88.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6524 |
|
}, |
|
{ |
|
"epoch": 88.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 88.21, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.3305092751979828, |
|
"eval_runtime": 286.599, |
|
"eval_samples_per_second": 3.608, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 88.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 6532 |
|
}, |
|
{ |
|
"epoch": 88.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 88.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 88.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 6544 |
|
}, |
|
{ |
|
"epoch": 88.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6548 |
|
}, |
|
{ |
|
"epoch": 88.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 88.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6556 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 88.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6564 |
|
}, |
|
{ |
|
"epoch": 88.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6568 |
|
}, |
|
{ |
|
"epoch": 88.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 88.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6576 |
|
}, |
|
{ |
|
"epoch": 88.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 88.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6584 |
|
}, |
|
{ |
|
"epoch": 89.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6588 |
|
}, |
|
{ |
|
"epoch": 89.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 89.08, |
|
"eval_exec": 0.7166344294003868, |
|
"eval_loss": 0.3231656849384308, |
|
"eval_runtime": 284.3494, |
|
"eval_samples_per_second": 3.636, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 89.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6596 |
|
}, |
|
{ |
|
"epoch": 89.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 89.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6604 |
|
}, |
|
{ |
|
"epoch": 89.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6608 |
|
}, |
|
{ |
|
"epoch": 89.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 89.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6616 |
|
}, |
|
{ |
|
"epoch": 89.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 89.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 89.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6628 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6632 |
|
}, |
|
{ |
|
"epoch": 89.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6636 |
|
}, |
|
{ |
|
"epoch": 89.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 89.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6644 |
|
}, |
|
{ |
|
"epoch": 89.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6648 |
|
}, |
|
{ |
|
"epoch": 89.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6652 |
|
}, |
|
{ |
|
"epoch": 89.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 89.94, |
|
"eval_exec": 0.7127659574468085, |
|
"eval_loss": 0.3296290338039398, |
|
"eval_runtime": 289.4346, |
|
"eval_samples_per_second": 3.572, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 90.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6664 |
|
}, |
|
{ |
|
"epoch": 90.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 6668 |
|
}, |
|
{ |
|
"epoch": 90.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6672 |
|
}, |
|
{ |
|
"epoch": 90.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6676 |
|
}, |
|
{ |
|
"epoch": 90.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 90.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6684 |
|
}, |
|
{ |
|
"epoch": 90.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 90.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6692 |
|
}, |
|
{ |
|
"epoch": 90.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 90.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 90.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6704 |
|
}, |
|
{ |
|
"epoch": 90.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6708 |
|
}, |
|
{ |
|
"epoch": 90.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6712 |
|
}, |
|
{ |
|
"epoch": 90.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6716 |
|
}, |
|
{ |
|
"epoch": 90.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 90.8, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.33742156624794006, |
|
"eval_runtime": 286.2212, |
|
"eval_samples_per_second": 3.613, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 90.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 6724 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6728 |
|
}, |
|
{ |
|
"epoch": 90.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 6732 |
|
}, |
|
{ |
|
"epoch": 91.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6736 |
|
}, |
|
{ |
|
"epoch": 91.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 91.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 91.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6748 |
|
}, |
|
{ |
|
"epoch": 91.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6752 |
|
}, |
|
{ |
|
"epoch": 91.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6756 |
|
}, |
|
{ |
|
"epoch": 91.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 91.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6764 |
|
}, |
|
{ |
|
"epoch": 91.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6768 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6772 |
|
}, |
|
{ |
|
"epoch": 91.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6776 |
|
}, |
|
{ |
|
"epoch": 91.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"eval_exec": 0.7030947775628626, |
|
"eval_loss": 0.3439423739910126, |
|
"eval_runtime": 283.1873, |
|
"eval_samples_per_second": 3.651, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 91.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6788 |
|
}, |
|
{ |
|
"epoch": 91.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 6792 |
|
}, |
|
{ |
|
"epoch": 91.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6796 |
|
}, |
|
{ |
|
"epoch": 91.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 91.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6804 |
|
}, |
|
{ |
|
"epoch": 91.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6808 |
|
}, |
|
{ |
|
"epoch": 92.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6812 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 92.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 92.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6824 |
|
}, |
|
{ |
|
"epoch": 92.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6828 |
|
}, |
|
{ |
|
"epoch": 92.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6832 |
|
}, |
|
{ |
|
"epoch": 92.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 6836 |
|
}, |
|
{ |
|
"epoch": 92.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 92.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6844 |
|
}, |
|
{ |
|
"epoch": 92.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 92.53, |
|
"eval_exec": 0.7050290135396519, |
|
"eval_loss": 0.3420410752296448, |
|
"eval_runtime": 289.1079, |
|
"eval_samples_per_second": 3.577, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 92.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6852 |
|
}, |
|
{ |
|
"epoch": 92.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6856 |
|
}, |
|
{ |
|
"epoch": 92.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 92.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6868 |
|
}, |
|
{ |
|
"epoch": 92.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6872 |
|
}, |
|
{ |
|
"epoch": 92.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6876 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 93.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6884 |
|
}, |
|
{ |
|
"epoch": 93.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6888 |
|
}, |
|
{ |
|
"epoch": 93.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6892 |
|
}, |
|
{ |
|
"epoch": 93.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6896 |
|
}, |
|
{ |
|
"epoch": 93.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 93.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6904 |
|
}, |
|
{ |
|
"epoch": 93.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 6908 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_exec": 0.7263056092843327, |
|
"eval_loss": 0.337966650724411, |
|
"eval_runtime": 283.4007, |
|
"eval_samples_per_second": 3.649, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 93.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 93.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 93.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6924 |
|
}, |
|
{ |
|
"epoch": 93.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6928 |
|
}, |
|
{ |
|
"epoch": 93.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 6932 |
|
}, |
|
{ |
|
"epoch": 93.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6936 |
|
}, |
|
{ |
|
"epoch": 93.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 93.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 93.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6948 |
|
}, |
|
{ |
|
"epoch": 93.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6952 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 6956 |
|
}, |
|
{ |
|
"epoch": 94.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 94.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 6964 |
|
}, |
|
{ |
|
"epoch": 94.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6968 |
|
}, |
|
{ |
|
"epoch": 94.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6972 |
|
}, |
|
{ |
|
"epoch": 94.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 94.27, |
|
"eval_exec": 0.7176015473887815, |
|
"eval_loss": 0.364266037940979, |
|
"eval_runtime": 284.3795, |
|
"eval_samples_per_second": 3.636, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 94.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 94.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 6984 |
|
}, |
|
{ |
|
"epoch": 94.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6988 |
|
}, |
|
{ |
|
"epoch": 94.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 94.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 6996 |
|
}, |
|
{ |
|
"epoch": 94.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 94.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 7004 |
|
}, |
|
{ |
|
"epoch": 94.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 94.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7012 |
|
}, |
|
{ |
|
"epoch": 94.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7016 |
|
}, |
|
{ |
|
"epoch": 94.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 94.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7024 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7028 |
|
}, |
|
{ |
|
"epoch": 95.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7032 |
|
}, |
|
{ |
|
"epoch": 95.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7036 |
|
}, |
|
{ |
|
"epoch": 95.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 95.13, |
|
"eval_exec": 0.730174081237911, |
|
"eval_loss": 0.3686355650424957, |
|
"eval_runtime": 284.9995, |
|
"eval_samples_per_second": 3.628, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 95.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7044 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7048 |
|
}, |
|
{ |
|
"epoch": 95.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 7052 |
|
}, |
|
{ |
|
"epoch": 95.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 95.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7064 |
|
}, |
|
{ |
|
"epoch": 95.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 95.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 7072 |
|
}, |
|
{ |
|
"epoch": 95.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7076 |
|
}, |
|
{ |
|
"epoch": 95.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 95.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7084 |
|
}, |
|
{ |
|
"epoch": 95.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7088 |
|
}, |
|
{ |
|
"epoch": 95.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7092 |
|
}, |
|
{ |
|
"epoch": 95.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7096 |
|
}, |
|
{ |
|
"epoch": 95.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"eval_exec": 0.7176015473887815, |
|
"eval_loss": 0.3639739453792572, |
|
"eval_runtime": 281.8061, |
|
"eval_samples_per_second": 3.669, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 96.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 7108 |
|
}, |
|
{ |
|
"epoch": 96.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7112 |
|
}, |
|
{ |
|
"epoch": 96.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7116 |
|
}, |
|
{ |
|
"epoch": 96.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 96.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7124 |
|
}, |
|
{ |
|
"epoch": 96.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7128 |
|
}, |
|
{ |
|
"epoch": 96.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7132 |
|
}, |
|
{ |
|
"epoch": 96.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7136 |
|
}, |
|
{ |
|
"epoch": 96.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 96.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7144 |
|
}, |
|
{ |
|
"epoch": 96.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 96.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7152 |
|
}, |
|
{ |
|
"epoch": 96.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7156 |
|
}, |
|
{ |
|
"epoch": 96.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 96.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 7164 |
|
}, |
|
{ |
|
"epoch": 96.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 96.86, |
|
"eval_exec": 0.7224371373307543, |
|
"eval_loss": 0.35076427459716797, |
|
"eval_runtime": 285.1481, |
|
"eval_samples_per_second": 3.626, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 96.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7172 |
|
}, |
|
{ |
|
"epoch": 96.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 97.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 97.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7184 |
|
}, |
|
{ |
|
"epoch": 97.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 7188 |
|
}, |
|
{ |
|
"epoch": 97.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 97.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7196 |
|
}, |
|
{ |
|
"epoch": 97.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 97.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7204 |
|
}, |
|
{ |
|
"epoch": 97.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7208 |
|
}, |
|
{ |
|
"epoch": 97.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7212 |
|
}, |
|
{ |
|
"epoch": 97.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7216 |
|
}, |
|
{ |
|
"epoch": 97.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 97.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7224 |
|
}, |
|
{ |
|
"epoch": 97.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 7228 |
|
}, |
|
{ |
|
"epoch": 97.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 97.72, |
|
"eval_exec": 0.723404255319149, |
|
"eval_loss": 0.34438732266426086, |
|
"eval_runtime": 287.451, |
|
"eval_samples_per_second": 3.597, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 97.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7236 |
|
}, |
|
{ |
|
"epoch": 97.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 97.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7244 |
|
}, |
|
{ |
|
"epoch": 97.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7248 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7252 |
|
}, |
|
{ |
|
"epoch": 98.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 7256 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 98.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7264 |
|
}, |
|
{ |
|
"epoch": 98.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7268 |
|
}, |
|
{ |
|
"epoch": 98.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 7272 |
|
}, |
|
{ |
|
"epoch": 98.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7276 |
|
}, |
|
{ |
|
"epoch": 98.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 98.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7284 |
|
}, |
|
{ |
|
"epoch": 98.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7288 |
|
}, |
|
{ |
|
"epoch": 98.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7292 |
|
}, |
|
{ |
|
"epoch": 98.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 98.59, |
|
"eval_exec": 0.718568665377176, |
|
"eval_loss": 0.3525933623313904, |
|
"eval_runtime": 283.2747, |
|
"eval_samples_per_second": 3.65, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 98.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 98.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 7304 |
|
}, |
|
{ |
|
"epoch": 98.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7308 |
|
}, |
|
{ |
|
"epoch": 98.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 98.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 98.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 98.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7324 |
|
}, |
|
{ |
|
"epoch": 99.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7328 |
|
}, |
|
{ |
|
"epoch": 99.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7332 |
|
}, |
|
{ |
|
"epoch": 99.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7336 |
|
}, |
|
{ |
|
"epoch": 99.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7344 |
|
}, |
|
{ |
|
"epoch": 99.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0003, |
|
"step": 7348 |
|
}, |
|
{ |
|
"epoch": 99.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7352 |
|
}, |
|
{ |
|
"epoch": 99.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7356 |
|
}, |
|
{ |
|
"epoch": 99.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 99.45, |
|
"eval_exec": 0.7311411992263056, |
|
"eval_loss": 0.3651147186756134, |
|
"eval_runtime": 287.5898, |
|
"eval_samples_per_second": 3.595, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 99.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 7364 |
|
}, |
|
{ |
|
"epoch": 99.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7368 |
|
}, |
|
{ |
|
"epoch": 99.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0004, |
|
"step": 7372 |
|
}, |
|
{ |
|
"epoch": 99.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7376 |
|
}, |
|
{ |
|
"epoch": 99.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 99.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 7384 |
|
}, |
|
{ |
|
"epoch": 99.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0001, |
|
"step": 7388 |
|
}, |
|
{ |
|
"epoch": 99.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 99.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0002, |
|
"step": 7396 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"step": 7400, |
|
"total_flos": 5.036125754774262e+18, |
|
"train_loss": 0.00013556757536748224, |
|
"train_runtime": 24498.5901, |
|
"train_samples_per_second": 61.04, |
|
"train_steps_per_second": 0.302 |
|
} |
|
], |
|
"max_steps": 7400, |
|
"num_train_epochs": 100, |
|
"total_flos": 5.036125754774262e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|