{
  "best_metric": 0.8735,
  "best_model_checkpoint": "swin-tiny-finetuned-cifar100/checkpoint-3905",
  "epoch": 4.99968,
  "global_step": 3905,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 5.115089514066497e-07, "loss": 4.6445, "step": 5 },
    { "epoch": 0.01, "learning_rate": 1.0230179028132994e-06, "loss": 4.6363, "step": 10 },
    { "epoch": 0.02, "learning_rate": 1.534526854219949e-06, "loss": 4.6361, "step": 15 },
    { "epoch": 0.03, "learning_rate": 2.0460358056265987e-06, "loss": 4.6511, "step": 20 },
    { "epoch": 0.03, "learning_rate": 2.5575447570332483e-06, "loss": 4.6596, "step": 25 },
    { "epoch": 0.04, "learning_rate": 3.069053708439898e-06, "loss": 4.6345, "step": 30 },
    { "epoch": 0.04, "learning_rate": 3.5805626598465474e-06, "loss": 4.6177, "step": 35 },
    { "epoch": 0.05, "learning_rate": 4.092071611253197e-06, "loss": 4.6046, "step": 40 },
    { "epoch": 0.06, "learning_rate": 4.603580562659847e-06, "loss": 4.6372, "step": 45 },
    { "epoch": 0.06, "learning_rate": 5.1150895140664966e-06, "loss": 4.5815, "step": 50 },
    { "epoch": 0.07, "learning_rate": 5.626598465473146e-06, "loss": 4.5585, "step": 55 },
    { "epoch": 0.08, "learning_rate": 6.138107416879796e-06, "loss": 4.6204, "step": 60 },
    { "epoch": 0.08, "learning_rate": 6.649616368286445e-06, "loss": 4.5496, "step": 65 },
    { "epoch": 0.09, "learning_rate": 7.161125319693095e-06, "loss": 4.5638, "step": 70 },
    { "epoch": 0.1, "learning_rate": 7.672634271099745e-06, "loss": 4.5451, "step": 75 },
    { "epoch": 0.1, "learning_rate": 8.184143222506395e-06, "loss": 4.5389, "step": 80 },
    { "epoch": 0.11, "learning_rate": 8.695652173913044e-06, "loss": 4.4942, "step": 85 },
    { "epoch": 0.12, "learning_rate": 9.207161125319694e-06, "loss": 4.4929, "step": 90 },
    { "epoch": 0.12, "learning_rate": 9.718670076726344e-06, "loss": 4.4607, "step": 95 },
    { "epoch": 0.13, "learning_rate": 1.0230179028132993e-05, "loss": 4.4665, "step": 100 },
    { "epoch": 0.13, "learning_rate": 1.0741687979539643e-05, "loss": 4.438, "step": 105 },
    { "epoch": 0.14, "learning_rate": 1.1253196930946292e-05, "loss": 4.415, "step": 110 },
    { "epoch": 0.15, "learning_rate": 1.1764705882352942e-05, "loss": 4.3831, "step": 115 },
    { "epoch": 0.15, "learning_rate": 1.2276214833759591e-05, "loss": 4.3877, "step": 120 },
    { "epoch": 0.16, "learning_rate": 1.2787723785166241e-05, "loss": 4.3453, "step": 125 },
    { "epoch": 0.17, "learning_rate": 1.329923273657289e-05, "loss": 4.3025, "step": 130 },
    { "epoch": 0.17, "learning_rate": 1.381074168797954e-05, "loss": 4.2795, "step": 135 },
    { "epoch": 0.18, "learning_rate": 1.432225063938619e-05, "loss": 4.2353, "step": 140 },
    { "epoch": 0.19, "learning_rate": 1.483375959079284e-05, "loss": 4.1786, "step": 145 },
    { "epoch": 0.19, "learning_rate": 1.534526854219949e-05, "loss": 4.1939, "step": 150 },
    { "epoch": 0.2, "learning_rate": 1.585677749360614e-05, "loss": 4.0723, "step": 155 },
    { "epoch": 0.2, "learning_rate": 1.636828644501279e-05, "loss": 4.0102, "step": 160 },
    { "epoch": 0.21, "learning_rate": 1.687979539641944e-05, "loss": 3.9292, "step": 165 },
    { "epoch": 0.22, "learning_rate": 1.739130434782609e-05, "loss": 3.8505, "step": 170 },
    { "epoch": 0.22, "learning_rate": 1.790281329923274e-05, "loss": 3.7539, "step": 175 },
    { "epoch": 0.23, "learning_rate": 1.8414322250639388e-05, "loss": 3.6833, "step": 180 },
    { "epoch": 0.24, "learning_rate": 1.8925831202046038e-05, "loss": 3.5281, "step": 185 },
    { "epoch": 0.24, "learning_rate": 1.9437340153452687e-05, "loss": 3.4329, "step": 190 },
    { "epoch": 0.25, "learning_rate": 1.9948849104859337e-05, "loss": 3.3203, "step": 195 },
    { "epoch": 0.26, "learning_rate": 2.0460358056265986e-05, "loss": 3.1954, "step": 200 },
    { "epoch": 0.26, "learning_rate": 2.0971867007672636e-05, "loss": 3.0686, "step": 205 },
    { "epoch": 0.27, "learning_rate": 2.1483375959079285e-05, "loss": 2.9424, "step": 210 },
    { "epoch": 0.28, "learning_rate": 2.1994884910485935e-05, "loss": 3.0176, "step": 215 },
    { "epoch": 0.28, "learning_rate": 2.2506393861892585e-05, "loss": 2.8912, "step": 220 },
    { "epoch": 0.29, "learning_rate": 2.3017902813299234e-05, "loss": 2.7802, "step": 225 },
    { "epoch": 0.29, "learning_rate": 2.3529411764705884e-05, "loss": 2.7013, "step": 230 },
    { "epoch": 0.3, "learning_rate": 2.4040920716112533e-05, "loss": 2.6416, "step": 235 },
    { "epoch": 0.31, "learning_rate": 2.4552429667519183e-05, "loss": 2.5482, "step": 240 },
    { "epoch": 0.31, "learning_rate": 2.5063938618925832e-05, "loss": 2.3947, "step": 245 },
    { "epoch": 0.32, "learning_rate": 2.5575447570332482e-05, "loss": 2.2859, "step": 250 },
    { "epoch": 0.33, "learning_rate": 2.608695652173913e-05, "loss": 2.2609, "step": 255 },
    { "epoch": 0.33, "learning_rate": 2.659846547314578e-05, "loss": 2.2065, "step": 260 },
    { "epoch": 0.34, "learning_rate": 2.710997442455243e-05, "loss": 2.1006, "step": 265 },
    { "epoch": 0.35, "learning_rate": 2.762148337595908e-05, "loss": 1.9752, "step": 270 },
    { "epoch": 0.35, "learning_rate": 2.813299232736573e-05, "loss": 2.0412, "step": 275 },
    { "epoch": 0.36, "learning_rate": 2.864450127877238e-05, "loss": 2.0338, "step": 280 },
    { "epoch": 0.36, "learning_rate": 2.915601023017903e-05, "loss": 1.8349, "step": 285 },
    { "epoch": 0.37, "learning_rate": 2.966751918158568e-05, "loss": 1.7146, "step": 290 },
    { "epoch": 0.38, "learning_rate": 3.0179028132992328e-05, "loss": 1.7222, "step": 295 },
    { "epoch": 0.38, "learning_rate": 3.069053708439898e-05, "loss": 1.8153, "step": 300 },
    { "epoch": 0.39, "learning_rate": 3.120204603580563e-05, "loss": 1.6485, "step": 305 },
    { "epoch": 0.4, "learning_rate": 3.171355498721228e-05, "loss": 1.485, "step": 310 },
    { "epoch": 0.4, "learning_rate": 3.222506393861893e-05, "loss": 1.7435, "step": 315 },
    { "epoch": 0.41, "learning_rate": 3.273657289002558e-05, "loss": 1.6707, "step": 320 },
    { "epoch": 0.42, "learning_rate": 3.324808184143223e-05, "loss": 1.5172, "step": 325 },
    { "epoch": 0.42, "learning_rate": 3.375959079283888e-05, "loss": 1.538, "step": 330 },
    { "epoch": 0.43, "learning_rate": 3.427109974424553e-05, "loss": 1.424, "step": 335 },
    { "epoch": 0.44, "learning_rate": 3.478260869565218e-05, "loss": 1.3758, "step": 340 },
    { "epoch": 0.44, "learning_rate": 3.529411764705883e-05, "loss": 1.3251, "step": 345 },
    { "epoch": 0.45, "learning_rate": 3.580562659846548e-05, "loss": 1.3147, "step": 350 },
    { "epoch": 0.45, "learning_rate": 3.6317135549872126e-05, "loss": 1.3606, "step": 355 },
    { "epoch": 0.46, "learning_rate": 3.6828644501278776e-05, "loss": 1.3198, "step": 360 },
    { "epoch": 0.47, "learning_rate": 3.7340153452685426e-05, "loss": 1.4826, "step": 365 },
    { "epoch": 0.47, "learning_rate": 3.7851662404092075e-05, "loss": 1.1348, "step": 370 },
    { "epoch": 0.48, "learning_rate": 3.8363171355498725e-05, "loss": 1.2849, "step": 375 },
    { "epoch": 0.49, "learning_rate": 3.8874680306905374e-05, "loss": 1.2261, "step": 380 },
    { "epoch": 0.49, "learning_rate": 3.9386189258312024e-05, "loss": 1.175, "step": 385 },
    { "epoch": 0.5, "learning_rate": 3.989769820971867e-05, "loss": 1.1654, "step": 390 },
    { "epoch": 0.51, "learning_rate": 3.9954467842914065e-05, "loss": 1.2218, "step": 395 },
    { "epoch": 0.51, "learning_rate": 3.989755264655663e-05, "loss": 1.0849, "step": 400 },
    { "epoch": 0.52, "learning_rate": 3.9840637450199205e-05, "loss": 1.1411, "step": 405 },
    { "epoch": 0.52, "learning_rate": 3.978372225384178e-05, "loss": 1.168, "step": 410 },
    { "epoch": 0.53, "learning_rate": 3.972680705748435e-05, "loss": 1.2054, "step": 415 },
    { "epoch": 0.54, "learning_rate": 3.9669891861126925e-05, "loss": 1.1557, "step": 420 },
    { "epoch": 0.54, "learning_rate": 3.96129766647695e-05, "loss": 1.119, "step": 425 },
    { "epoch": 0.55, "learning_rate": 3.955606146841207e-05, "loss": 1.1867, "step": 430 },
    { "epoch": 0.56, "learning_rate": 3.949914627205464e-05, "loss": 0.9051, "step": 435 },
    { "epoch": 0.56, "learning_rate": 3.944223107569721e-05, "loss": 1.0337, "step": 440 },
    { "epoch": 0.57, "learning_rate": 3.9385315879339785e-05, "loss": 0.9839, "step": 445 },
    { "epoch": 0.58, "learning_rate": 3.932840068298236e-05, "loss": 0.866, "step": 450 },
    { "epoch": 0.58, "learning_rate": 3.927148548662493e-05, "loss": 0.9547, "step": 455 },
    { "epoch": 0.59, "learning_rate": 3.9214570290267505e-05, "loss": 0.9838, "step": 460 },
    { "epoch": 0.6, "learning_rate": 3.915765509391008e-05, "loss": 1.1078, "step": 465 },
    { "epoch": 0.6, "learning_rate": 3.910073989755265e-05, "loss": 0.932, "step": 470 },
    { "epoch": 0.61, "learning_rate": 3.9043824701195225e-05, "loss": 0.9879, "step": 475 },
    { "epoch": 0.61, "learning_rate": 3.898690950483779e-05, "loss": 1.1054, "step": 480 },
    { "epoch": 0.62, "learning_rate": 3.8929994308480365e-05, "loss": 0.9784, "step": 485 },
    { "epoch": 0.63, "learning_rate": 3.887307911212294e-05, "loss": 1.0294, "step": 490 },
    { "epoch": 0.63, "learning_rate": 3.881616391576551e-05, "loss": 0.946, "step": 495 },
    { "epoch": 0.64, "learning_rate": 3.8759248719408085e-05, "loss": 0.9403, "step": 500 },
    { "epoch": 0.65, "learning_rate": 3.870233352305066e-05, "loss": 0.8587, "step": 505 },
    { "epoch": 0.65, "learning_rate": 3.864541832669323e-05, "loss": 0.881, "step": 510 },
    { "epoch": 0.66, "learning_rate": 3.85885031303358e-05, "loss": 0.904, "step": 515 },
    { "epoch": 0.67, "learning_rate": 3.853158793397838e-05, "loss": 0.9147, "step": 520 },
    { "epoch": 0.67, "learning_rate": 3.8474672737620945e-05, "loss": 0.9299, "step": 525 },
    { "epoch": 0.68, "learning_rate": 3.841775754126352e-05, "loss": 0.9064, "step": 530 },
    { "epoch": 0.68, "learning_rate": 3.836084234490609e-05, "loss": 1.0158, "step": 535 },
    { "epoch": 0.69, "learning_rate": 3.8303927148548666e-05, "loss": 0.8712, "step": 540 },
    { "epoch": 0.7, "learning_rate": 3.824701195219124e-05, "loss": 0.8876, "step": 545 },
    { "epoch": 0.7, "learning_rate": 3.819009675583381e-05, "loss": 0.9165, "step": 550 },
    { "epoch": 0.71, "learning_rate": 3.8133181559476386e-05, "loss": 0.7727, "step": 555 },
    { "epoch": 0.72, "learning_rate": 3.807626636311895e-05, "loss": 0.9544, "step": 560 },
    { "epoch": 0.72, "learning_rate": 3.8019351166761526e-05, "loss": 0.8282, "step": 565 },
    { "epoch": 0.73, "learning_rate": 3.79624359704041e-05, "loss": 0.8105, "step": 570 },
    { "epoch": 0.74, "learning_rate": 3.790552077404667e-05, "loss": 0.8246, "step": 575 },
    { "epoch": 0.74, "learning_rate": 3.7848605577689246e-05, "loss": 0.8071, "step": 580 },
    { "epoch": 0.75, "learning_rate": 3.779169038133182e-05, "loss": 0.964, "step": 585 },
    { "epoch": 0.76, "learning_rate": 3.773477518497439e-05, "loss": 0.8634, "step": 590 },
    { "epoch": 0.76, "learning_rate": 3.7677859988616966e-05, "loss": 0.7772, "step": 595 },
    { "epoch": 0.77, "learning_rate": 3.762094479225954e-05, "loss": 0.8086, "step": 600 },
    { "epoch": 0.77, "learning_rate": 3.7564029595902106e-05, "loss": 0.8886, "step": 605 },
    { "epoch": 0.78, "learning_rate": 3.750711439954468e-05, "loss": 0.8618, "step": 610 },
    { "epoch": 0.79, "learning_rate": 3.745019920318725e-05, "loss": 0.8221, "step": 615 },
    { "epoch": 0.79, "learning_rate": 3.7393284006829826e-05, "loss": 0.9336, "step": 620 },
    { "epoch": 0.8, "learning_rate": 3.73363688104724e-05, "loss": 0.7384, "step": 625 },
    { "epoch": 0.81, "learning_rate": 3.727945361411497e-05, "loss": 0.7313, "step": 630 },
    { "epoch": 0.81, "learning_rate": 3.7222538417757546e-05, "loss": 0.9178, "step": 635 },
    { "epoch": 0.82, "learning_rate": 3.716562322140011e-05, "loss": 0.8866, "step": 640 },
    { "epoch": 0.83, "learning_rate": 3.710870802504269e-05, "loss": 0.6832, "step": 645 },
    { "epoch": 0.83, "learning_rate": 3.705179282868526e-05, "loss": 0.776, "step": 650 },
    { "epoch": 0.84, "learning_rate": 3.699487763232783e-05, "loss": 0.7298, "step": 655 },
    { "epoch": 0.84, "learning_rate": 3.6937962435970406e-05, "loss": 0.7268, "step": 660 },
    { "epoch": 0.85, "learning_rate": 3.688104723961298e-05, "loss": 0.8073, "step": 665 },
    { "epoch": 0.86, "learning_rate": 3.682413204325555e-05, "loss": 0.7678, "step": 670 },
    { "epoch": 0.86, "learning_rate": 3.6767216846898126e-05, "loss": 0.8216, "step": 675 },
    { "epoch": 0.87, "learning_rate": 3.67103016505407e-05, "loss": 0.6896, "step": 680 },
    { "epoch": 0.88, "learning_rate": 3.6653386454183266e-05, "loss": 0.8691, "step": 685 },
    { "epoch": 0.88, "learning_rate": 3.659647125782584e-05, "loss": 0.8097, "step": 690 },
    { "epoch": 0.89, "learning_rate": 3.653955606146841e-05, "loss": 0.7124, "step": 695 },
    { "epoch": 0.9, "learning_rate": 3.6482640865110987e-05, "loss": 0.8661, "step": 700 },
    { "epoch": 0.9, "learning_rate": 3.642572566875356e-05, "loss": 1.0455, "step": 705 },
    { "epoch": 0.91, "learning_rate": 3.636881047239613e-05, "loss": 0.8263, "step": 710 },
    { "epoch": 0.92, "learning_rate": 3.631189527603871e-05, "loss": 0.6256, "step": 715 },
    { "epoch": 0.92, "learning_rate": 3.625498007968128e-05, "loss": 0.788, "step": 720 },
    { "epoch": 0.93, "learning_rate": 3.6198064883323853e-05, "loss": 0.7374, "step": 725 },
    { "epoch": 0.93, "learning_rate": 3.614114968696642e-05, "loss": 0.6936, "step": 730 },
    { "epoch": 0.94, "learning_rate": 3.6084234490608993e-05, "loss": 0.7579, "step": 735 },
    { "epoch": 0.95, "learning_rate": 3.602731929425157e-05, "loss": 0.7191, "step": 740 },
    { "epoch": 0.95, "learning_rate": 3.597040409789414e-05, "loss": 0.7349, "step": 745 },
    { "epoch": 0.96, "learning_rate": 3.5913488901536714e-05, "loss": 0.6269, "step": 750 },
    { "epoch": 0.97, "learning_rate": 3.585657370517929e-05, "loss": 0.664, "step": 755 },
    { "epoch": 0.97, "learning_rate": 3.579965850882186e-05, "loss": 0.6365, "step": 760 },
    { "epoch": 0.98, "learning_rate": 3.574274331246443e-05, "loss": 0.756, "step": 765 },
    { "epoch": 0.99, "learning_rate": 3.568582811610701e-05, "loss": 0.7975, "step": 770 },
    { "epoch": 0.99, "learning_rate": 3.5628912919749574e-05, "loss": 0.8584, "step": 775 },
    { "epoch": 1.0, "learning_rate": 3.557199772339215e-05, "loss": 0.6439, "step": 780 },
    { "epoch": 1.0, "eval_accuracy": 0.8138, "eval_loss": 0.6126329302787781, "eval_runtime": 60.9802, "eval_samples_per_second": 163.988, "eval_steps_per_second": 10.249, "step": 781 },
    { "epoch": 1.01, "learning_rate": 3.551508252703472e-05, "loss": 0.6383, "step": 785 },
    { "epoch": 1.01, "learning_rate": 3.5458167330677294e-05, "loss": 0.6756, "step": 790 },
    { "epoch": 1.02, "learning_rate": 3.540125213431987e-05, "loss": 0.5847, "step": 795 },
    { "epoch": 1.02, "learning_rate": 3.534433693796244e-05, "loss": 0.5047, "step": 800 },
    { "epoch": 1.03, "learning_rate": 3.5287421741605014e-05, "loss": 0.5946, "step": 805 },
    { "epoch": 1.04, "learning_rate": 3.523050654524758e-05, "loss": 0.51, "step": 810 },
    { "epoch": 1.04, "learning_rate": 3.5173591348890154e-05, "loss": 0.5915, "step": 815 },
    { "epoch": 1.05, "learning_rate": 3.511667615253273e-05, "loss": 0.6674, "step": 820 },
    { "epoch": 1.06, "learning_rate": 3.50597609561753e-05, "loss": 0.6427, "step": 825 },
    { "epoch": 1.06, "learning_rate": 3.5002845759817874e-05, "loss": 0.6404, "step": 830 },
    { "epoch": 1.07, "learning_rate": 3.494593056346045e-05, "loss": 0.5568, "step": 835 },
    { "epoch": 1.08, "learning_rate": 3.488901536710302e-05, "loss": 0.7048, "step": 840 },
    { "epoch": 1.08, "learning_rate": 3.4832100170745594e-05, "loss": 0.5817, "step": 845 },
    { "epoch": 1.09, "learning_rate": 3.477518497438817e-05, "loss": 0.5192, "step": 850 },
    { "epoch": 1.09, "learning_rate": 3.4718269778030734e-05, "loss": 0.7096, "step": 855 },
    { "epoch": 1.1, "learning_rate": 3.466135458167331e-05, "loss": 0.561, "step": 860 },
    { "epoch": 1.11, "learning_rate": 3.460443938531588e-05, "loss": 0.6275, "step": 865 },
    { "epoch": 1.11, "learning_rate": 3.4547524188958454e-05, "loss": 0.5082, "step": 870 },
    { "epoch": 1.12, "learning_rate": 3.449060899260103e-05, "loss": 0.616, "step": 875 },
    { "epoch": 1.13, "learning_rate": 3.44336937962436e-05, "loss": 0.5976, "step": 880 },
    { "epoch": 1.13, "learning_rate": 3.4376778599886174e-05, "loss": 0.6847, "step": 885 },
    { "epoch": 1.14, "learning_rate": 3.431986340352874e-05, "loss": 0.4798, "step": 890 },
    { "epoch": 1.15, "learning_rate": 3.426294820717132e-05, "loss": 0.6393, "step": 895 },
    { "epoch": 1.15, "learning_rate": 3.420603301081389e-05, "loss": 0.4907, "step": 900 },
    { "epoch": 1.16, "learning_rate": 3.414911781445646e-05, "loss": 0.4741, "step": 905 },
    { "epoch": 1.17, "learning_rate": 3.4092202618099035e-05, "loss": 0.4989, "step": 910 },
    { "epoch": 1.17, "learning_rate": 3.403528742174161e-05, "loss": 0.6102, "step": 915 },
    { "epoch": 1.18, "learning_rate": 3.397837222538418e-05, "loss": 0.6051, "step": 920 },
    { "epoch": 1.18, "learning_rate": 3.3921457029026755e-05, "loss": 0.5615, "step": 925 },
    { "epoch": 1.19, "learning_rate": 3.386454183266933e-05, "loss": 0.5091, "step": 930 },
    { "epoch": 1.2, "learning_rate": 3.3807626636311895e-05, "loss": 0.5863, "step": 935 },
    { "epoch": 1.2, "learning_rate": 3.375071143995447e-05, "loss": 0.6056, "step": 940 },
    { "epoch": 1.21, "learning_rate": 3.369379624359704e-05, "loss": 0.4893, "step": 945 },
    { "epoch": 1.22, "learning_rate": 3.3636881047239615e-05, "loss": 0.607, "step": 950 },
    { "epoch": 1.22, "learning_rate": 3.357996585088219e-05, "loss": 0.5942, "step": 955 },
    { "epoch": 1.23, "learning_rate": 3.352305065452476e-05, "loss": 0.5453, "step": 960 },
    { "epoch": 1.24, "learning_rate": 3.3466135458167335e-05, "loss": 0.5637, "step": 965 },
    { "epoch": 1.24, "learning_rate": 3.34092202618099e-05, "loss": 0.5974, "step": 970 },
    { "epoch": 1.25, "learning_rate": 3.335230506545248e-05, "loss": 0.5365, "step": 975 },
    { "epoch": 1.25, "learning_rate": 3.329538986909505e-05, "loss": 0.5487, "step": 980 },
    { "epoch": 1.26, "learning_rate": 3.323847467273762e-05, "loss": 0.5981, "step": 985 },
    { "epoch": 1.27, "learning_rate": 3.3181559476380195e-05, "loss": 0.4977, "step": 990 },
    { "epoch": 1.27, "learning_rate": 3.312464428002277e-05, "loss": 0.4873, "step": 995 },
    { "epoch": 1.28, "learning_rate": 3.306772908366534e-05, "loss": 0.6305, "step": 1000 },
    { "epoch": 1.29, "learning_rate": 3.3010813887307915e-05, "loss": 0.4625, "step": 1005 },
    { "epoch": 1.29, "learning_rate": 3.295389869095049e-05, "loss": 0.7791, "step": 1010 },
    { "epoch": 1.3, "learning_rate": 3.2896983494593055e-05, "loss": 0.5784, "step": 1015 },
    { "epoch": 1.31, "learning_rate": 3.2840068298235635e-05, "loss": 0.4482, "step": 1020 },
    { "epoch": 1.31, "learning_rate": 3.27831531018782e-05, "loss": 0.5718, "step": 1025 },
    { "epoch": 1.32, "learning_rate": 3.2726237905520775e-05, "loss": 0.5399, "step": 1030 },
    { "epoch": 1.33, "learning_rate": 3.266932270916335e-05, "loss": 0.5408, "step": 1035 },
    { "epoch": 1.33, "learning_rate": 3.261240751280592e-05, "loss": 0.5713, "step": 1040 },
    { "epoch": 1.34, "learning_rate": 3.2555492316448495e-05, "loss": 0.3968, "step": 1045 },
    { "epoch": 1.34, "learning_rate": 3.249857712009107e-05, "loss": 0.5708, "step": 1050 },
    { "epoch": 1.35, "learning_rate": 3.244166192373364e-05, "loss": 0.6139, "step": 1055 },
    { "epoch": 1.36, "learning_rate": 3.238474672737621e-05, "loss": 0.6031, "step": 1060 },
    { "epoch": 1.36, "learning_rate": 3.232783153101878e-05, "loss": 0.4819, "step": 1065 },
    { "epoch": 1.37, "learning_rate": 3.2270916334661356e-05, "loss": 0.5141, "step": 1070 },
    { "epoch": 1.38, "learning_rate": 3.221400113830393e-05, "loss": 0.4998, "step": 1075 },
    { "epoch": 1.38, "learning_rate": 3.21570859419465e-05, "loss": 0.4646, "step": 1080 },
    { "epoch": 1.39, "learning_rate": 3.2100170745589076e-05, "loss": 0.4859, "step": 1085 },
    { "epoch": 1.4, "learning_rate": 3.204325554923165e-05, "loss": 0.5069, "step": 1090 },
    { "epoch": 1.4, "learning_rate": 3.1986340352874216e-05, "loss": 0.5751, "step": 1095 },
    { "epoch": 1.41, "learning_rate": 3.1929425156516796e-05, "loss": 0.4505, "step": 1100 },
    { "epoch": 1.41, "learning_rate": 3.187250996015936e-05, "loss": 0.5396, "step": 1105 },
    { "epoch": 1.42, "learning_rate": 3.1815594763801936e-05, "loss": 0.5394, "step": 1110 },
    { "epoch": 1.43, "learning_rate": 3.175867956744451e-05, "loss": 0.6824, "step": 1115 },
    { "epoch": 1.43, "learning_rate": 3.170176437108708e-05, "loss": 0.414, "step": 1120 },
    { "epoch": 1.44, "learning_rate": 3.1644849174729656e-05, "loss": 0.5944, "step": 1125 },
    { "epoch": 1.45, "learning_rate": 3.158793397837223e-05, "loss": 0.5384, "step": 1130 },
    { "epoch": 1.45, "learning_rate": 3.15310187820148e-05, "loss": 0.7521, "step": 1135 },
    { "epoch": 1.46, "learning_rate": 3.147410358565737e-05, "loss": 0.6244, "step": 1140 },
    { "epoch": 1.47, "learning_rate": 3.141718838929995e-05, "loss": 0.4822, "step": 1145 },
    { "epoch": 1.47, "learning_rate": 3.1360273192942516e-05, "loss": 0.5942, "step": 1150 },
    { "epoch": 1.48, "learning_rate": 3.130335799658509e-05, "loss": 0.5526, "step": 1155 },
    { "epoch": 1.49, "learning_rate": 3.124644280022766e-05, "loss": 0.5807, "step": 1160 },
    { "epoch": 1.49, "learning_rate": 3.1189527603870236e-05, "loss": 0.6191, "step": 1165 },
    { "epoch": 1.5, "learning_rate": 3.113261240751281e-05, "loss": 0.4252, "step": 1170 },
    { "epoch": 1.5, "learning_rate": 3.107569721115538e-05, "loss": 0.6039, "step": 1175 },
    { "epoch": 1.51, "learning_rate": 3.1018782014797956e-05, "loss": 0.5023, "step": 1180 },
    { "epoch": 1.52, "learning_rate": 3.096186681844052e-05, "loss": 0.4397, "step": 1185 },
    { "epoch": 1.52, "learning_rate": 3.0904951622083096e-05, "loss": 0.5488, "step": 1190 },
    { "epoch": 1.53, "learning_rate": 3.084803642572567e-05, "loss": 0.4943, "step": 1195 },
    { "epoch": 1.54, "learning_rate": 3.079112122936824e-05, "loss": 0.4196, "step": 1200 },
    { "epoch": 1.54, "learning_rate": 3.0734206033010816e-05, "loss": 0.5103, "step": 1205 },
    { "epoch": 1.55, "learning_rate": 3.067729083665339e-05, "loss": 0.5383, "step": 1210 },
    { "epoch": 1.56, "learning_rate": 3.062037564029596e-05, "loss": 0.5533, "step": 1215 },
    { "epoch": 1.56, "learning_rate": 3.056346044393853e-05, "loss": 0.6003, "step": 1220 },
    { "epoch": 1.57, "learning_rate": 3.0506545247581107e-05, "loss": 0.3887, "step": 1225 },
    { "epoch": 1.57, "learning_rate": 3.0449630051223676e-05, "loss": 0.4925, "step": 1230 },
    { "epoch": 1.58, "learning_rate": 3.0392714854866253e-05, "loss": 0.5327, "step": 1235 },
    { "epoch": 1.59, "learning_rate": 3.0335799658508823e-05, "loss": 0.4195, "step": 1240 },
    { "epoch": 1.59, "learning_rate": 3.0278884462151397e-05, "loss": 0.4912, "step": 1245 },
    { "epoch": 1.6, "learning_rate": 3.022196926579397e-05, "loss": 0.6002, "step": 1250 },
    { "epoch": 1.61, "learning_rate": 3.016505406943654e-05, "loss": 0.5191, "step": 1255 },
    { "epoch": 1.61, "learning_rate": 3.0108138873079117e-05, "loss": 0.4732, "step": 1260 },
    { "epoch": 1.62, "learning_rate": 3.0051223676721687e-05, "loss": 0.4728, "step": 1265 },
    { "epoch": 1.63, "learning_rate": 2.999430848036426e-05, "loss": 0.658, "step": 1270 },
    { "epoch": 1.63, "learning_rate": 2.993739328400683e-05, "loss": 0.3973, "step": 1275 },
    { "epoch": 1.64, "learning_rate": 2.9880478087649403e-05, "loss": 0.518, "step": 1280 },
    { "epoch": 1.65, "learning_rate": 2.982356289129198e-05, "loss": 0.513, "step": 1285 },
    { "epoch": 1.65, "learning_rate": 2.976664769493455e-05, "loss": 0.4699, "step": 1290 },
    { "epoch": 1.66, "learning_rate": 2.9709732498577124e-05, "loss": 0.5086, "step": 1295 },
    { "epoch": 1.66, "learning_rate": 2.9652817302219694e-05, "loss": 0.4464, "step": 1300 },
    { "epoch": 1.67, "learning_rate": 2.9595902105862267e-05, "loss": 0.4587, "step": 1305 },
    { "epoch": 1.68, "learning_rate": 2.953898690950484e-05, "loss": 0.5568, "step": 1310 },
    { "epoch": 1.68, "learning_rate": 2.9482071713147414e-05, "loss": 0.4991, "step": 1315 },
    { "epoch": 1.69, "learning_rate": 2.9425156516789984e-05, "loss": 0.4953, "step": 1320 },
    { "epoch": 1.7, "learning_rate": 2.9368241320432557e-05, "loss": 0.5821, "step": 1325 },
    { "epoch": 1.7, "learning_rate": 2.9311326124075134e-05, "loss": 0.4582, "step": 1330 },
    { "epoch": 1.71, "learning_rate": 2.9254410927717704e-05, "loss": 0.4931, "step": 1335 },
    { "epoch": 1.72, "learning_rate": 2.9197495731360277e-05, "loss": 0.4979, "step": 1340 },
    { "epoch": 1.72, "learning_rate": 2.9140580535002847e-05, "loss": 0.4933, "step": 1345 },
    { "epoch": 1.73, "learning_rate": 2.908366533864542e-05, "loss": 0.463, "step": 1350 },
    { "epoch": 1.73, "learning_rate": 2.902675014228799e-05, "loss": 0.4945, "step": 1355 },
    { "epoch": 1.74, "learning_rate": 2.8969834945930567e-05, "loss": 0.4822, "step": 1360 },
    { "epoch": 1.75, "learning_rate": 2.8912919749573137e-05, "loss": 0.5452, "step": 1365 },
    { "epoch": 1.75, "learning_rate": 2.885600455321571e-05, "loss": 0.4868, "step": 1370 },
    { "epoch": 1.76, "learning_rate": 2.8799089356858284e-05, "loss": 0.553, "step": 1375 },
    { "epoch": 1.77, "learning_rate": 2.8742174160500854e-05, "loss": 0.5744, "step": 1380 },
    { "epoch": 1.77, "learning_rate": 2.868525896414343e-05, "loss": 0.5091, "step": 1385 },
    { "epoch": 1.78, "learning_rate": 2.8628343767786e-05, "loss": 0.5209, "step": 1390 },
    { "epoch": 1.79, "learning_rate": 2.8571428571428574e-05, "loss": 0.5506, "step": 1395 },
    { "epoch": 1.79, "learning_rate": 2.8514513375071144e-05, "loss": 0.5383, "step": 1400 },
    { "epoch": 1.8, "learning_rate": 2.8457598178713718e-05, "loss": 0.5534, "step": 1405 },
    { "epoch": 1.81, "learning_rate": 2.8400682982356294e-05, "loss": 0.3911, "step": 1410 },
    { "epoch": 1.81, "learning_rate": 2.8343767785998864e-05, "loss": 0.501, "step": 1415 },
    { "epoch": 1.82, "learning_rate": 2.8286852589641438e-05, "loss": 0.4988, "step": 1420 },
    { "epoch": 1.82, "learning_rate": 2.8229937393284008e-05, "loss": 0.5158, "step": 1425 },
    { "epoch": 1.83, "learning_rate": 2.817302219692658e-05, "loss": 0.4976, "step": 1430 },
    { "epoch": 1.84, "learning_rate": 2.811610700056915e-05, "loss": 0.4873, "step": 1435 },
    { "epoch": 1.84, "learning_rate": 2.8059191804211728e-05, "loss": 0.5198, "step": 1440 },
    { "epoch": 1.85, "learning_rate": 2.8002276607854298e-05, "loss": 0.4795, "step": 1445 },
    { "epoch": 1.86, "learning_rate": 2.794536141149687e-05, "loss": 0.5029, "step": 1450 },
    { "epoch": 1.86, "learning_rate": 2.7888446215139448e-05, "loss": 0.4574, "step": 1455 },
    { "epoch": 1.87, "learning_rate": 2.7831531018782018e-05, "loss": 0.4224, "step": 1460 },
    { "epoch": 1.88, "learning_rate": 2.777461582242459e-05, "loss": 0.4447, "step": 1465 },
    { "epoch": 1.88, "learning_rate": 2.771770062606716e-05, "loss": 0.5863, "step": 1470 },
    { "epoch": 1.89, "learning_rate": 2.7660785429709735e-05, "loss": 0.5724, "step": 1475 },
    { "epoch": 1.89, "learning_rate": 2.7603870233352305e-05, "loss": 0.4397, "step": 1480 },
    { "epoch": 1.9, "learning_rate": 2.754695503699488e-05, "loss": 0.441, "step": 1485 },
    { "epoch": 1.91, "learning_rate": 2.749003984063745e-05, "loss": 0.549, "step": 1490 },
    { "epoch": 1.91, "learning_rate": 2.7433124644280025e-05, "loss": 0.4723, "step": 1495 },
    { "epoch": 1.92, "learning_rate": 2.7376209447922598e-05, "loss": 0.4554, "step": 1500 },
    { "epoch": 1.93, "learning_rate": 2.7319294251565168e-05, "loss": 0.5067, "step": 1505 },
    { "epoch": 1.93, "learning_rate": 2.7262379055207745e-05, "loss": 0.3471, "step": 1510 },
    { "epoch": 1.94, "learning_rate": 2.7205463858850315e-05, "loss": 0.4403, "step": 1515 },
    { "epoch": 1.95, "learning_rate": 2.714854866249289e-05, "loss": 0.4034, "step": 1520 },
    { "epoch": 1.95, "learning_rate": 2.709163346613546e-05, "loss": 0.617, "step": 1525 },
    { "epoch": 1.96, "learning_rate": 2.7034718269778032e-05, "loss": 0.489, "step": 1530 },
    { "epoch": 1.97, "learning_rate": 2.697780307342061e-05, "loss": 0.4514, "step": 1535 },
    { "epoch": 1.97, "learning_rate": 2.692088787706318e-05, "loss": 0.4604, "step": 1540 },
    { "epoch": 1.98, "learning_rate": 2.6863972680705752e-05, "loss": 0.4845, "step": 1545 },
    { "epoch": 1.98, "learning_rate": 2.6807057484348322e-05, "loss": 0.4273, "step": 1550 },
    { "epoch": 1.99, "learning_rate": 2.6750142287990895e-05, "loss": 0.3995, "step": 1555 },
    { "epoch": 2.0, "learning_rate": 2.6693227091633465e-05, "loss": 0.6222, "step": 1560 },
    { "epoch": 2.0, "eval_accuracy": 0.8393, "eval_loss": 0.5094287395477295, "eval_runtime": 60.7156, "eval_samples_per_second": 164.702, "eval_steps_per_second": 10.294, "step": 1562 },
    { "epoch": 2.0, "learning_rate": 2.6636311895276042e-05, "loss": 0.3977, "step": 1565 },
    { "epoch": 2.01, "learning_rate": 2.6579396698918612e-05, "loss": 0.2847, "step": 1570 },
    { "epoch": 2.02, "learning_rate": 2.6522481502561185e-05, "loss": 0.384, "step": 1575 },
    { "epoch": 2.02, "learning_rate": 2.6465566306203762e-05, "loss": 0.3344, "step": 1580 },
    { "epoch": 2.03, "learning_rate": 2.6408651109846332e-05, "loss": 0.347, "step": 1585 },
    { "epoch": 2.04, "learning_rate": 2.6351735913488905e-05, "loss": 0.3207, "step": 1590 },
    { "epoch": 2.04, "learning_rate": 2.6294820717131475e-05, "loss": 0.3625, "step": 1595 },
    { "epoch": 2.05, "learning_rate": 2.623790552077405e-05, "loss": 0.2822, "step": 1600 },
    { "epoch": 2.06, "learning_rate": 2.618099032441662e-05, "loss": 0.3479, "step": 1605 },
    { "epoch": 2.06, "learning_rate": 2.6124075128059196e-05, "loss": 0.318, "step": 1610 },
    { "epoch": 2.07, "learning_rate": 2.6067159931701766e-05, "loss": 0.3668, "step": 1615 },
    { "epoch": 2.07, "learning_rate": 2.601024473534434e-05, "loss": 0.3594, "step": 1620 },
    { "epoch": 2.08, "learning_rate": 2.5953329538986912e-05, "loss": 0.3636, "step": 1625 },
    { "epoch": 2.09, "learning_rate": 2.5896414342629482e-05, "loss": 0.3588, "step": 1630 },
    { "epoch": 2.09, "learning_rate": 2.583949914627206e-05, "loss": 0.3155, "step": 1635 },
    { "epoch": 2.1, "learning_rate": 2.578258394991463e-05, "loss": 0.3362, "step": 1640 },
    { "epoch": 2.11, "learning_rate": 2.5725668753557202e-05, "loss": 0.3159, "step": 1645 },
    { "epoch": 2.11, "learning_rate": 2.5668753557199772e-05, "loss": 0.3167, "step": 1650 },
    { "epoch": 2.12, "learning_rate": 2.5611838360842346e-05, "loss": 0.3597, "step": 1655 },
    { "epoch": 2.13, "learning_rate": 2.5554923164484923e-05, "loss": 0.2862, "step": 1660 },
    { "epoch": 2.13, "learning_rate": 2.5498007968127493e-05, "loss": 0.4218, "step": 1665 },
    { "epoch": 2.14, "learning_rate": 2.5441092771770066e-05, "loss": 0.3902, "step": 1670 },
    { "epoch": 2.14, "learning_rate": 2.5384177575412636e-05, "loss": 0.371, "step": 1675 },
    { "epoch": 2.15, "learning_rate": 2.532726237905521e-05, "loss": 0.3218, "step": 1680 },
    { "epoch": 2.16, "learning_rate": 2.527034718269778e-05, "loss": 0.3233, "step": 1685 },
    { "epoch": 2.16, "learning_rate": 2.5213431986340356e-05, "loss": 0.3293, "step": 1690 },
    { "epoch": 2.17, "learning_rate": 2.5156516789982926e-05, "loss": 0.295, "step": 1695 },
    { "epoch": 2.18, "learning_rate": 2.50996015936255e-05, "loss": 0.3192, "step": 1700 },
    { "epoch": 2.18, "learning_rate": 2.5042686397268073e-05, "loss": 0.2638, "step": 1705 },
    { "epoch": 2.19, "learning_rate": 2.4985771200910646e-05, "loss": 0.3065, "step": 1710 },
    { "epoch": 2.2, "learning_rate": 2.492885600455322e-05, "loss": 0.3483, "step": 1715 },
    { "epoch": 2.2, "learning_rate": 2.487194080819579e-05, "loss": 0.3138, "step": 1720 },
    { "epoch": 2.21, "learning_rate": 2.4815025611838363e-05, "loss": 0.3677, "step": 1725 },
    { "epoch": 2.22, "learning_rate": 2.4758110415480933e-05, "loss": 0.3726, "step": 1730 },
    { "epoch": 2.22, "learning_rate": 2.470119521912351e-05, "loss": 0.3356, "step": 1735 },
    { "epoch": 2.23, "learning_rate": 2.4644280022766083e-05, "loss": 0.3099, "step": 1740 },
    { "epoch": 2.23, "learning_rate": 2.4587364826408653e-05, "loss": 0.283, "step": 1745 },
    { "epoch": 2.24, "learning_rate": 2.4530449630051226e-05, "loss": 0.2828, "step": 1750 },
    { "epoch": 2.25, "learning_rate": 2.4473534433693796e-05, "loss": 0.3751, "step": 1755 },
    { "epoch": 2.25, "learning_rate": 2.4416619237336373e-05, "loss": 0.3227, "step": 1760 },
    { "epoch": 2.26, "learning_rate": 2.4359704040978943e-05, "loss": 0.3716, "step": 1765 },
    { "epoch": 2.27, "learning_rate": 2.4302788844621517e-05, "loss": 0.3669, "step": 1770 },
    { "epoch": 2.27, "learning_rate": 2.4245873648264087e-05, "loss": 0.3195, "step": 1775 },
    { "epoch": 2.28, "learning_rate": 2.418895845190666e-05, "loss": 0.3147, "step": 1780 },
    { "epoch": 2.29, "learning_rate": 2.4132043255549237e-05, "loss": 0.339, "step": 1785 },
    { "epoch": 2.29, "learning_rate": 2.4075128059191807e-05, "loss": 0.3949, "step": 1790 },
    { "epoch": 2.3, "learning_rate": 2.401821286283438e-05, "loss": 0.2976, "step": 1795 },
    { "epoch": 2.3, "learning_rate": 2.396129766647695e-05, "loss": 0.4075, "step": 1800 },
    { "epoch": 2.31, "learning_rate": 2.3904382470119523e-05, "loss": 0.3482, "step": 1805 },
    { "epoch": 2.32, "learning_rate": 2.3847467273762093e-05, "loss": 0.4089, "step": 1810 },
    { "epoch": 2.32, "learning_rate": 2.379055207740467e-05, "loss": 0.3574, "step": 1815 },
    { "epoch": 2.33, "learning_rate": 2.373363688104724e-05, "loss": 0.3617, "step": 1820 },
    { "epoch": 2.34, "learning_rate": 2.3676721684689814e-05, "loss": 0.3421, "step": 1825 },
    { "epoch": 2.34, "learning_rate": 2.3619806488332387e-05, "loss": 0.3523, "step": 1830 },
    { "epoch": 2.35, "learning_rate": 2.3562891291974957e-05, "loss": 0.3594, "step": 1835 },
    { "epoch": 2.36, "learning_rate": 2.3505976095617534e-05, "loss": 0.3177, "step": 1840 },
    { "epoch": 2.36, "learning_rate": 2.3449060899260104e-05, "loss": 0.3867, "step": 1845 },
    { "epoch": 2.37, "learning_rate": 2.3392145702902677e-05, "loss": 0.3826, "step": 1850 },
    { "epoch": 2.38, "learning_rate": 2.3335230506545247e-05, "loss": 0.2243, "step": 1855 },
    { "epoch": 2.38, "learning_rate": 2.3278315310187824e-05, "loss": 0.3039, "step": 1860 },
    { "epoch": 2.39, "learning_rate": 2.3221400113830397e-05, "loss": 0.3555, "step": 1865 },
    { "epoch": 2.39, "learning_rate": 2.3164484917472967e-05, "loss": 0.3321, "step": 1870 },
    { "epoch": 2.4, "learning_rate": 2.310756972111554e-05, "loss": 0.3334, "step": 1875 },
    { "epoch": 2.41, "learning_rate": 2.305065452475811e-05, "loss": 0.3629, "step": 1880 },
    { "epoch": 2.41, "learning_rate": 2.2993739328400687e-05, "loss": 0.2421, "step": 1885 },
    { "epoch": 2.42, "learning_rate": 2.2936824132043257e-05, "loss": 0.3204, "step": 1890 },
    { "epoch": 2.43, "learning_rate": 2.287990893568583e-05, "loss": 0.3631, "step": 1895 },
    { "epoch": 2.43, "learning_rate": 2.28229937393284e-05, "loss": 0.3279, "step": 1900 },
    { "epoch": 2.44, "learning_rate": 2.2766078542970974e-05, "loss": 0.3008, "step": 1905 },
    { "epoch": 2.45, "learning_rate": 2.270916334661355e-05, "loss": 0.4036, "step": 1910 },
    { "epoch": 2.45, "learning_rate": 2.265224815025612e-05, "loss": 0.3201, "step": 1915 },
    { "epoch": 2.46, "learning_rate": 2.2595332953898694e-05, "loss": 0.3041, "step": 1920 },
    { "epoch": 2.46, "learning_rate": 2.2538417757541264e-05, "loss": 0.3208, "step": 1925 },
    { "epoch": 2.47, "learning_rate": 2.2481502561183838e-05, "loss": 0.2943, "step": 1930 },
    { "epoch": 2.48, "learning_rate": 2.2424587364826408e-05, "loss": 0.2831, "step": 1935 },
    { "epoch": 2.48, "learning_rate": 2.2367672168468984e-05, "loss": 0.3645, "step": 1940 },
    { "epoch": 2.49, "learning_rate": 2.2310756972111554e-05, "loss": 0.3532, "step": 1945 },
    { "epoch": 2.5, "learning_rate": 2.2253841775754128e-05, "loss": 0.3504, "step": 1950 },
    { "epoch": 2.5, "learning_rate": 2.21969265793967e-05, "loss": 0.3465, "step": 1955 },
    { "epoch": 2.51, "learning_rate": 2.214001138303927e-05, "loss": 0.358, "step": 1960 },
    { "epoch": 2.52, "learning_rate": 2.2083096186681848e-05, "loss": 0.3855, "step": 1965 },
    { "epoch": 2.52, "learning_rate": 2.2026180990324418e-05, "loss": 0.2887, "step": 1970 },
    { "epoch": 2.53, "learning_rate": 2.196926579396699e-05, "loss": 0.275, "step": 1975 },
    { "epoch": 2.54, "learning_rate": 2.191235059760956e-05, "loss": 0.2384, "step": 1980 },
    { "epoch": 2.54, "learning_rate": 2.1855435401252138e-05, "loss": 0.2829, "step": 1985 },
    { "epoch": 2.55, "learning_rate": 2.179852020489471e-05, "loss": 0.3765, "step": 1990 },
    { "epoch": 2.55, "learning_rate": 2.174160500853728e-05, "loss": 0.3509, "step": 1995 },
    { "epoch": 2.56, "learning_rate": 2.1684689812179855e-05, "loss": 0.3517, "step": 2000 },
    { "epoch": 2.57, "learning_rate": 2.1627774615822425e-05, "loss": 0.3016, "step": 2005 },
    { "epoch": 2.57, "learning_rate": 2.1570859419465e-05, "loss": 0.3421, "step": 2010 },
    { "epoch": 2.58, "learning_rate": 2.151394422310757e-05, "loss": 0.3054, "step": 2015 },
    { "epoch": 2.59, "learning_rate": 2.1457029026750145e-05, "loss": 0.3658, "step": 2020 },
    { "epoch": 2.59, "learning_rate": 2.1400113830392715e-05, "loss": 0.2979, "step": 2025 },
    { "epoch": 2.6, "learning_rate": 2.1343198634035288e-05, "loss": 0.413, "step": 2030 },
    { "epoch": 2.61, "learning_rate": 2.1286283437677865e-05, "loss": 0.3388, "step": 2035 },
    { "epoch": 2.61, "learning_rate": 2.1229368241320435e-05, "loss": 0.2758, "step": 2040 },
    { "epoch": 2.62, "learning_rate": 2.117245304496301e-05, "loss": 0.2786, "step": 2045 },
    { "epoch": 2.62, "learning_rate": 2.111553784860558e-05, "loss": 0.2577, "step": 2050 },
    { "epoch": 2.63, "learning_rate": 2.1058622652248152e-05, "loss": 0.26, "step": 2055 },
    { "epoch": 2.64, "learning_rate": 2.1001707455890722e-05, "loss": 0.2994, "step": 2060 },
    { "epoch": 2.64, "learning_rate": 2.09447922595333e-05, "loss": 0.2211, "step": 2065 },
    { "epoch": 2.65, "learning_rate": 2.088787706317587e-05, "loss": 0.3152, "step": 2070 },
    { "epoch": 2.66, "learning_rate": 2.0830961866818442e-05, "loss": 0.253, "step": 2075 },
    { "epoch": 2.66, "learning_rate": 2.0774046670461015e-05, "loss": 0.3429, "step": 2080 },
    { "epoch": 2.67, "learning_rate": 2.0717131474103585e-05, "loss": 0.2717, "step": 2085 },
    { "epoch": 2.68, "learning_rate": 2.0660216277746162e-05, "loss": 0.2923, "step": 2090 },
    { "epoch": 2.68, "learning_rate": 2.0603301081388732e-05, "loss": 0.2446, "step": 2095 },
    { "epoch": 2.69, "learning_rate": 2.0546385885031305e-05, "loss": 0.2661, "step": 2100 },
    { "epoch": 2.7, "learning_rate": 2.0489470688673875e-05, "loss": 0.3075, "step": 2105 },
    { "epoch": 2.7, "learning_rate": 2.0432555492316452e-05, "loss": 0.3915, "step": 2110 },
    { "epoch": 2.71, "learning_rate": 2.0375640295959025e-05, "loss": 0.385, "step": 2115 },
    { "epoch": 2.71, "learning_rate": 2.0318725099601595e-05, "loss": 0.3714, "step": 2120 },
    { "epoch": 2.72, "learning_rate": 2.026180990324417e-05, "loss": 0.3581, "step": 2125 },
    { "epoch": 2.73, "learning_rate": 2.020489470688674e-05, "loss": 0.2439, "step": 2130 },
    { "epoch": 2.73, "learning_rate": 2.0147979510529316e-05, "loss": 0.3, "step": 2135 },
    { "epoch": 2.74, "learning_rate": 2.0091064314171886e-05, "loss": 0.2996, "step": 2140 },
    { "epoch": 2.75, "learning_rate": 2.003414911781446e-05, "loss": 0.305, "step": 2145 },
    { "epoch": 2.75, "learning_rate": 1.9977233921457032e-05, "loss": 0.3291, "step": 2150 },
    { "epoch": 2.76, "learning_rate": 1.9920318725099602e-05, "loss": 0.2964, "step": 2155 },
    { "epoch": 2.77, "learning_rate": 1.9863403528742176e-05, "loss": 0.4112, "step": 2160 },
    { "epoch": 2.77, "learning_rate": 1.980648833238475e-05, "loss": 0.3476, "step": 2165 },
    { "epoch": 2.78, "learning_rate": 1.974957313602732e-05, "loss": 0.314, "step": 2170 },
    { "epoch": 2.78, "learning_rate": 1.9692657939669892e-05, "loss": 0.2829, "step": 2175 },
    { "epoch": 2.79, "learning_rate": 1.9635742743312466e-05, "loss": 0.3628, "step": 2180 },
    { "epoch": 2.8, "learning_rate": 1.957882754695504e-05, "loss": 0.2601, "step": 2185 },
    { "epoch": 2.8, "learning_rate": 1.9521912350597613e-05, "loss": 0.401, "step": 2190 },
    { "epoch": 2.81, "learning_rate": 1.9464997154240183e-05, "loss": 0.261, "step": 2195 },
    { "epoch": 2.82, "learning_rate": 1.9408081957882756e-05, "loss": 0.3531, "step": 2200 },
    { "epoch": 2.82, "learning_rate": 1.935116676152533e-05, "loss": 0.3118, "step": 2205 },
    { "epoch": 2.83, "learning_rate": 1.92942515651679e-05, "loss": 0.3498, "step": 2210 },
    { "epoch": 2.84, "learning_rate": 1.9237336368810473e-05, "loss": 0.3738, "step": 2215 },
    { "epoch": 2.84, "learning_rate": 1.9180421172453046e-05, "loss": 0.2844, "step": 2220 },
    { "epoch": 2.85, "learning_rate": 1.912350597609562e-05, "loss": 0.3668, "step": 2225 },
    { "epoch": 2.86, "learning_rate": 1.9066590779738193e-05, "loss": 0.4105, "step": 2230 },
    { "epoch": 2.86, "learning_rate": 1.9009675583380763e-05, "loss": 0.3562, "step": 2235 },
    { "epoch": 2.87, "learning_rate": 1.8952760387023336e-05, "loss": 0.3053, "step": 2240 },
    { "epoch": 2.87, "learning_rate": 1.889584519066591e-05, "loss": 0.3124, "step": 2245 },
    { "epoch": 2.88, "learning_rate": 1.8838929994308483e-05, "loss": 0.3148, "step": 2250 },
    { "epoch": 2.89, "learning_rate": 1.8782014797951053e-05, "loss": 0.2883, "step": 2255 },
    { "epoch": 2.89, "learning_rate": 1.8725099601593626e-05, "loss": 0.3433, "step": 2260 },
    { "epoch": 2.9, "learning_rate": 1.86681844052362e-05, "loss": 0.343, "step": 2265 },
    { "epoch": 2.91, "learning_rate": 1.8611269208878773e-05, "loss": 0.2873, "step": 2270 },
    { "epoch": 2.91, "learning_rate": 1.8554354012521346e-05, "loss": 0.3344, "step": 2275 },
    { "epoch": 2.92, "learning_rate": 1.8497438816163916e-05, "loss": 0.2587, "step": 2280 },
    { "epoch": 2.93, "learning_rate": 1.844052361980649e-05, "loss": 0.3247, "step": 2285 },
    { "epoch": 2.93, "learning_rate": 1.8383608423449063e-05, "loss": 0.281, "step": 2290 },
    { "epoch": 2.94, "learning_rate": 1.8326693227091633e-05, "loss": 0.2981, "step": 2295 },
    { "epoch": 2.94, "learning_rate": 1.8269778030734207e-05, "loss": 0.228, "step": 2300 },
    { "epoch": 2.95, "learning_rate": 1.821286283437678e-05, "loss": 0.3926, "step": 2305 },
    { "epoch": 2.96, "learning_rate": 1.8155947638019353e-05, "loss": 0.2932, "step": 2310 },
    { "epoch": 2.96, "learning_rate": 1.8099032441661927e-05, "loss": 0.364, "step": 2315 },
    { "epoch": 2.97, "learning_rate": 1.8042117245304497e-05, "loss": 0.4113, "step": 2320 },
    { "epoch": 2.98, "learning_rate": 1.798520204894707e-05, "loss": 0.3103, "step": 2325 },
    { "epoch": 2.98, "learning_rate": 1.7928286852589643e-05, "loss": 0.2307, "step": 2330 },
    { "epoch": 2.99, "learning_rate": 1.7871371656232213e-05, "loss": 0.2478, "step": 2335 },
    { "epoch": 3.0, "learning_rate": 1.7814456459874787e-05, "loss": 0.2912, "step": 2340 },
    { "epoch": 3.0, "eval_accuracy": 0.861, "eval_loss": 0.4452311098575592, "eval_runtime": 61.2046, "eval_samples_per_second": 163.386, "eval_steps_per_second": 10.212, "step": 2343 },
    { "epoch": 3.0, "learning_rate": 1.775754126351736e-05, "loss": 0.2109, "step": 2345 },
    { "epoch": 3.01, "learning_rate": 1.7700626067159934e-05, "loss": 0.2094, "step": 2350 },
    { "epoch": 3.02, "learning_rate": 1.7643710870802507e-05, "loss": 0.2467, "step": 2355 },
    { "epoch": 3.02, "learning_rate": 1.7586795674445077e-05, "loss": 0.2747, "step": 2360 },
    { "epoch": 3.03, "learning_rate": 1.752988047808765e-05, "loss": 0.1656, "step": 2365 },
    { "epoch": 3.03, "learning_rate": 1.7472965281730224e-05, "loss": 0.1659, "step": 2370 },
    { "epoch": 3.04, "learning_rate": 1.7416050085372797e-05, "loss": 0.2871, "step": 2375 },
    { "epoch": 3.05, "learning_rate": 1.7359134889015367e-05, "loss": 0.2369, "step": 2380 },
    { "epoch": 3.05, "learning_rate": 1.730221969265794e-05, "loss": 0.2459, "step": 2385 },
    { "epoch": 3.06, "learning_rate": 1.7245304496300514e-05, "loss": 0.1826, "step": 2390 },
    { "epoch": 3.07, "learning_rate": 1.7188389299943087e-05, "loss": 0.2467, "step": 2395 },
    { "epoch": 3.07, "learning_rate": 1.713147410358566e-05, "loss": 0.2196, "step": 2400 },
    { "epoch": 3.08, "learning_rate": 1.707455890722823e-05, "loss": 0.2427, "step": 2405 },
    { "epoch": 3.09, "learning_rate": 1.7017643710870804e-05, "loss": 0.2632, "step": 2410 },
    { "epoch": 3.09, "learning_rate": 1.6960728514513377e-05, "loss": 0.1924, "step": 2415 },
    { "epoch": 3.1, "learning_rate": 1.6903813318155947e-05, "loss": 0.2471, "step": 2420 },
    { "epoch": 3.1, "learning_rate": 1.684689812179852e-05, "loss": 0.2159, "step": 2425 },
    { "epoch": 3.11, "learning_rate": 1.6789982925441094e-05, "loss": 0.2591, "step": 2430 },
    { "epoch": 3.12, "learning_rate": 1.6733067729083667e-05, "loss": 0.2674, "step": 2435 },
    { "epoch": 3.12, "learning_rate": 1.667615253272624e-05, "loss": 0.2457, "step": 2440 },
    { "epoch": 3.13, "learning_rate": 1.661923733636881e-05, "loss": 0.2419, "step": 2445 },
    { "epoch": 3.14, "learning_rate": 1.6562322140011384e-05, "loss": 0.1977, "step": 2450 },
    { "epoch": 3.14, "learning_rate": 1.6505406943653958e-05, "loss": 0.216, "step": 2455 },
    { "epoch": 3.15, "learning_rate": 1.6448491747296528e-05, "loss": 0.2799, "step": 2460 },
    { "epoch": 3.16, "learning_rate": 1.63915765509391e-05, "loss": 0.1789, "step": 2465 },
    { "epoch": 3.16, "learning_rate": 1.6334661354581674e-05, "loss": 0.2677, "step": 2470 },
    { "epoch": 3.17, "learning_rate": 1.6277746158224248e-05, "loss": 0.1893, "step": 2475 },
    { "epoch": 3.18, "learning_rate": 1.622083096186682e-05, "loss": 0.1756, "step": 2480 },
    { "epoch": 3.18, "learning_rate": 1.616391576550939e-05, "loss": 0.2038, "step": 2485 },
    { "epoch": 3.19, "learning_rate": 1.6107000569151964e-05, "loss": 0.1776, "step": 2490 },
    { "epoch": 3.19, "learning_rate": 1.6050085372794538e-05, "loss": 0.3071, "step": 2495 },
    { "epoch": 3.2, "learning_rate": 1.5993170176437108e-05, "loss": 0.2819, "step": 2500 },
    { "epoch": 3.21, "learning_rate": 1.593625498007968e-05, "loss": 0.2425, "step": 2505 },
    { "epoch": 3.21, "learning_rate": 1.5879339783722255e-05, "loss": 0.2611, "step": 2510 },
    { "epoch": 3.22, "learning_rate": 1.5822424587364828e-05, "loss": 0.1911, "step": 2515 },
    { "epoch": 3.23, "learning_rate": 1.57655093910074e-05, "loss": 0.2089, "step": 2520 },
    { "epoch": 3.23, "learning_rate": 1.5708594194649975e-05, "loss": 0.2004, "step": 2525 },
    { "epoch": 3.24, "learning_rate": 1.5651678998292545e-05, "loss": 0.2162, "step": 2530 },
    { "epoch": 3.25, "learning_rate": 1.5594763801935118e-05, "loss": 0.2117, "step": 2535 },
    { "epoch": 3.25, "learning_rate": 1.553784860557769e-05, "loss": 0.2401, "step": 2540 },
    { "epoch": 3.26, "learning_rate": 1.548093340922026e-05, "loss": 0.2056, "step": 2545 },
    { "epoch": 3.26, "learning_rate": 1.5424018212862835e-05, "loss": 0.23, "step": 2550 },
    { "epoch": 3.27, "learning_rate": 1.5367103016505408e-05, "loss": 0.2444, "step": 2555 },
    { "epoch": 3.28, "learning_rate": 1.531018782014798e-05, "loss": 0.3034, "step": 2560 },
    { "epoch": 3.28, "learning_rate": 1.5253272623790553e-05, "loss": 0.1683, "step": 2565 },
    { "epoch": 3.29, "learning_rate": 1.5196357427433127e-05, "loss": 0.1557, "step": 2570 },
    { "epoch": 3.3, "learning_rate": 1.5139442231075698e-05, "loss": 0.1504, "step": 2575 },
    { "epoch": 3.3, "learning_rate": 1.508252703471827e-05, "loss": 0.2048, "step": 2580 },
    { "epoch": 3.31, "learning_rate": 1.5025611838360843e-05, "loss": 0.2511, "step": 2585 },
    { "epoch": 3.32, "learning_rate": 1.4968696642003415e-05, "loss": 0.204, "step": 2590 },
    { "epoch": 3.32, "learning_rate": 1.491178144564599e-05, "loss": 0.2699, "step": 2595 },
    { "epoch": 3.33, "learning_rate": 1.4854866249288562e-05, "loss": 0.1463, "step": 2600 },
    { "epoch": 3.34, "learning_rate": 1.4797951052931133e-05, "loss": 0.1645, "step": 2605 },
    { "epoch": 3.34, "learning_rate": 1.4741035856573707e-05, "loss": 0.1908, "step": 2610 },
    { "epoch": 3.35, "learning_rate": 1.4684120660216279e-05, "loss": 0.2191, "step": 2615 },
    { "epoch": 3.35, "learning_rate": 1.4627205463858852e-05, "loss": 0.2372, "step": 2620 },
    { "epoch": 3.36, "learning_rate": 1.4570290267501424e-05, "loss": 0.2423, "step": 2625 },
    { "epoch": 3.37, "learning_rate": 1.4513375071143995e-05, "loss": 0.141, "step": 2630 },
    { "epoch": 3.37, "learning_rate": 1.4456459874786569e-05, "loss": 0.2345, "step": 2635 },
    { "epoch": 3.38, "learning_rate": 1.4399544678429142e-05,
|
"loss": 0.2508, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.4342629482071715e-05, |
|
"loss": 0.1987, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.1662, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.4228799089356859e-05, |
|
"loss": 0.1729, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.4171883892999432e-05, |
|
"loss": 0.2623, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.4114968696642004e-05, |
|
"loss": 0.2242, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.4058053500284576e-05, |
|
"loss": 0.1906, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.4001138303927149e-05, |
|
"loss": 0.243, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.3944223107569724e-05, |
|
"loss": 0.2251, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.3887307911212296e-05, |
|
"loss": 0.2056, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.3830392714854867e-05, |
|
"loss": 0.2007, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.377347751849744e-05, |
|
"loss": 0.2273, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.3716562322140012e-05, |
|
"loss": 0.2652, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.3659647125782584e-05, |
|
"loss": 0.1765, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.3602731929425157e-05, |
|
"loss": 0.2289, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.354581673306773e-05, |
|
"loss": 0.2561, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.3488901536710304e-05, |
|
"loss": 0.2211, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.3431986340352876e-05, |
|
"loss": 0.1894, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.3375071143995448e-05, |
|
"loss": 0.1795, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.3318155947638021e-05, |
|
"loss": 0.1967, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.3261240751280593e-05, |
|
"loss": 0.2562, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.3204325554923166e-05, |
|
"loss": 0.2178, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.3147410358565738e-05, |
|
"loss": 0.1908, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.309049516220831e-05, |
|
"loss": 0.1789, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.3033579965850883e-05, |
|
"loss": 0.2742, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.2976664769493456e-05, |
|
"loss": 0.2605, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.291974957313603e-05, |
|
"loss": 0.2138, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.2862834376778601e-05, |
|
"loss": 0.1998, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.2805919180421173e-05, |
|
"loss": 0.2454, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.2749003984063746e-05, |
|
"loss": 0.2261, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.2692088787706318e-05, |
|
"loss": 0.1907, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.263517359134889e-05, |
|
"loss": 0.1849, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.2578258394991463e-05, |
|
"loss": 0.2552, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.2521343198634036e-05, |
|
"loss": 0.186, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.246442800227661e-05, |
|
"loss": 0.1753, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.2407512805919181e-05, |
|
"loss": 0.188, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.2350597609561755e-05, |
|
"loss": 0.2063, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.2293682413204327e-05, |
|
"loss": 0.1565, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.2236767216846898e-05, |
|
"loss": 0.185, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.2179852020489472e-05, |
|
"loss": 0.2167, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2122936824132043e-05, |
|
"loss": 0.1588, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2066021627774618e-05, |
|
"loss": 0.2709, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.200910643141719e-05, |
|
"loss": 0.2097, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.1952191235059762e-05, |
|
"loss": 0.1697, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.1895276038702335e-05, |
|
"loss": 0.2032, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.1838360842344907e-05, |
|
"loss": 0.2468, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.1781445645987478e-05, |
|
"loss": 0.1398, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.1724530449630052e-05, |
|
"loss": 0.2453, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.1667615253272624e-05, |
|
"loss": 0.2397, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.1610700056915199e-05, |
|
"loss": 0.1835, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.155378486055777e-05, |
|
"loss": 0.2497, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.1496869664200344e-05, |
|
"loss": 0.1499, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.1439954467842915e-05, |
|
"loss": 0.2455, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.1383039271485487e-05, |
|
"loss": 0.2016, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.132612407512806e-05, |
|
"loss": 0.2249, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.1269208878770632e-05, |
|
"loss": 0.1286, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.1212293682413204e-05, |
|
"loss": 0.2297, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.1155378486055777e-05, |
|
"loss": 0.1435, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.109846328969835e-05, |
|
"loss": 0.1694, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.1041548093340924e-05, |
|
"loss": 0.2167, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.0984632896983496e-05, |
|
"loss": 0.1979, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.0927717700626069e-05, |
|
"loss": 0.1548, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.087080250426864e-05, |
|
"loss": 0.2188, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.0813887307911212e-05, |
|
"loss": 0.2313, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.0756972111553786e-05, |
|
"loss": 0.2211, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.0700056915196357e-05, |
|
"loss": 0.1612, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.0643141718838932e-05, |
|
"loss": 0.2125, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.0586226522481504e-05, |
|
"loss": 0.206, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0529311326124076e-05, |
|
"loss": 0.2112, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.047239612976665e-05, |
|
"loss": 0.1762, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.0415480933409221e-05, |
|
"loss": 0.169, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.0358565737051793e-05, |
|
"loss": 0.2013, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.0301650540694366e-05, |
|
"loss": 0.1734, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.0244735344336938e-05, |
|
"loss": 0.215, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.0187820147979513e-05, |
|
"loss": 0.2166, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.0130904951622084e-05, |
|
"loss": 0.2166, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.0073989755264658e-05, |
|
"loss": 0.1942, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.001707455890723e-05, |
|
"loss": 0.1952, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 9.960159362549801e-06, |
|
"loss": 0.1974, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 9.903244166192375e-06, |
|
"loss": 0.2231, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 9.846328969834946e-06, |
|
"loss": 0.2053, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 9.78941377347752e-06, |
|
"loss": 0.1894, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 9.732498577120091e-06, |
|
"loss": 0.2454, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 9.675583380762665e-06, |
|
"loss": 0.1853, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 9.618668184405236e-06, |
|
"loss": 0.2468, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 9.56175298804781e-06, |
|
"loss": 0.1915, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 9.504837791690381e-06, |
|
"loss": 0.2251, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 9.447922595332955e-06, |
|
"loss": 0.1638, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 9.391007398975526e-06, |
|
"loss": 0.16, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 9.3340922026181e-06, |
|
"loss": 0.1759, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 9.277177006260673e-06, |
|
"loss": 0.2079, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 9.220261809903245e-06, |
|
"loss": 0.22, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 9.163346613545817e-06, |
|
"loss": 0.2352, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 9.10643141718839e-06, |
|
"loss": 0.1975, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 9.049516220830963e-06, |
|
"loss": 0.2027, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.992601024473535e-06, |
|
"loss": 0.1782, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.935685828116107e-06, |
|
"loss": 0.2234, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8679, |
|
"eval_loss": 0.43295106291770935, |
|
"eval_runtime": 60.931, |
|
"eval_samples_per_second": 164.12, |
|
"eval_steps_per_second": 10.257, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.87877063175868e-06, |
|
"loss": 0.1934, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.821855435401253e-06, |
|
"loss": 0.1317, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.764940239043825e-06, |
|
"loss": 0.2049, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 8.708025042686399e-06, |
|
"loss": 0.1348, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.65110984632897e-06, |
|
"loss": 0.1759, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.594194649971544e-06, |
|
"loss": 0.1538, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 8.537279453614115e-06, |
|
"loss": 0.1096, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.480364257256689e-06, |
|
"loss": 0.1689, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.42344906089926e-06, |
|
"loss": 0.1647, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 8.366533864541834e-06, |
|
"loss": 0.1881, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 8.309618668184405e-06, |
|
"loss": 0.1345, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 8.252703471826979e-06, |
|
"loss": 0.134, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 8.19578827546955e-06, |
|
"loss": 0.1413, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 8.138873079112124e-06, |
|
"loss": 0.1382, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 8.081957882754696e-06, |
|
"loss": 0.1666, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.025042686397269e-06, |
|
"loss": 0.1115, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 7.96812749003984e-06, |
|
"loss": 0.1575, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 7.911212293682414e-06, |
|
"loss": 0.1469, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 7.854297097324987e-06, |
|
"loss": 0.1367, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 7.797381900967559e-06, |
|
"loss": 0.1432, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 7.74046670461013e-06, |
|
"loss": 0.1375, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 7.683551508252704e-06, |
|
"loss": 0.1574, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 7.626636311895277e-06, |
|
"loss": 0.1289, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 7.569721115537849e-06, |
|
"loss": 0.1744, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 7.512805919180422e-06, |
|
"loss": 0.1705, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 7.455890722822995e-06, |
|
"loss": 0.1719, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.398975526465567e-06, |
|
"loss": 0.1454, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.342060330108139e-06, |
|
"loss": 0.1038, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.285145133750712e-06, |
|
"loss": 0.1544, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 7.228229937393284e-06, |
|
"loss": 0.1299, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 7.171314741035858e-06, |
|
"loss": 0.1465, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 7.114399544678429e-06, |
|
"loss": 0.1236, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.057484348321002e-06, |
|
"loss": 0.1329, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.0005691519635745e-06, |
|
"loss": 0.1716, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 6.943653955606148e-06, |
|
"loss": 0.1225, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 6.88673875924872e-06, |
|
"loss": 0.1124, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 6.829823562891292e-06, |
|
"loss": 0.1425, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 6.772908366533865e-06, |
|
"loss": 0.1209, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 6.715993170176438e-06, |
|
"loss": 0.1293, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 6.6590779738190105e-06, |
|
"loss": 0.1592, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 6.602162777461583e-06, |
|
"loss": 0.1525, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 6.545247581104155e-06, |
|
"loss": 0.1993, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 6.488332384746728e-06, |
|
"loss": 0.1312, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 6.431417188389301e-06, |
|
"loss": 0.1318, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 6.374501992031873e-06, |
|
"loss": 0.1576, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 6.317586795674445e-06, |
|
"loss": 0.1417, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 6.260671599317018e-06, |
|
"loss": 0.1696, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 6.203756402959591e-06, |
|
"loss": 0.1711, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 6.146841206602163e-06, |
|
"loss": 0.187, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 6.089926010244736e-06, |
|
"loss": 0.1165, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 6.033010813887309e-06, |
|
"loss": 0.1282, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.976095617529881e-06, |
|
"loss": 0.1528, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.919180421172453e-06, |
|
"loss": 0.1477, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 5.862265224815026e-06, |
|
"loss": 0.1666, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5.805350028457599e-06, |
|
"loss": 0.1881, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5.748434832100172e-06, |
|
"loss": 0.1377, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 5.6915196357427435e-06, |
|
"loss": 0.16, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 5.634604439385316e-06, |
|
"loss": 0.1488, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 5.577689243027889e-06, |
|
"loss": 0.143, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 5.520774046670462e-06, |
|
"loss": 0.1414, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 5.4638588503130345e-06, |
|
"loss": 0.1979, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 5.406943653955606e-06, |
|
"loss": 0.1419, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5.350028457598179e-06, |
|
"loss": 0.1216, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5.293113261240752e-06, |
|
"loss": 0.1433, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 5.236198064883325e-06, |
|
"loss": 0.1615, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 5.179282868525896e-06, |
|
"loss": 0.1652, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 5.122367672168469e-06, |
|
"loss": 0.2001, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 5.065452475811042e-06, |
|
"loss": 0.1397, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5.008537279453615e-06, |
|
"loss": 0.1599, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 4.951622083096187e-06, |
|
"loss": 0.1167, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 4.89470688673876e-06, |
|
"loss": 0.1473, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 4.837791690381332e-06, |
|
"loss": 0.1486, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 4.780876494023905e-06, |
|
"loss": 0.1604, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.723961297666477e-06, |
|
"loss": 0.1364, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.66704610130905e-06, |
|
"loss": 0.183, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 4.6101309049516225e-06, |
|
"loss": 0.1803, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.553215708594195e-06, |
|
"loss": 0.1405, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.4963005122367675e-06, |
|
"loss": 0.1436, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 4.43938531587934e-06, |
|
"loss": 0.1442, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 4.382470119521913e-06, |
|
"loss": 0.0979, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 4.325554923164485e-06, |
|
"loss": 0.1218, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.268639726807058e-06, |
|
"loss": 0.1965, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 4.21172453044963e-06, |
|
"loss": 0.161, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 4.154809334092203e-06, |
|
"loss": 0.1378, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.097894137734775e-06, |
|
"loss": 0.1333, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 4.040978941377348e-06, |
|
"loss": 0.137, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.98406374501992e-06, |
|
"loss": 0.1652, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.927148548662494e-06, |
|
"loss": 0.1655, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.870233352305065e-06, |
|
"loss": 0.1791, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.8133181559476383e-06, |
|
"loss": 0.128, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 3.756402959590211e-06, |
|
"loss": 0.1567, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 3.6994877632327834e-06, |
|
"loss": 0.1564, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.642572566875356e-06, |
|
"loss": 0.1369, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.585657370517929e-06, |
|
"loss": 0.176, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.528742174160501e-06, |
|
"loss": 0.1456, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3.471826977803074e-06, |
|
"loss": 0.1346, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.414911781445646e-06, |
|
"loss": 0.1565, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.357996585088219e-06, |
|
"loss": 0.1792, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 3.3010813887307915e-06, |
|
"loss": 0.1411, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 3.244166192373364e-06, |
|
"loss": 0.1847, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.1872509960159366e-06, |
|
"loss": 0.19, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.130335799658509e-06, |
|
"loss": 0.1474, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.0734206033010816e-06, |
|
"loss": 0.1869, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 3.0165054069436546e-06, |
|
"loss": 0.1128, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.9595902105862267e-06, |
|
"loss": 0.1203, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.9026750142287997e-06, |
|
"loss": 0.1286, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.8457598178713718e-06, |
|
"loss": 0.12, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.7888446215139443e-06, |
|
"loss": 0.095, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.7319294251565172e-06, |
|
"loss": 0.1002, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.6750142287990894e-06, |
|
"loss": 0.1256, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.6180990324416623e-06, |
|
"loss": 0.1148, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.5611838360842344e-06, |
|
"loss": 0.1646, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.5042686397268074e-06, |
|
"loss": 0.1083, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.44735344336938e-06, |
|
"loss": 0.1251, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.3904382470119524e-06, |
|
"loss": 0.1242, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.333523050654525e-06, |
|
"loss": 0.1671, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.2766078542970975e-06, |
|
"loss": 0.1442, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.21969265793967e-06, |
|
"loss": 0.1694, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.1627774615822426e-06, |
|
"loss": 0.1632, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.105862265224815e-06, |
|
"loss": 0.1738, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.0489470688673876e-06, |
|
"loss": 0.1062, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.99203187250996e-06, |
|
"loss": 0.1395, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.9351166761525327e-06, |
|
"loss": 0.1321, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 1.8782014797951054e-06, |
|
"loss": 0.1251, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 1.821286283437678e-06, |
|
"loss": 0.1499, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.7643710870802505e-06, |
|
"loss": 0.1338, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.707455890722823e-06, |
|
"loss": 0.1873, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.6505406943653958e-06, |
|
"loss": 0.1352, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.5936254980079683e-06, |
|
"loss": 0.1673, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.5367103016505408e-06, |
|
"loss": 0.1705, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.4797951052931133e-06, |
|
"loss": 0.1145, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.4228799089356859e-06, |
|
"loss": 0.0999, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.3659647125782586e-06, |
|
"loss": 0.1012, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.3090495162208312e-06, |
|
"loss": 0.1677, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 1.2521343198634037e-06, |
|
"loss": 0.169, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.1952191235059762e-06, |
|
"loss": 0.1844, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.1383039271485487e-06, |
|
"loss": 0.1701, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.0813887307911213e-06, |
|
"loss": 0.1319, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.0244735344336938e-06, |
|
"loss": 0.1546, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 9.675583380762663e-07, |
|
"loss": 0.1579, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 9.10643141718839e-07, |
|
"loss": 0.1308, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.537279453614115e-07, |
|
"loss": 0.1453, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 7.968127490039841e-07, |
|
"loss": 0.169, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 7.398975526465567e-07, |
|
"loss": 0.1191, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 6.829823562891293e-07, |
|
"loss": 0.152, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 6.260671599317018e-07, |
|
"loss": 0.1275, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.691519635742744e-07, |
|
"loss": 0.1409, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.122367672168469e-07, |
|
"loss": 0.1332, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.553215708594195e-07, |
|
"loss": 0.163, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 3.9840637450199207e-07, |
|
"loss": 0.1564, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.4149117814456466e-07, |
|
"loss": 0.2202, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.845759817871372e-07, |
|
"loss": 0.1769, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.2766078542970974e-07, |
|
"loss": 0.1343, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 1.7074558907228233e-07, |
|
"loss": 0.1481, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.1383039271485487e-07, |
|
"loss": 0.1591, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 5.6915196357427436e-08, |
|
"loss": 0.1983, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.121, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8735, |
|
"eval_loss": 0.42226287722587585, |
|
"eval_runtime": 60.1747, |
|
"eval_samples_per_second": 166.183, |
|
"eval_steps_per_second": 10.386, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3905, |
|
"total_flos": 6.230614598311477e+18, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.1997, |
|
"train_samples_per_second": 1252007.126, |
|
"train_steps_per_second": 19556.351 |
|
} |
|
], |
|
"max_steps": 3905, |
|
"num_train_epochs": 5, |
|
"total_flos": 6.230614598311477e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|