|
{ |
|
"best_metric": 0.9100631475448608, |
|
"best_model_checkpoint": "./lora-alpaca/checkpoint-1600", |
|
"epoch": 3.9486673247778876, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 1.5473, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 1.5181, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.4e-05, |
|
"loss": 1.3749, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 1.1669, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 1.0525, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017399999999999997, |
|
"loss": 1.0376, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000204, |
|
"loss": 1.0043, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000234, |
|
"loss": 0.9849, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 0.9821, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000294, |
|
"loss": 0.9846, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002987525987525987, |
|
"loss": 0.9635, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00029719334719334716, |
|
"loss": 0.9538, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002956340956340956, |
|
"loss": 0.9408, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00029407484407484405, |
|
"loss": 0.9529, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002925155925155925, |
|
"loss": 0.9622, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00029095634095634094, |
|
"loss": 0.9517, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002893970893970894, |
|
"loss": 0.9624, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002878378378378378, |
|
"loss": 0.9445, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002862785862785863, |
|
"loss": 0.9428, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002847193347193347, |
|
"loss": 0.9372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.9433729648590088, |
|
"eval_runtime": 44.3089, |
|
"eval_samples_per_second": 45.138, |
|
"eval_steps_per_second": 0.722, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00028316008316008317, |
|
"loss": 0.9388, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00028160083160083156, |
|
"loss": 0.9412, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00028004158004158, |
|
"loss": 0.9365, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00027848232848232845, |
|
"loss": 0.9382, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002769230769230769, |
|
"loss": 0.9309, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027536382536382534, |
|
"loss": 0.9256, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002738045738045738, |
|
"loss": 0.9245, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00027224532224532223, |
|
"loss": 0.9329, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002706860706860707, |
|
"loss": 0.9405, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002691268191268191, |
|
"loss": 0.9168, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00026756756756756756, |
|
"loss": 0.92, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.000266008316008316, |
|
"loss": 0.9098, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00026444906444906445, |
|
"loss": 0.9275, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00026288981288981285, |
|
"loss": 0.9215, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002613305613305613, |
|
"loss": 0.9237, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00025977130977130974, |
|
"loss": 0.9237, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002582120582120582, |
|
"loss": 0.928, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002566528066528066, |
|
"loss": 0.9156, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00025509355509355507, |
|
"loss": 0.9255, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002535343035343035, |
|
"loss": 0.9102, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.9269554615020752, |
|
"eval_runtime": 44.228, |
|
"eval_samples_per_second": 45.22, |
|
"eval_steps_per_second": 0.724, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00025197505197505196, |
|
"loss": 0.9312, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002504158004158004, |
|
"loss": 0.9249, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00024885654885654885, |
|
"loss": 0.9151, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002472972972972973, |
|
"loss": 0.9265, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00024573804573804574, |
|
"loss": 0.9064, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00024417879417879413, |
|
"loss": 0.9185, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002426195426195426, |
|
"loss": 0.9128, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00024106029106029105, |
|
"loss": 0.9159, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002395010395010395, |
|
"loss": 0.8987, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002379417879417879, |
|
"loss": 0.9217, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00023638253638253636, |
|
"loss": 0.906, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0002348232848232848, |
|
"loss": 0.9066, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00023326403326403325, |
|
"loss": 0.9085, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00023170478170478166, |
|
"loss": 0.9203, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002301455301455301, |
|
"loss": 0.9003, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00022858627858627858, |
|
"loss": 0.9071, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00022702702702702703, |
|
"loss": 0.9026, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00022546777546777544, |
|
"loss": 0.8845, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002239085239085239, |
|
"loss": 0.9023, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00022234927234927233, |
|
"loss": 0.9169, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 0.9196034073829651, |
|
"eval_runtime": 44.3139, |
|
"eval_samples_per_second": 45.133, |
|
"eval_steps_per_second": 0.722, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00022079002079002078, |
|
"loss": 0.9015, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0002192307692307692, |
|
"loss": 0.9066, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00021767151767151764, |
|
"loss": 0.8981, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0002161122661122661, |
|
"loss": 0.9023, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00021455301455301456, |
|
"loss": 0.9077, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00021299376299376295, |
|
"loss": 0.9055, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00021143451143451142, |
|
"loss": 0.9028, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00020987525987525987, |
|
"loss": 0.8874, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0002083160083160083, |
|
"loss": 0.8844, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00020675675675675673, |
|
"loss": 0.8996, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00020519750519750517, |
|
"loss": 0.9085, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00020363825363825362, |
|
"loss": 0.9024, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00020207900207900206, |
|
"loss": 0.899, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00020051975051975048, |
|
"loss": 0.8926, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00019896049896049893, |
|
"loss": 0.89, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001974012474012474, |
|
"loss": 0.8858, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019584199584199584, |
|
"loss": 0.8972, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00019428274428274426, |
|
"loss": 0.9003, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001927234927234927, |
|
"loss": 0.8991, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019116424116424115, |
|
"loss": 0.8912, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.914772093296051, |
|
"eval_runtime": 44.1817, |
|
"eval_samples_per_second": 45.268, |
|
"eval_steps_per_second": 0.724, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001896049896049896, |
|
"loss": 0.8859, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00018804573804573802, |
|
"loss": 0.8942, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00018648648648648646, |
|
"loss": 0.8953, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0001849272349272349, |
|
"loss": 0.8845, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00018336798336798335, |
|
"loss": 0.8871, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00018180873180873177, |
|
"loss": 0.8849, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001802494802494802, |
|
"loss": 0.8672, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00017869022869022869, |
|
"loss": 0.8811, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00017713097713097713, |
|
"loss": 0.8911, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00017557172557172555, |
|
"loss": 0.8814, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.000174012474012474, |
|
"loss": 0.8819, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00017245322245322244, |
|
"loss": 0.8835, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00017089397089397088, |
|
"loss": 0.8768, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0001693347193347193, |
|
"loss": 0.8829, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00016777546777546775, |
|
"loss": 0.8898, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001662162162162162, |
|
"loss": 0.8884, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00016465696465696466, |
|
"loss": 0.87, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00016309771309771305, |
|
"loss": 0.8851, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00016153846153846153, |
|
"loss": 0.8807, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00015997920997920997, |
|
"loss": 0.8923, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.9123471975326538, |
|
"eval_runtime": 44.1751, |
|
"eval_samples_per_second": 45.274, |
|
"eval_steps_per_second": 0.724, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00015841995841995842, |
|
"loss": 0.8825, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00015686070686070683, |
|
"loss": 0.878, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00015530145530145528, |
|
"loss": 0.861, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00015374220374220372, |
|
"loss": 0.8652, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00015218295218295217, |
|
"loss": 0.8589, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0001506237006237006, |
|
"loss": 0.8688, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00014906444906444906, |
|
"loss": 0.8796, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001475051975051975, |
|
"loss": 0.8719, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00014594594594594595, |
|
"loss": 0.8798, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00014438669438669437, |
|
"loss": 0.8669, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0001428274428274428, |
|
"loss": 0.8661, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00014126819126819126, |
|
"loss": 0.8684, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001397089397089397, |
|
"loss": 0.8591, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00013814968814968815, |
|
"loss": 0.8659, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001365904365904366, |
|
"loss": 0.868, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.000135031185031185, |
|
"loss": 0.871, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00013347193347193346, |
|
"loss": 0.8639, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001319126819126819, |
|
"loss": 0.8603, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00013035343035343035, |
|
"loss": 0.8617, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0001287941787941788, |
|
"loss": 0.8548, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.9124282002449036, |
|
"eval_runtime": 44.2512, |
|
"eval_samples_per_second": 45.197, |
|
"eval_steps_per_second": 0.723, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00012723492723492724, |
|
"loss": 0.8694, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00012567567567567565, |
|
"loss": 0.8651, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00012411642411642413, |
|
"loss": 0.8606, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00012255717255717254, |
|
"loss": 0.8678, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00012099792099792099, |
|
"loss": 0.8676, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00011943866943866943, |
|
"loss": 0.8602, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00011787941787941788, |
|
"loss": 0.8718, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00011632016632016631, |
|
"loss": 0.8618, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00011476091476091475, |
|
"loss": 0.8677, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00011320166320166319, |
|
"loss": 0.844, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00011164241164241164, |
|
"loss": 0.8653, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00011008316008316008, |
|
"loss": 0.8639, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00010852390852390852, |
|
"loss": 0.873, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00010696465696465695, |
|
"loss": 0.8639, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0001054054054054054, |
|
"loss": 0.8545, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00010384615384615383, |
|
"loss": 0.8654, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00010228690228690229, |
|
"loss": 0.8555, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00010072765072765072, |
|
"loss": 0.8707, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.916839916839916e-05, |
|
"loss": 0.8452, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 9.76091476091476e-05, |
|
"loss": 0.8512, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.9109137058258057, |
|
"eval_runtime": 44.368, |
|
"eval_samples_per_second": 45.077, |
|
"eval_steps_per_second": 0.721, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 9.604989604989604e-05, |
|
"loss": 0.8485, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.449064449064449e-05, |
|
"loss": 0.8635, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 9.293139293139293e-05, |
|
"loss": 0.8499, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 9.137214137214136e-05, |
|
"loss": 0.855, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.98128898128898e-05, |
|
"loss": 0.8437, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.825363825363824e-05, |
|
"loss": 0.8552, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.66943866943867e-05, |
|
"loss": 0.845, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.513513513513513e-05, |
|
"loss": 0.8591, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.357588357588357e-05, |
|
"loss": 0.8559, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.2016632016632e-05, |
|
"loss": 0.8562, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.045738045738045e-05, |
|
"loss": 0.8585, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.889812889812888e-05, |
|
"loss": 0.8627, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 7.733887733887734e-05, |
|
"loss": 0.849, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.577962577962577e-05, |
|
"loss": 0.8479, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.422037422037422e-05, |
|
"loss": 0.8522, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 7.266112266112266e-05, |
|
"loss": 0.8535, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.11018711018711e-05, |
|
"loss": 0.862, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 6.954261954261954e-05, |
|
"loss": 0.8618, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 6.798336798336798e-05, |
|
"loss": 0.8478, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 6.642411642411643e-05, |
|
"loss": 0.8467, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.9100631475448608, |
|
"eval_runtime": 44.1701, |
|
"eval_samples_per_second": 45.279, |
|
"eval_steps_per_second": 0.724, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 6.486486486486486e-05, |
|
"loss": 0.8471, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.33056133056133e-05, |
|
"loss": 0.8511, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 6.174636174636175e-05, |
|
"loss": 0.8517, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 6.018711018711019e-05, |
|
"loss": 0.8537, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 5.8627858627858625e-05, |
|
"loss": 0.8479, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 5.7068607068607063e-05, |
|
"loss": 0.8473, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 5.550935550935551e-05, |
|
"loss": 0.8394, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 5.395010395010395e-05, |
|
"loss": 0.8528, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 5.239085239085239e-05, |
|
"loss": 0.8356, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 5.083160083160083e-05, |
|
"loss": 0.8543, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.927234927234927e-05, |
|
"loss": 0.8332, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.771309771309771e-05, |
|
"loss": 0.8511, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 4.615384615384615e-05, |
|
"loss": 0.8542, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 4.4594594594594596e-05, |
|
"loss": 0.8238, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.3035343035343035e-05, |
|
"loss": 0.8424, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.147609147609147e-05, |
|
"loss": 0.8455, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.991683991683992e-05, |
|
"loss": 0.8554, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.8357588357588356e-05, |
|
"loss": 0.8491, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.6798336798336794e-05, |
|
"loss": 0.8506, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.523908523908524e-05, |
|
"loss": 0.8357, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 0.9110742211341858, |
|
"eval_runtime": 44.3239, |
|
"eval_samples_per_second": 45.122, |
|
"eval_steps_per_second": 0.722, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.367983367983368e-05, |
|
"loss": 0.8606, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 3.212058212058212e-05, |
|
"loss": 0.8471, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.056133056133056e-05, |
|
"loss": 0.8419, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.9002079002079002e-05, |
|
"loss": 0.8328, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.744282744282744e-05, |
|
"loss": 0.8442, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.5883575883575882e-05, |
|
"loss": 0.835, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.4324324324324324e-05, |
|
"loss": 0.8432, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.2765072765072765e-05, |
|
"loss": 0.8378, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.1205821205821204e-05, |
|
"loss": 0.8394, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.9646569646569645e-05, |
|
"loss": 0.8364, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.8087318087318087e-05, |
|
"loss": 0.849, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.652806652806653e-05, |
|
"loss": 0.8542, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.4968814968814968e-05, |
|
"loss": 0.8464, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.340956340956341e-05, |
|
"loss": 0.837, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.185031185031185e-05, |
|
"loss": 0.8385, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.0291060291060291e-05, |
|
"loss": 0.8399, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 8.731808731808731e-06, |
|
"loss": 0.839, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 7.172557172557172e-06, |
|
"loss": 0.8424, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.613305613305613e-06, |
|
"loss": 0.8398, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.054054054054054e-06, |
|
"loss": 0.8435, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.9102190732955933, |
|
"eval_runtime": 44.2725, |
|
"eval_samples_per_second": 45.175, |
|
"eval_steps_per_second": 0.723, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 2024, |
|
"num_train_epochs": 4, |
|
"total_flos": 8.080540492757991e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|