{ "best_metric": 0.249, "best_model_checkpoint": "logs/google-t5/t5-small/checkpoint-28155", "epoch": 18.0, "eval_steps": 500, "global_step": 33786, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 1.3638477325439453, "learning_rate": 1.997336174746937e-05, "loss": 2.8409, "step": 50 }, { "epoch": 0.05, "grad_norm": 1.420181155204773, "learning_rate": 1.9946723494938732e-05, "loss": 2.538, "step": 100 }, { "epoch": 0.08, "grad_norm": 1.119789719581604, "learning_rate": 1.9920085242408102e-05, "loss": 2.4663, "step": 150 }, { "epoch": 0.11, "grad_norm": 1.1085336208343506, "learning_rate": 1.9893446989877465e-05, "loss": 2.4737, "step": 200 }, { "epoch": 0.13, "grad_norm": 0.8778786659240723, "learning_rate": 1.9866808737346832e-05, "loss": 2.4436, "step": 250 }, { "epoch": 0.16, "grad_norm": 1.1101582050323486, "learning_rate": 1.9840170484816195e-05, "loss": 2.4545, "step": 300 }, { "epoch": 0.19, "grad_norm": 1.1823639869689941, "learning_rate": 1.9813532232285565e-05, "loss": 2.3472, "step": 350 }, { "epoch": 0.21, "grad_norm": 1.0613980293273926, "learning_rate": 1.978689397975493e-05, "loss": 2.3945, "step": 400 }, { "epoch": 0.24, "grad_norm": 1.1558998823165894, "learning_rate": 1.9760255727224296e-05, "loss": 2.4172, "step": 450 }, { "epoch": 0.27, "grad_norm": 1.098973274230957, "learning_rate": 1.9733617474693662e-05, "loss": 2.3629, "step": 500 }, { "epoch": 0.29, "grad_norm": 1.3629950284957886, "learning_rate": 1.970697922216303e-05, "loss": 2.3975, "step": 550 }, { "epoch": 0.32, "grad_norm": 1.1299411058425903, "learning_rate": 1.9680340969632396e-05, "loss": 2.3322, "step": 600 }, { "epoch": 0.35, "grad_norm": 0.9830572605133057, "learning_rate": 1.965370271710176e-05, "loss": 2.379, "step": 650 }, { "epoch": 0.37, "grad_norm": 1.0205196142196655, "learning_rate": 1.9627064464571126e-05, "loss": 2.3454, "step": 700 }, { "epoch": 0.4, "grad_norm": 1.230190396308899, "learning_rate": 1.960042621204049e-05, "loss": 2.3045, "step": 750 }, { "epoch": 0.43, "grad_norm": 1.115490436553955, "learning_rate": 1.957378795950986e-05, "loss": 2.3676, "step": 800 }, { "epoch": 0.45, "grad_norm": 0.9047977924346924, "learning_rate": 1.9547149706979226e-05, "loss": 2.3871, "step": 850 }, { "epoch": 0.48, "grad_norm": 1.0352205038070679, "learning_rate": 1.952051145444859e-05, "loss": 2.3481, "step": 900 }, { "epoch": 0.51, "grad_norm": 0.864746630191803, "learning_rate": 1.9493873201917956e-05, "loss": 2.3384, "step": 950 }, { "epoch": 0.53, "grad_norm": 1.3858081102371216, "learning_rate": 1.9467234949387323e-05, "loss": 2.3271, "step": 1000 }, { "epoch": 0.56, "grad_norm": 0.9969013333320618, "learning_rate": 1.944059669685669e-05, "loss": 2.3267, "step": 1050 }, { "epoch": 0.59, "grad_norm": 1.035396933555603, "learning_rate": 1.9413958444326053e-05, "loss": 2.349, "step": 1100 }, { "epoch": 0.61, "grad_norm": 1.235714316368103, "learning_rate": 1.938732019179542e-05, "loss": 2.3442, "step": 1150 }, { "epoch": 0.64, "grad_norm": 0.9119770526885986, "learning_rate": 1.9360681939264787e-05, "loss": 2.3246, "step": 1200 }, { "epoch": 0.67, "grad_norm": 0.8794578909873962, "learning_rate": 1.9334043686734153e-05, "loss": 2.3633, "step": 1250 }, { "epoch": 0.69, "grad_norm": 0.9355188012123108, "learning_rate": 1.930740543420352e-05, "loss": 2.339, "step": 1300 }, { "epoch": 0.72, "grad_norm": 0.9303766489028931, "learning_rate": 1.9280767181672883e-05, "loss": 2.3174, "step": 1350 }, { "epoch": 0.75, "grad_norm": 1.3199224472045898, "learning_rate": 1.925412892914225e-05, "loss": 2.3321, "step": 1400 }, { "epoch": 0.77, "grad_norm": 1.0782549381256104, "learning_rate": 1.9227490676611617e-05, "loss": 2.3348, "step": 1450 }, { "epoch": 0.8, "grad_norm": 0.9403214454650879, "learning_rate": 1.9200852424080984e-05, "loss": 2.3133, "step": 1500 }, { "epoch": 0.83, "grad_norm": 0.9809348583221436, "learning_rate": 1.9174214171550347e-05, "loss": 2.3177, "step": 1550 }, { "epoch": 0.85, "grad_norm": 0.9974561333656311, "learning_rate": 1.9147575919019714e-05, "loss": 2.311, "step": 1600 }, { "epoch": 0.88, "grad_norm": 0.9475740194320679, "learning_rate": 1.912093766648908e-05, "loss": 2.3208, "step": 1650 }, { "epoch": 0.91, "grad_norm": 1.0113029479980469, "learning_rate": 1.9094299413958447e-05, "loss": 2.3031, "step": 1700 }, { "epoch": 0.93, "grad_norm": 0.920647144317627, "learning_rate": 1.9067661161427814e-05, "loss": 2.3268, "step": 1750 }, { "epoch": 0.96, "grad_norm": 0.9876402020454407, "learning_rate": 1.9041022908897177e-05, "loss": 2.3, "step": 1800 }, { "epoch": 0.99, "grad_norm": 1.0489838123321533, "learning_rate": 1.9014384656366544e-05, "loss": 2.305, "step": 1850 }, { "epoch": 1.0, "eval_bert-score-f1": 0.8753073360155793, "eval_bert-score-precision": 0.8779077029804112, "eval_bert-score-recall": 0.873077545509742, "eval_gen_len": 49.607327717446466, "eval_loss": 2.088743209838867, "eval_rouge1": 0.3105, "eval_rouge2": 0.1136, "eval_rougeL": 0.2275, "eval_rougeLsum": 0.2276, "eval_runtime": 1364.7929, "eval_samples_per_second": 14.439, "eval_steps_per_second": 0.344, "step": 1877 }, { "epoch": 1.01, "grad_norm": 0.8712087869644165, "learning_rate": 1.898774640383591e-05, "loss": 2.2941, "step": 1900 }, { "epoch": 1.04, "grad_norm": 0.8705251812934875, "learning_rate": 1.8961108151305278e-05, "loss": 2.2953, "step": 1950 }, { "epoch": 1.07, "grad_norm": 1.098132610321045, "learning_rate": 1.893446989877464e-05, "loss": 2.2976, "step": 2000 }, { "epoch": 1.09, "grad_norm": 1.1778416633605957, "learning_rate": 1.8907831646244008e-05, "loss": 2.2866, "step": 2050 }, { "epoch": 1.12, "grad_norm": 0.7667921185493469, "learning_rate": 1.8881193393713374e-05, "loss": 2.2641, "step": 2100 }, { "epoch": 1.15, "grad_norm": 0.9039889574050903, "learning_rate": 1.885455514118274e-05, "loss": 2.2788, "step": 2150 }, { "epoch": 1.17, "grad_norm": 0.8681074976921082, "learning_rate": 1.8827916888652108e-05, "loss": 2.3297, "step": 2200 }, { "epoch": 1.2, "grad_norm": 1.0154231786727905, "learning_rate": 1.880127863612147e-05, "loss": 2.3267, "step": 2250 }, { "epoch": 1.23, "grad_norm": 1.5686161518096924, "learning_rate": 1.8774640383590838e-05, "loss": 2.2867, "step": 2300 }, { "epoch": 1.25, "grad_norm": 1.0235270261764526, "learning_rate": 1.8748002131060205e-05, "loss": 2.3132, "step": 2350 }, { "epoch": 1.28, "grad_norm": 1.1120573282241821, "learning_rate": 1.872136387852957e-05, "loss": 2.2774, "step": 2400 }, { "epoch": 1.31, "grad_norm": 0.9750345349311829, "learning_rate": 1.8694725625998935e-05, "loss": 2.2903, "step": 2450 }, { "epoch": 1.33, "grad_norm": 0.8807668089866638, "learning_rate": 1.86680873734683e-05, "loss": 2.2923, "step": 2500 }, { "epoch": 1.36, "grad_norm": 0.9335638284683228, "learning_rate": 1.8641449120937668e-05, "loss": 2.3168, "step": 2550 }, { "epoch": 1.39, "grad_norm": 0.8730989694595337, "learning_rate": 1.8614810868407035e-05, "loss": 2.3007, "step": 2600 }, { "epoch": 1.41, "grad_norm": 1.0019006729125977, "learning_rate": 1.8588172615876402e-05, "loss": 2.3087, "step": 2650 }, { "epoch": 1.44, "grad_norm": 0.9429858922958374, "learning_rate": 1.8561534363345765e-05, "loss": 2.2802, "step": 2700 }, { "epoch": 1.47, "grad_norm": 1.0355122089385986, "learning_rate": 1.8534896110815132e-05, "loss": 2.2801, "step": 2750 }, { "epoch": 1.49, "grad_norm": 1.3223506212234497, "learning_rate": 1.85082578582845e-05, "loss": 2.3245, "step": 2800 }, { "epoch": 1.52, "grad_norm": 0.9225859642028809, "learning_rate": 1.8481619605753865e-05, "loss": 2.2838, "step": 2850 }, { "epoch": 1.55, "grad_norm": 0.9716720581054688, "learning_rate": 1.845498135322323e-05, "loss": 2.296, "step": 2900 }, { "epoch": 1.57, "grad_norm": 0.947675883769989, "learning_rate": 1.8428343100692595e-05, "loss": 2.2768, "step": 2950 }, { "epoch": 1.6, "grad_norm": 0.9065707921981812, "learning_rate": 1.8401704848161962e-05, "loss": 2.2767, "step": 3000 }, { "epoch": 1.62, "grad_norm": 1.0325031280517578, "learning_rate": 1.837506659563133e-05, "loss": 2.262, "step": 3050 }, { "epoch": 1.65, "grad_norm": 0.9252289533615112, "learning_rate": 1.8348428343100696e-05, "loss": 2.2824, "step": 3100 }, { "epoch": 1.68, "grad_norm": 0.8026869297027588, "learning_rate": 1.832179009057006e-05, "loss": 2.2762, "step": 3150 }, { "epoch": 1.7, "grad_norm": 1.017001748085022, "learning_rate": 1.8295151838039426e-05, "loss": 2.2742, "step": 3200 }, { "epoch": 1.73, "grad_norm": 0.8680307269096375, "learning_rate": 1.8268513585508792e-05, "loss": 2.252, "step": 3250 }, { "epoch": 1.76, "grad_norm": 0.9704865217208862, "learning_rate": 1.824187533297816e-05, "loss": 2.2627, "step": 3300 }, { "epoch": 1.78, "grad_norm": 1.1407126188278198, "learning_rate": 1.8215237080447523e-05, "loss": 2.305, "step": 3350 }, { "epoch": 1.81, "grad_norm": 1.0476961135864258, "learning_rate": 1.818859882791689e-05, "loss": 2.2659, "step": 3400 }, { "epoch": 1.84, "grad_norm": 0.9194826483726501, "learning_rate": 1.8161960575386256e-05, "loss": 2.2728, "step": 3450 }, { "epoch": 1.86, "grad_norm": 1.061948299407959, "learning_rate": 1.8135322322855623e-05, "loss": 2.262, "step": 3500 }, { "epoch": 1.89, "grad_norm": 0.8690770864486694, "learning_rate": 1.810868407032499e-05, "loss": 2.2611, "step": 3550 }, { "epoch": 1.92, "grad_norm": 1.000588297843933, "learning_rate": 1.8082045817794353e-05, "loss": 2.2707, "step": 3600 }, { "epoch": 1.94, "grad_norm": 1.2689441442489624, "learning_rate": 1.805540756526372e-05, "loss": 2.2688, "step": 3650 }, { "epoch": 1.97, "grad_norm": 0.9255685210227966, "learning_rate": 1.8028769312733086e-05, "loss": 2.2559, "step": 3700 }, { "epoch": 2.0, "grad_norm": 1.0140782594680786, "learning_rate": 1.8002131060202453e-05, "loss": 2.2838, "step": 3750 }, { "epoch": 2.0, "eval_bert-score-f1": 0.8773485723212565, "eval_bert-score-precision": 0.8805842198545674, "eval_bert-score-recall": 0.8744701069949739, "eval_gen_len": 49.16461991271694, "eval_loss": 2.062197685241699, "eval_rouge1": 0.3196, "eval_rouge2": 0.1183, "eval_rougeL": 0.2349, "eval_rougeLsum": 0.235, "eval_runtime": 1347.8022, "eval_samples_per_second": 14.621, "eval_steps_per_second": 0.349, "step": 3754 }, { "epoch": 2.02, "grad_norm": 0.8860589861869812, "learning_rate": 1.7975492807671817e-05, "loss": 2.2635, "step": 3800 }, { "epoch": 2.05, "grad_norm": 0.9218833446502686, "learning_rate": 1.7948854555141183e-05, "loss": 2.2621, "step": 3850 }, { "epoch": 2.08, "grad_norm": 0.9549726247787476, "learning_rate": 1.792221630261055e-05, "loss": 2.2513, "step": 3900 }, { "epoch": 2.1, "grad_norm": 1.135712742805481, "learning_rate": 1.7895578050079917e-05, "loss": 2.2721, "step": 3950 }, { "epoch": 2.13, "grad_norm": 1.056344985961914, "learning_rate": 1.7868939797549283e-05, "loss": 2.2601, "step": 4000 }, { "epoch": 2.16, "grad_norm": 0.8976427316665649, "learning_rate": 1.7842301545018647e-05, "loss": 2.2441, "step": 4050 }, { "epoch": 2.18, "grad_norm": 1.0189875364303589, "learning_rate": 1.7815663292488014e-05, "loss": 2.2441, "step": 4100 }, { "epoch": 2.21, "grad_norm": 1.1941207647323608, "learning_rate": 1.778902503995738e-05, "loss": 2.2518, "step": 4150 }, { "epoch": 2.24, "grad_norm": 0.9858642816543579, "learning_rate": 1.7762386787426747e-05, "loss": 2.2417, "step": 4200 }, { "epoch": 2.26, "grad_norm": 0.8909502625465393, "learning_rate": 1.7735748534896114e-05, "loss": 2.2741, "step": 4250 }, { "epoch": 2.29, "grad_norm": 1.1010240316390991, "learning_rate": 1.7709110282365477e-05, "loss": 2.2538, "step": 4300 }, { "epoch": 2.32, "grad_norm": 1.1926771402359009, "learning_rate": 1.7682472029834844e-05, "loss": 2.2844, "step": 4350 }, { "epoch": 2.34, "grad_norm": 1.048973798751831, "learning_rate": 1.765583377730421e-05, "loss": 2.2677, "step": 4400 }, { "epoch": 2.37, "grad_norm": 1.0072720050811768, "learning_rate": 1.7629195524773577e-05, "loss": 2.2616, "step": 4450 }, { "epoch": 2.4, "grad_norm": 0.854369044303894, "learning_rate": 1.760255727224294e-05, "loss": 2.2498, "step": 4500 }, { "epoch": 2.42, "grad_norm": 0.9605410099029541, "learning_rate": 1.7575919019712307e-05, "loss": 2.2541, "step": 4550 }, { "epoch": 2.45, "grad_norm": 0.9667823314666748, "learning_rate": 1.7549280767181674e-05, "loss": 2.26, "step": 4600 }, { "epoch": 2.48, "grad_norm": 2.42110013961792, "learning_rate": 1.752264251465104e-05, "loss": 2.2585, "step": 4650 }, { "epoch": 2.5, "grad_norm": 1.0035040378570557, "learning_rate": 1.7496004262120408e-05, "loss": 2.2538, "step": 4700 }, { "epoch": 2.53, "grad_norm": 1.1149507761001587, "learning_rate": 1.746936600958977e-05, "loss": 2.2643, "step": 4750 }, { "epoch": 2.56, "grad_norm": 1.4942309856414795, "learning_rate": 1.7442727757059138e-05, "loss": 2.3053, "step": 4800 }, { "epoch": 2.58, "grad_norm": 1.016640305519104, "learning_rate": 1.7416089504528505e-05, "loss": 2.2325, "step": 4850 }, { "epoch": 2.61, "grad_norm": 0.8914662003517151, "learning_rate": 1.738945125199787e-05, "loss": 2.26, "step": 4900 }, { "epoch": 2.64, "grad_norm": 1.0502891540527344, "learning_rate": 1.7362812999467235e-05, "loss": 2.2357, "step": 4950 }, { "epoch": 2.66, "grad_norm": 0.9784315824508667, "learning_rate": 1.73361747469366e-05, "loss": 2.2371, "step": 5000 }, { "epoch": 2.69, "grad_norm": 0.9308114647865295, "learning_rate": 1.7309536494405968e-05, "loss": 2.2452, "step": 5050 }, { "epoch": 2.72, "grad_norm": 0.9364565014839172, "learning_rate": 1.7282898241875335e-05, "loss": 2.235, "step": 5100 }, { "epoch": 2.74, "grad_norm": 0.8989120721817017, "learning_rate": 1.72562599893447e-05, "loss": 2.272, "step": 5150 }, { "epoch": 2.77, "grad_norm": 1.2291622161865234, "learning_rate": 1.7229621736814065e-05, "loss": 2.278, "step": 5200 }, { "epoch": 2.8, "grad_norm": 0.9619302749633789, "learning_rate": 1.7202983484283432e-05, "loss": 2.2554, "step": 5250 }, { "epoch": 2.82, "grad_norm": 1.0430196523666382, "learning_rate": 1.71763452317528e-05, "loss": 2.2546, "step": 5300 }, { "epoch": 2.85, "grad_norm": 1.0834633111953735, "learning_rate": 1.7149706979222165e-05, "loss": 2.2379, "step": 5350 }, { "epoch": 2.88, "grad_norm": 1.0730029344558716, "learning_rate": 1.712306872669153e-05, "loss": 2.2789, "step": 5400 }, { "epoch": 2.9, "grad_norm": 1.124229907989502, "learning_rate": 1.7096430474160895e-05, "loss": 2.2293, "step": 5450 }, { "epoch": 2.93, "grad_norm": 0.9467495679855347, "learning_rate": 1.7069792221630262e-05, "loss": 2.2606, "step": 5500 }, { "epoch": 2.96, "grad_norm": 0.8856106400489807, "learning_rate": 1.704315396909963e-05, "loss": 2.2713, "step": 5550 }, { "epoch": 2.98, "grad_norm": 0.7882632613182068, "learning_rate": 1.7016515716568996e-05, "loss": 2.2508, "step": 5600 }, { "epoch": 3.0, "eval_bert-score-f1": 0.8785184490539139, "eval_bert-score-precision": 0.8821266702464495, "eval_bert-score-recall": 0.8752563403945616, "eval_gen_len": 48.600781487871714, "eval_loss": 2.047697067260742, "eval_rouge1": 0.324, "eval_rouge2": 0.1204, "eval_rougeL": 0.2387, "eval_rougeLsum": 0.2389, "eval_runtime": 1313.571, "eval_samples_per_second": 15.002, "eval_steps_per_second": 0.358, "step": 5631 }, { "epoch": 3.01, "grad_norm": 0.9630438685417175, "learning_rate": 1.698987746403836e-05, "loss": 2.246, "step": 5650 }, { "epoch": 3.04, "grad_norm": 0.8204315900802612, "learning_rate": 1.6963239211507726e-05, "loss": 2.225, "step": 5700 }, { "epoch": 3.06, "grad_norm": 1.1320478916168213, "learning_rate": 1.6936600958977092e-05, "loss": 2.2206, "step": 5750 }, { "epoch": 3.09, "grad_norm": 0.9559237360954285, "learning_rate": 1.690996270644646e-05, "loss": 2.2459, "step": 5800 }, { "epoch": 3.12, "grad_norm": 0.9689226150512695, "learning_rate": 1.6883324453915822e-05, "loss": 2.2517, "step": 5850 }, { "epoch": 3.14, "grad_norm": 0.9438573718070984, "learning_rate": 1.685668620138519e-05, "loss": 2.2579, "step": 5900 }, { "epoch": 3.17, "grad_norm": 1.1130074262619019, "learning_rate": 1.6830047948854556e-05, "loss": 2.2387, "step": 5950 }, { "epoch": 3.2, "grad_norm": 1.1083691120147705, "learning_rate": 1.6803409696323923e-05, "loss": 2.2558, "step": 6000 }, { "epoch": 3.22, "grad_norm": 0.96318119764328, "learning_rate": 1.677677144379329e-05, "loss": 2.257, "step": 6050 }, { "epoch": 3.25, "grad_norm": 0.8898953795433044, "learning_rate": 1.6750133191262653e-05, "loss": 2.2524, "step": 6100 }, { "epoch": 3.28, "grad_norm": 1.2933117151260376, "learning_rate": 1.672349493873202e-05, "loss": 2.2551, "step": 6150 }, { "epoch": 3.3, "grad_norm": 1.1499016284942627, "learning_rate": 1.6696856686201386e-05, "loss": 2.244, "step": 6200 }, { "epoch": 3.33, "grad_norm": 0.9967105388641357, "learning_rate": 1.6670218433670753e-05, "loss": 2.2514, "step": 6250 }, { "epoch": 3.36, "grad_norm": 1.0175275802612305, "learning_rate": 1.6643580181140116e-05, "loss": 2.2298, "step": 6300 }, { "epoch": 3.38, "grad_norm": 0.8878999352455139, "learning_rate": 1.6616941928609483e-05, "loss": 2.2394, "step": 6350 }, { "epoch": 3.41, "grad_norm": 1.1751534938812256, "learning_rate": 1.6590303676078853e-05, "loss": 2.2321, "step": 6400 }, { "epoch": 3.44, "grad_norm": 1.0338590145111084, "learning_rate": 1.6563665423548217e-05, "loss": 2.2562, "step": 6450 }, { "epoch": 3.46, "grad_norm": 0.8953673243522644, "learning_rate": 1.6537027171017583e-05, "loss": 2.2199, "step": 6500 }, { "epoch": 3.49, "grad_norm": 1.24599027633667, "learning_rate": 1.6510388918486947e-05, "loss": 2.2346, "step": 6550 }, { "epoch": 3.52, "grad_norm": 0.953091561794281, "learning_rate": 1.6483750665956313e-05, "loss": 2.246, "step": 6600 }, { "epoch": 3.54, "grad_norm": 0.919922947883606, "learning_rate": 1.645711241342568e-05, "loss": 2.2288, "step": 6650 }, { "epoch": 3.57, "grad_norm": 1.1812031269073486, "learning_rate": 1.6430474160895047e-05, "loss": 2.2419, "step": 6700 }, { "epoch": 3.6, "grad_norm": 0.9377938508987427, "learning_rate": 1.640383590836441e-05, "loss": 2.2478, "step": 6750 }, { "epoch": 3.62, "grad_norm": 1.247550368309021, "learning_rate": 1.6377197655833777e-05, "loss": 2.2495, "step": 6800 }, { "epoch": 3.65, "grad_norm": 1.1216537952423096, "learning_rate": 1.6350559403303147e-05, "loss": 2.2241, "step": 6850 }, { "epoch": 3.68, "grad_norm": 1.038252353668213, "learning_rate": 1.632392115077251e-05, "loss": 2.2346, "step": 6900 }, { "epoch": 3.7, "grad_norm": 0.8921188116073608, "learning_rate": 1.6297282898241877e-05, "loss": 2.2488, "step": 6950 }, { "epoch": 3.73, "grad_norm": 1.0422166585922241, "learning_rate": 1.627064464571124e-05, "loss": 2.2336, "step": 7000 }, { "epoch": 3.76, "grad_norm": 1.0294251441955566, "learning_rate": 1.624400639318061e-05, "loss": 2.234, "step": 7050 }, { "epoch": 3.78, "grad_norm": 0.8972188830375671, "learning_rate": 1.6217368140649974e-05, "loss": 2.2147, "step": 7100 }, { "epoch": 3.81, "grad_norm": 1.1645957231521606, "learning_rate": 1.619072988811934e-05, "loss": 2.2336, "step": 7150 }, { "epoch": 3.84, "grad_norm": 1.2658772468566895, "learning_rate": 1.6164091635588704e-05, "loss": 2.2322, "step": 7200 }, { "epoch": 3.86, "grad_norm": 0.923834502696991, "learning_rate": 1.613745338305807e-05, "loss": 2.2348, "step": 7250 }, { "epoch": 3.89, "grad_norm": 0.8885565996170044, "learning_rate": 1.611081513052744e-05, "loss": 2.2251, "step": 7300 }, { "epoch": 3.92, "grad_norm": 1.1370388269424438, "learning_rate": 1.6084176877996804e-05, "loss": 2.2266, "step": 7350 }, { "epoch": 3.94, "grad_norm": 0.9725862145423889, "learning_rate": 1.605753862546617e-05, "loss": 2.2243, "step": 7400 }, { "epoch": 3.97, "grad_norm": 0.8951359987258911, "learning_rate": 1.6030900372935535e-05, "loss": 2.2278, "step": 7450 }, { "epoch": 4.0, "grad_norm": 0.9756078720092773, "learning_rate": 1.6004262120404905e-05, "loss": 2.2591, "step": 7500 }, { "epoch": 4.0, "eval_bert-score-f1": 0.8792554777484974, "eval_bert-score-precision": 0.8833729287797178, "eval_bert-score-recall": 0.8754834527081552, "eval_gen_len": 47.54663554247438, "eval_loss": 2.039776086807251, "eval_rouge1": 0.3271, "eval_rouge2": 0.1225, "eval_rougeL": 0.2416, "eval_rougeLsum": 0.2417, "eval_runtime": 1382.3306, "eval_samples_per_second": 14.256, "eval_steps_per_second": 0.34, "step": 7508 }, { "epoch": 4.02, "grad_norm": 0.9563359618186951, "learning_rate": 1.5977623867874268e-05, "loss": 2.2286, "step": 7550 }, { "epoch": 4.05, "grad_norm": 0.9959126114845276, "learning_rate": 1.5950985615343635e-05, "loss": 2.2287, "step": 7600 }, { "epoch": 4.08, "grad_norm": 0.9405544996261597, "learning_rate": 1.5924347362813e-05, "loss": 2.244, "step": 7650 }, { "epoch": 4.1, "grad_norm": 1.0261281728744507, "learning_rate": 1.5897709110282368e-05, "loss": 2.2141, "step": 7700 }, { "epoch": 4.13, "grad_norm": 0.8446668386459351, "learning_rate": 1.5871070857751735e-05, "loss": 2.2217, "step": 7750 }, { "epoch": 4.16, "grad_norm": 0.981716513633728, "learning_rate": 1.58444326052211e-05, "loss": 2.2535, "step": 7800 }, { "epoch": 4.18, "grad_norm": 0.9390552043914795, "learning_rate": 1.5817794352690465e-05, "loss": 2.1902, "step": 7850 }, { "epoch": 4.21, "grad_norm": 0.9392485618591309, "learning_rate": 1.5791156100159832e-05, "loss": 2.2325, "step": 7900 }, { "epoch": 4.24, "grad_norm": 0.8909654021263123, "learning_rate": 1.57645178476292e-05, "loss": 2.2106, "step": 7950 }, { "epoch": 4.26, "grad_norm": 0.9464941620826721, "learning_rate": 1.5737879595098562e-05, "loss": 2.1985, "step": 8000 }, { "epoch": 4.29, "grad_norm": 0.9561291337013245, "learning_rate": 1.571124134256793e-05, "loss": 2.2595, "step": 8050 }, { "epoch": 4.32, "grad_norm": 1.0075396299362183, "learning_rate": 1.5684603090037295e-05, "loss": 2.2328, "step": 8100 }, { "epoch": 4.34, "grad_norm": 1.0439025163650513, "learning_rate": 1.5657964837506662e-05, "loss": 2.2036, "step": 8150 }, { "epoch": 4.37, "grad_norm": 0.9002524614334106, "learning_rate": 1.563132658497603e-05, "loss": 2.2157, "step": 8200 }, { "epoch": 4.4, "grad_norm": 0.9860824346542358, "learning_rate": 1.5604688332445392e-05, "loss": 2.2526, "step": 8250 }, { "epoch": 4.42, "grad_norm": 0.8125404715538025, "learning_rate": 1.557805007991476e-05, "loss": 2.2458, "step": 8300 }, { "epoch": 4.45, "grad_norm": 0.9793021082878113, "learning_rate": 1.5551411827384126e-05, "loss": 2.2111, "step": 8350 }, { "epoch": 4.48, "grad_norm": 0.9543974995613098, "learning_rate": 1.5524773574853492e-05, "loss": 2.2372, "step": 8400 }, { "epoch": 4.5, "grad_norm": 1.0786656141281128, "learning_rate": 1.5498135322322856e-05, "loss": 2.2256, "step": 8450 }, { "epoch": 4.53, "grad_norm": 1.1105990409851074, "learning_rate": 1.5471497069792223e-05, "loss": 2.2282, "step": 8500 }, { "epoch": 4.56, "grad_norm": 1.4228782653808594, "learning_rate": 1.544485881726159e-05, "loss": 2.2482, "step": 8550 }, { "epoch": 4.58, "grad_norm": 1.0002621412277222, "learning_rate": 1.5418220564730956e-05, "loss": 2.2104, "step": 8600 }, { "epoch": 4.61, "grad_norm": 0.8620786666870117, "learning_rate": 1.5391582312200323e-05, "loss": 2.2201, "step": 8650 }, { "epoch": 4.64, "grad_norm": 0.9315156936645508, "learning_rate": 1.5364944059669686e-05, "loss": 2.2519, "step": 8700 }, { "epoch": 4.66, "grad_norm": 0.9382575750350952, "learning_rate": 1.5338305807139053e-05, "loss": 2.2439, "step": 8750 }, { "epoch": 4.69, "grad_norm": 0.9228121042251587, "learning_rate": 1.531166755460842e-05, "loss": 2.2105, "step": 8800 }, { "epoch": 4.71, "grad_norm": 0.9347973465919495, "learning_rate": 1.5285029302077786e-05, "loss": 2.2201, "step": 8850 }, { "epoch": 4.74, "grad_norm": 1.0652745962142944, "learning_rate": 1.525839104954715e-05, "loss": 2.2117, "step": 8900 }, { "epoch": 4.77, "grad_norm": 0.9884467720985413, "learning_rate": 1.5231752797016516e-05, "loss": 2.2058, "step": 8950 }, { "epoch": 4.79, "grad_norm": 1.2926949262619019, "learning_rate": 1.5205114544485883e-05, "loss": 2.2062, "step": 9000 }, { "epoch": 4.82, "grad_norm": 0.9000586867332458, "learning_rate": 1.5178476291955248e-05, "loss": 2.2578, "step": 9050 }, { "epoch": 4.85, "grad_norm": 1.1215986013412476, "learning_rate": 1.5151838039424615e-05, "loss": 2.2035, "step": 9100 }, { "epoch": 4.87, "grad_norm": 0.9411687254905701, "learning_rate": 1.512519978689398e-05, "loss": 2.2392, "step": 9150 }, { "epoch": 4.9, "grad_norm": 0.9772723913192749, "learning_rate": 1.5098561534363347e-05, "loss": 2.2341, "step": 9200 }, { "epoch": 4.93, "grad_norm": 0.9062642455101013, "learning_rate": 1.5071923281832712e-05, "loss": 2.2132, "step": 9250 }, { "epoch": 4.95, "grad_norm": 0.9838491082191467, "learning_rate": 1.5045285029302079e-05, "loss": 2.2316, "step": 9300 }, { "epoch": 4.98, "grad_norm": 0.7986493110656738, "learning_rate": 1.5018646776771445e-05, "loss": 2.2406, "step": 9350 }, { "epoch": 5.0, "eval_bert-score-f1": 0.8796141604796214, "eval_bert-score-precision": 0.883697147159108, "eval_bert-score-recall": 0.8758704174409406, "eval_gen_len": 47.69806150411042, "eval_loss": 2.032108783721924, "eval_rouge1": 0.3291, "eval_rouge2": 0.1235, "eval_rougeL": 0.2433, "eval_rougeLsum": 0.2434, "eval_runtime": 1320.3538, "eval_samples_per_second": 14.925, "eval_steps_per_second": 0.356, "step": 9385 }, { "epoch": 5.01, "grad_norm": 1.181012511253357, "learning_rate": 1.499200852424081e-05, "loss": 2.205, "step": 9400 }, { "epoch": 5.03, "grad_norm": 0.8982387185096741, "learning_rate": 1.4965370271710177e-05, "loss": 2.2044, "step": 9450 }, { "epoch": 5.06, "grad_norm": 0.9843763709068298, "learning_rate": 1.4938732019179542e-05, "loss": 2.2087, "step": 9500 }, { "epoch": 5.09, "grad_norm": 0.9226497411727905, "learning_rate": 1.4912093766648909e-05, "loss": 2.2019, "step": 9550 }, { "epoch": 5.11, "grad_norm": 1.0900003910064697, "learning_rate": 1.4885455514118274e-05, "loss": 2.2219, "step": 9600 }, { "epoch": 5.14, "grad_norm": 1.0898627042770386, "learning_rate": 1.485881726158764e-05, "loss": 2.2281, "step": 9650 }, { "epoch": 5.17, "grad_norm": 1.1963268518447876, "learning_rate": 1.4832179009057006e-05, "loss": 2.2289, "step": 9700 }, { "epoch": 5.19, "grad_norm": 0.9371781349182129, "learning_rate": 1.4805540756526373e-05, "loss": 2.2287, "step": 9750 }, { "epoch": 5.22, "grad_norm": 0.9393157958984375, "learning_rate": 1.4778902503995741e-05, "loss": 2.1454, "step": 9800 }, { "epoch": 5.25, "grad_norm": 1.1463308334350586, "learning_rate": 1.4752264251465104e-05, "loss": 2.2116, "step": 9850 }, { "epoch": 5.27, "grad_norm": 1.232642412185669, "learning_rate": 1.4725625998934471e-05, "loss": 2.2056, "step": 9900 }, { "epoch": 5.3, "grad_norm": 0.8566424250602722, "learning_rate": 1.4698987746403836e-05, "loss": 2.2077, "step": 9950 }, { "epoch": 5.33, "grad_norm": 1.073114275932312, "learning_rate": 1.4672349493873203e-05, "loss": 2.2043, "step": 10000 }, { "epoch": 5.35, "grad_norm": 1.100190281867981, "learning_rate": 1.4645711241342568e-05, "loss": 2.2105, "step": 10050 }, { "epoch": 5.38, "grad_norm": 0.8915311694145203, "learning_rate": 1.4619072988811935e-05, "loss": 2.2263, "step": 10100 }, { "epoch": 5.41, "grad_norm": 1.0838483572006226, "learning_rate": 1.45924347362813e-05, "loss": 2.2386, "step": 10150 }, { "epoch": 5.43, "grad_norm": 1.0265840291976929, "learning_rate": 1.4565796483750666e-05, "loss": 2.2046, "step": 10200 }, { "epoch": 5.46, "grad_norm": 1.012404441833496, "learning_rate": 1.4539158231220035e-05, "loss": 2.1914, "step": 10250 }, { "epoch": 5.49, "grad_norm": 0.8332634568214417, "learning_rate": 1.4512519978689398e-05, "loss": 2.2424, "step": 10300 }, { "epoch": 5.51, "grad_norm": 0.9419781565666199, "learning_rate": 1.4485881726158767e-05, "loss": 2.2081, "step": 10350 }, { "epoch": 5.54, "grad_norm": 0.9537687301635742, "learning_rate": 1.445924347362813e-05, "loss": 2.2382, "step": 10400 }, { "epoch": 5.57, "grad_norm": 0.8523421883583069, "learning_rate": 1.4432605221097498e-05, "loss": 2.2479, "step": 10450 }, { "epoch": 5.59, "grad_norm": 0.9717277884483337, "learning_rate": 1.4405966968566862e-05, "loss": 2.2335, "step": 10500 }, { "epoch": 5.62, "grad_norm": 1.2556852102279663, "learning_rate": 1.4379328716036229e-05, "loss": 2.207, "step": 10550 }, { "epoch": 5.65, "grad_norm": 1.0949528217315674, "learning_rate": 1.4352690463505594e-05, "loss": 2.2454, "step": 10600 }, { "epoch": 5.67, "grad_norm": 0.8859919309616089, "learning_rate": 1.432605221097496e-05, "loss": 2.1991, "step": 10650 }, { "epoch": 5.7, "grad_norm": 1.0127480030059814, "learning_rate": 1.4299413958444329e-05, "loss": 2.2031, "step": 10700 }, { "epoch": 5.73, "grad_norm": 1.0773361921310425, "learning_rate": 1.4272775705913692e-05, "loss": 2.2183, "step": 10750 }, { "epoch": 5.75, "grad_norm": 1.2354990243911743, "learning_rate": 1.424613745338306e-05, "loss": 2.1787, "step": 10800 }, { "epoch": 5.78, "grad_norm": 0.8633403182029724, "learning_rate": 1.4219499200852424e-05, "loss": 2.2058, "step": 10850 }, { "epoch": 5.81, "grad_norm": 0.8349035382270813, "learning_rate": 1.4192860948321792e-05, "loss": 2.2102, "step": 10900 }, { "epoch": 5.83, "grad_norm": 0.954465389251709, "learning_rate": 1.4166222695791156e-05, "loss": 2.2074, "step": 10950 }, { "epoch": 5.86, "grad_norm": 1.0947058200836182, "learning_rate": 1.4139584443260524e-05, "loss": 2.2063, "step": 11000 }, { "epoch": 5.89, "grad_norm": 1.0066620111465454, "learning_rate": 1.4112946190729891e-05, "loss": 2.2391, "step": 11050 }, { "epoch": 5.91, "grad_norm": 1.3262407779693604, "learning_rate": 1.4086307938199256e-05, "loss": 2.1883, "step": 11100 }, { "epoch": 5.94, "grad_norm": 0.898461639881134, "learning_rate": 1.4059669685668623e-05, "loss": 2.2254, "step": 11150 }, { "epoch": 5.97, "grad_norm": 1.2728203535079956, "learning_rate": 1.4033031433137986e-05, "loss": 2.2002, "step": 11200 }, { "epoch": 5.99, "grad_norm": 0.8816812038421631, "learning_rate": 1.4006393180607354e-05, "loss": 2.222, "step": 11250 }, { "epoch": 6.0, "eval_bert-score-f1": 0.8800827935451446, "eval_bert-score-precision": 0.884203289352576, "eval_bert-score-recall": 0.8763012464314104, "eval_gen_len": 47.73409113975439, "eval_loss": 2.0270581245422363, "eval_rouge1": 0.3308, "eval_rouge2": 0.125, "eval_rougeL": 0.2449, "eval_rougeLsum": 0.245, "eval_runtime": 1367.5469, "eval_samples_per_second": 14.41, "eval_steps_per_second": 0.344, "step": 11262 }, { "epoch": 6.02, "grad_norm": 0.9405691623687744, "learning_rate": 1.3979754928076718e-05, "loss": 2.1931, "step": 11300 }, { "epoch": 6.05, "grad_norm": 0.8556106686592102, "learning_rate": 1.3953116675546086e-05, "loss": 2.2086, "step": 11350 }, { "epoch": 6.07, "grad_norm": 1.0254322290420532, "learning_rate": 1.392647842301545e-05, "loss": 2.2235, "step": 11400 }, { "epoch": 6.1, "grad_norm": 0.9665789604187012, "learning_rate": 1.3899840170484818e-05, "loss": 2.1956, "step": 11450 }, { "epoch": 6.13, "grad_norm": 1.1138479709625244, "learning_rate": 1.3873201917954185e-05, "loss": 2.2233, "step": 11500 }, { "epoch": 6.15, "grad_norm": 0.9537137150764465, "learning_rate": 1.384656366542355e-05, "loss": 2.2047, "step": 11550 }, { "epoch": 6.18, "grad_norm": 0.9105952978134155, "learning_rate": 1.3819925412892917e-05, "loss": 2.1739, "step": 11600 }, { "epoch": 6.21, "grad_norm": 0.9230592250823975, "learning_rate": 1.3793287160362282e-05, "loss": 2.1911, "step": 11650 }, { "epoch": 6.23, "grad_norm": 0.944442868232727, "learning_rate": 1.3766648907831648e-05, "loss": 2.2074, "step": 11700 }, { "epoch": 6.26, "grad_norm": 0.9925301671028137, "learning_rate": 1.3740010655301013e-05, "loss": 2.1973, "step": 11750 }, { "epoch": 6.29, "grad_norm": 1.0720447301864624, "learning_rate": 1.371337240277038e-05, "loss": 2.181, "step": 11800 }, { "epoch": 6.31, "grad_norm": 0.9020980000495911, "learning_rate": 1.3686734150239745e-05, "loss": 2.193, "step": 11850 }, { "epoch": 6.34, "grad_norm": 0.9980204701423645, "learning_rate": 1.3660095897709112e-05, "loss": 2.2342, "step": 11900 }, { "epoch": 6.37, "grad_norm": 0.9240878820419312, "learning_rate": 1.3633457645178479e-05, "loss": 2.1855, "step": 11950 }, { "epoch": 6.39, "grad_norm": 1.1149895191192627, "learning_rate": 1.3606819392647844e-05, "loss": 2.1918, "step": 12000 }, { "epoch": 6.42, "grad_norm": 0.8434773683547974, "learning_rate": 1.358018114011721e-05, "loss": 2.2347, "step": 12050 }, { "epoch": 6.45, "grad_norm": 0.8160800337791443, "learning_rate": 1.3553542887586576e-05, "loss": 2.2275, "step": 12100 }, { "epoch": 6.47, "grad_norm": 1.0393248796463013, "learning_rate": 1.3526904635055942e-05, "loss": 2.2449, "step": 12150 }, { "epoch": 6.5, "grad_norm": 0.9805082082748413, "learning_rate": 1.3500266382525307e-05, "loss": 2.2118, "step": 12200 }, { "epoch": 6.53, "grad_norm": 0.8884342908859253, "learning_rate": 1.3473628129994674e-05, "loss": 2.2184, "step": 12250 }, { "epoch": 6.55, "grad_norm": 1.1096868515014648, "learning_rate": 1.3446989877464039e-05, "loss": 2.231, "step": 12300 }, { "epoch": 6.58, "grad_norm": 0.937127947807312, "learning_rate": 1.3420351624933406e-05, "loss": 2.2032, "step": 12350 }, { "epoch": 6.61, "grad_norm": 1.0019285678863525, "learning_rate": 1.3393713372402773e-05, "loss": 2.2028, "step": 12400 }, { "epoch": 6.63, "grad_norm": 1.070123553276062, "learning_rate": 1.3367075119872138e-05, "loss": 2.2673, "step": 12450 }, { "epoch": 6.66, "grad_norm": 0.8974484801292419, "learning_rate": 1.3340436867341504e-05, "loss": 2.1909, "step": 12500 }, { "epoch": 6.69, "grad_norm": 0.9534024596214294, "learning_rate": 1.331379861481087e-05, "loss": 2.2061, "step": 12550 }, { "epoch": 6.71, "grad_norm": 0.8722013831138611, "learning_rate": 1.3287160362280236e-05, "loss": 2.2054, "step": 12600 }, { "epoch": 6.74, "grad_norm": 1.222458004951477, "learning_rate": 1.3260522109749601e-05, "loss": 2.167, "step": 12650 }, { "epoch": 6.77, "grad_norm": 0.8800060153007507, "learning_rate": 1.3233883857218968e-05, "loss": 2.1955, "step": 12700 }, { "epoch": 6.79, "grad_norm": 1.005924105644226, "learning_rate": 1.3207245604688335e-05, "loss": 2.1644, "step": 12750 }, { "epoch": 6.82, "grad_norm": 1.0622010231018066, "learning_rate": 1.31806073521577e-05, "loss": 2.226, "step": 12800 }, { "epoch": 6.85, "grad_norm": 0.9073338508605957, "learning_rate": 1.3153969099627067e-05, "loss": 2.1781, "step": 12850 }, { "epoch": 6.87, "grad_norm": 1.0695091485977173, "learning_rate": 1.3127330847096432e-05, "loss": 2.1965, "step": 12900 }, { "epoch": 6.9, "grad_norm": 1.0539031028747559, "learning_rate": 1.3100692594565798e-05, "loss": 2.1864, "step": 12950 }, { "epoch": 6.93, "grad_norm": 1.0355671644210815, "learning_rate": 1.3074054342035163e-05, "loss": 2.21, "step": 13000 }, { "epoch": 6.95, "grad_norm": 0.9524690508842468, "learning_rate": 1.304741608950453e-05, "loss": 2.2146, "step": 13050 }, { "epoch": 6.98, "grad_norm": 0.8750210404396057, "learning_rate": 1.3020777836973895e-05, "loss": 2.2185, "step": 13100 }, { "epoch": 7.0, "eval_bert-score-f1": 0.8803590643584976, "eval_bert-score-precision": 0.8845637915861083, "eval_bert-score-recall": 0.8764868623034598, "eval_gen_len": 47.655891606617274, "eval_loss": 2.022202253341675, "eval_rouge1": 0.3314, "eval_rouge2": 0.1248, "eval_rougeL": 0.2451, "eval_rougeLsum": 0.2452, "eval_runtime": 1324.8965, "eval_samples_per_second": 14.874, "eval_steps_per_second": 0.355, "step": 13139 }, { "epoch": 7.01, "grad_norm": 1.8067052364349365, "learning_rate": 1.2994139584443262e-05, "loss": 2.2044, "step": 13150 }, { "epoch": 7.03, "grad_norm": 1.0369492769241333, "learning_rate": 1.2967501331912629e-05, "loss": 2.2093, "step": 13200 }, { "epoch": 7.06, "grad_norm": 0.7998984456062317, "learning_rate": 1.2940863079381994e-05, "loss": 2.1803, "step": 13250 }, { "epoch": 7.09, "grad_norm": 0.8763892650604248, "learning_rate": 1.291422482685136e-05, "loss": 2.2028, "step": 13300 }, { "epoch": 7.11, "grad_norm": 1.031553864479065, "learning_rate": 1.2887586574320726e-05, "loss": 2.1598, "step": 13350 }, { "epoch": 7.14, "grad_norm": 1.2158304452896118, "learning_rate": 1.2860948321790092e-05, "loss": 2.2008, "step": 13400 }, { "epoch": 7.17, "grad_norm": 1.0015206336975098, "learning_rate": 1.2834310069259457e-05, "loss": 2.2118, "step": 13450 }, { "epoch": 7.19, "grad_norm": 1.3029738664627075, "learning_rate": 1.2807671816728824e-05, "loss": 2.163, "step": 13500 }, { "epoch": 7.22, "grad_norm": 1.1144462823867798, "learning_rate": 1.2781033564198189e-05, "loss": 2.1821, "step": 13550 }, { "epoch": 7.25, "grad_norm": 0.9808390736579895, "learning_rate": 1.2754395311667556e-05, "loss": 2.1844, "step": 13600 }, { "epoch": 7.27, "grad_norm": 0.8838719129562378, "learning_rate": 1.2727757059136923e-05, "loss": 2.1936, "step": 13650 }, { "epoch": 7.3, "grad_norm": 1.0219964981079102, "learning_rate": 1.2701118806606288e-05, "loss": 2.1864, "step": 13700 }, { "epoch": 7.33, "grad_norm": 0.9708404541015625, "learning_rate": 1.2674480554075654e-05, "loss": 2.1734, "step": 13750 }, { "epoch": 7.35, "grad_norm": 0.9595683217048645, "learning_rate": 1.264784230154502e-05, "loss": 2.2069, "step": 13800 }, { "epoch": 7.38, "grad_norm": 1.2408117055892944, "learning_rate": 1.2621204049014386e-05, "loss": 2.1918, "step": 13850 }, { "epoch": 7.41, "grad_norm": 0.9526282548904419, "learning_rate": 1.2594565796483751e-05, "loss": 2.2069, "step": 13900 }, { "epoch": 7.43, "grad_norm": 0.9085534811019897, "learning_rate": 1.2567927543953118e-05, "loss": 2.1929, "step": 13950 }, { "epoch": 7.46, "grad_norm": 0.9988005757331848, "learning_rate": 1.2541289291422483e-05, "loss": 2.212, "step": 14000 }, { "epoch": 7.49, "grad_norm": 0.9731308221817017, "learning_rate": 1.251465103889185e-05, "loss": 2.2168, "step": 14050 }, { "epoch": 7.51, "grad_norm": 1.2218581438064575, "learning_rate": 1.2488012786361216e-05, "loss": 2.2136, "step": 14100 }, { "epoch": 7.54, "grad_norm": 1.3860177993774414, "learning_rate": 1.2461374533830582e-05, "loss": 2.1844, "step": 14150 }, { "epoch": 7.57, "grad_norm": 1.3227049112319946, "learning_rate": 1.2434736281299948e-05, "loss": 2.2017, "step": 14200 }, { "epoch": 7.59, "grad_norm": 0.9687981009483337, "learning_rate": 1.2408098028769313e-05, "loss": 2.2242, "step": 14250 }, { "epoch": 7.62, "grad_norm": 1.0260825157165527, "learning_rate": 1.238145977623868e-05, "loss": 2.2096, "step": 14300 }, { "epoch": 7.65, "grad_norm": 0.9353439211845398, "learning_rate": 1.2354821523708045e-05, "loss": 2.2122, "step": 14350 }, { "epoch": 7.67, "grad_norm": 1.0099036693572998, "learning_rate": 1.2328183271177412e-05, "loss": 2.2187, "step": 14400 }, { "epoch": 7.7, "grad_norm": 0.8156920075416565, "learning_rate": 1.2301545018646779e-05, "loss": 2.1853, "step": 14450 }, { "epoch": 7.73, "grad_norm": 14.783089637756348, "learning_rate": 1.2274906766116144e-05, "loss": 2.1917, "step": 14500 }, { "epoch": 7.75, "grad_norm": 0.9252942204475403, "learning_rate": 1.224826851358551e-05, "loss": 2.2074, "step": 14550 }, { "epoch": 7.78, "grad_norm": 0.9822306632995605, "learning_rate": 1.2221630261054875e-05, "loss": 2.193, "step": 14600 }, { "epoch": 7.81, "grad_norm": 1.1552455425262451, "learning_rate": 1.2194992008524242e-05, "loss": 2.1851, "step": 14650 }, { "epoch": 7.83, "grad_norm": 1.0293680429458618, "learning_rate": 1.2168353755993607e-05, "loss": 2.2109, "step": 14700 }, { "epoch": 7.86, "grad_norm": 1.0043119192123413, "learning_rate": 1.2141715503462974e-05, "loss": 2.2112, "step": 14750 }, { "epoch": 7.88, "grad_norm": 0.9331013560295105, "learning_rate": 1.2115077250932339e-05, "loss": 2.1837, "step": 14800 }, { "epoch": 7.91, "grad_norm": 0.9587385058403015, "learning_rate": 1.2088438998401706e-05, "loss": 2.1773, "step": 14850 }, { "epoch": 7.94, "grad_norm": 1.1071295738220215, "learning_rate": 1.2061800745871073e-05, "loss": 2.2065, "step": 14900 }, { "epoch": 7.96, "grad_norm": 1.34778892993927, "learning_rate": 1.2035162493340438e-05, "loss": 2.1854, "step": 14950 }, { "epoch": 7.99, "grad_norm": 3.2136006355285645, "learning_rate": 1.2008524240809804e-05, "loss": 2.1886, "step": 15000 }, { "epoch": 8.0, "eval_bert-score-f1": 0.8805449074880007, "eval_bert-score-precision": 0.8848301609899627, "eval_bert-score-recall": 0.8765954361686147, "eval_gen_len": 47.339947224195676, "eval_loss": 2.0204520225524902, "eval_rouge1": 0.3323, "eval_rouge2": 0.1257, "eval_rougeL": 0.2461, "eval_rougeLsum": 0.2462, "eval_runtime": 1323.5674, "eval_samples_per_second": 14.889, "eval_steps_per_second": 0.355, "step": 15016 }, { "epoch": 8.02, "grad_norm": 0.8995711803436279, "learning_rate": 1.198188598827917e-05, "loss": 2.1601, "step": 15050 }, { "epoch": 8.04, "grad_norm": 1.0043758153915405, "learning_rate": 1.1955247735748536e-05, "loss": 2.2012, "step": 15100 }, { "epoch": 8.07, "grad_norm": 0.9039593935012817, "learning_rate": 1.1928609483217901e-05, "loss": 2.2207, "step": 15150 }, { "epoch": 8.1, "grad_norm": 0.9619396328926086, "learning_rate": 1.1901971230687268e-05, "loss": 2.1609, "step": 15200 }, { "epoch": 8.12, "grad_norm": 1.0527337789535522, "learning_rate": 1.1875332978156633e-05, "loss": 2.221, "step": 15250 }, { "epoch": 8.15, "grad_norm": 0.9329215288162231, "learning_rate": 1.1848694725626e-05, "loss": 2.1669, "step": 15300 }, { "epoch": 8.18, "grad_norm": 1.0186532735824585, "learning_rate": 1.1822056473095366e-05, "loss": 2.1857, "step": 15350 }, { "epoch": 8.2, "grad_norm": 1.1034983396530151, "learning_rate": 1.1795418220564731e-05, "loss": 2.1956, "step": 15400 }, { "epoch": 8.23, "grad_norm": 1.0719212293624878, "learning_rate": 1.1768779968034098e-05, "loss": 2.2007, "step": 15450 }, { "epoch": 8.26, "grad_norm": 1.5037603378295898, "learning_rate": 1.1742141715503463e-05, "loss": 2.1967, "step": 15500 }, { "epoch": 8.28, "grad_norm": 0.9770453572273254, "learning_rate": 1.171550346297283e-05, "loss": 2.218, "step": 15550 }, { "epoch": 8.31, "grad_norm": 1.0334933996200562, "learning_rate": 1.1688865210442195e-05, "loss": 2.1892, "step": 15600 }, { "epoch": 8.34, "grad_norm": 0.9753350615501404, "learning_rate": 1.1662226957911562e-05, "loss": 2.2074, "step": 15650 }, { "epoch": 8.36, "grad_norm": 0.9147941470146179, "learning_rate": 1.1635588705380927e-05, "loss": 2.2111, "step": 15700 }, { "epoch": 8.39, "grad_norm": 1.0154210329055786, "learning_rate": 1.1608950452850294e-05, "loss": 2.1971, "step": 15750 }, { "epoch": 8.42, "grad_norm": 1.0365736484527588, "learning_rate": 1.158231220031966e-05, "loss": 2.1874, "step": 15800 }, { "epoch": 8.44, "grad_norm": 0.894719660282135, "learning_rate": 1.1555673947789025e-05, "loss": 2.2059, "step": 15850 }, { "epoch": 8.47, "grad_norm": 0.9420655369758606, "learning_rate": 1.1529035695258392e-05, "loss": 2.1473, "step": 15900 }, { "epoch": 8.5, "grad_norm": 1.0870007276535034, "learning_rate": 1.1502397442727757e-05, "loss": 2.1944, "step": 15950 }, { "epoch": 8.52, "grad_norm": 1.072520136833191, "learning_rate": 1.1475759190197124e-05, "loss": 2.1914, "step": 16000 }, { "epoch": 8.55, "grad_norm": 0.9754800200462341, "learning_rate": 1.1449120937666489e-05, "loss": 2.1726, "step": 16050 }, { "epoch": 8.58, "grad_norm": 0.983051598072052, "learning_rate": 1.1422482685135856e-05, "loss": 2.1591, "step": 16100 }, { "epoch": 8.6, "grad_norm": 0.9521353244781494, "learning_rate": 1.1395844432605222e-05, "loss": 2.1675, "step": 16150 }, { "epoch": 8.63, "grad_norm": 1.016432762145996, "learning_rate": 1.1369206180074587e-05, "loss": 2.1821, "step": 16200 }, { "epoch": 8.66, "grad_norm": 0.9886535406112671, "learning_rate": 1.1342567927543954e-05, "loss": 2.1711, "step": 16250 }, { "epoch": 8.68, "grad_norm": 0.9348542094230652, "learning_rate": 1.131592967501332e-05, "loss": 2.2016, "step": 16300 }, { "epoch": 8.71, "grad_norm": 0.8952618837356567, "learning_rate": 1.1289291422482686e-05, "loss": 2.1632, "step": 16350 }, { "epoch": 8.74, "grad_norm": 0.9894302487373352, "learning_rate": 1.1262653169952051e-05, "loss": 2.1792, "step": 16400 }, { "epoch": 8.76, "grad_norm": 0.899584949016571, "learning_rate": 1.1236014917421418e-05, "loss": 2.1698, "step": 16450 }, { "epoch": 8.79, "grad_norm": 0.9734613299369812, "learning_rate": 1.1209376664890783e-05, "loss": 2.2253, "step": 16500 }, { "epoch": 8.82, "grad_norm": 1.0697177648544312, "learning_rate": 1.118273841236015e-05, "loss": 2.2307, "step": 16550 }, { "epoch": 8.84, "grad_norm": 0.827741801738739, "learning_rate": 1.1156100159829516e-05, "loss": 2.179, "step": 16600 }, { "epoch": 8.87, "grad_norm": 0.8641231060028076, "learning_rate": 1.1129461907298881e-05, "loss": 2.1768, "step": 16650 }, { "epoch": 8.9, "grad_norm": 0.9500383734703064, "learning_rate": 1.1102823654768248e-05, "loss": 2.185, "step": 16700 }, { "epoch": 8.92, "grad_norm": 0.8771828413009644, "learning_rate": 1.1076185402237613e-05, "loss": 2.2018, "step": 16750 }, { "epoch": 8.95, "grad_norm": 0.7786308526992798, "learning_rate": 1.104954714970698e-05, "loss": 2.1856, "step": 16800 }, { "epoch": 8.98, "grad_norm": 0.9104407429695129, "learning_rate": 1.1022908897176345e-05, "loss": 2.2215, "step": 16850 }, { "epoch": 9.0, "eval_bert-score-f1": 0.8808141563956924, "eval_bert-score-precision": 0.8852914021691005, "eval_bert-score-recall": 0.8766740019322894, "eval_gen_len": 47.14802598193444, "eval_loss": 2.0156819820404053, "eval_rouge1": 0.333, "eval_rouge2": 0.1262, "eval_rougeL": 0.2467, "eval_rougeLsum": 0.2467, "eval_runtime": 1329.5202, "eval_samples_per_second": 14.822, "eval_steps_per_second": 0.354, "step": 16893 }, { "epoch": 9.0, "grad_norm": 0.9584008455276489, "learning_rate": 1.0996270644645712e-05, "loss": 2.1627, "step": 16900 }, { "epoch": 9.03, "grad_norm": 1.0817357301712036, "learning_rate": 1.0969632392115077e-05, "loss": 2.174, "step": 16950 }, { "epoch": 9.06, "grad_norm": 1.1630058288574219, "learning_rate": 1.0942994139584444e-05, "loss": 2.1988, "step": 17000 }, { "epoch": 9.08, "grad_norm": 1.065058946609497, "learning_rate": 1.091635588705381e-05, "loss": 2.1628, "step": 17050 }, { "epoch": 9.11, "grad_norm": 1.3880870342254639, "learning_rate": 1.0889717634523175e-05, "loss": 2.177, "step": 17100 }, { "epoch": 9.14, "grad_norm": 1.0271745920181274, "learning_rate": 1.0863079381992542e-05, "loss": 2.212, "step": 17150 }, { "epoch": 9.16, "grad_norm": 0.9514613747596741, "learning_rate": 1.0836441129461907e-05, "loss": 2.1729, "step": 17200 }, { "epoch": 9.19, "grad_norm": 0.9049180746078491, "learning_rate": 1.0809802876931274e-05, "loss": 2.1928, "step": 17250 }, { "epoch": 9.22, "grad_norm": 1.0372991561889648, "learning_rate": 1.0783164624400639e-05, "loss": 2.207, "step": 17300 }, { "epoch": 9.24, "grad_norm": 0.9088106751441956, "learning_rate": 1.0756526371870006e-05, "loss": 2.1818, "step": 17350 }, { "epoch": 9.27, "grad_norm": 1.0153067111968994, "learning_rate": 1.072988811933937e-05, "loss": 2.1996, "step": 17400 }, { "epoch": 9.3, "grad_norm": 1.0672743320465088, "learning_rate": 1.0703249866808737e-05, "loss": 2.2183, "step": 17450 }, { "epoch": 9.32, "grad_norm": 1.0173332691192627, "learning_rate": 1.0676611614278106e-05, "loss": 2.2153, "step": 17500 }, { "epoch": 9.35, "grad_norm": 1.1739368438720703, "learning_rate": 1.064997336174747e-05, "loss": 2.1694, "step": 17550 }, { "epoch": 9.38, "grad_norm": 0.8909546136856079, "learning_rate": 1.0623335109216838e-05, "loss": 2.1908, "step": 17600 }, { "epoch": 9.4, "grad_norm": 0.9530623555183411, "learning_rate": 1.0596696856686201e-05, "loss": 2.2027, "step": 17650 }, { "epoch": 9.43, "grad_norm": 0.8588944673538208, "learning_rate": 1.057005860415557e-05, "loss": 2.143, "step": 17700 }, { "epoch": 9.46, "grad_norm": 1.096243977546692, "learning_rate": 1.0543420351624933e-05, "loss": 2.1315, "step": 17750 }, { "epoch": 9.48, "grad_norm": 0.888977587223053, "learning_rate": 1.05167820990943e-05, "loss": 2.1478, "step": 17800 }, { "epoch": 9.51, "grad_norm": 1.374234676361084, "learning_rate": 1.0490143846563668e-05, "loss": 2.1564, "step": 17850 }, { "epoch": 9.54, "grad_norm": 1.0646979808807373, "learning_rate": 1.0463505594033031e-05, "loss": 2.1963, "step": 17900 }, { "epoch": 9.56, "grad_norm": 0.8910280466079712, "learning_rate": 1.04368673415024e-05, "loss": 2.1892, "step": 17950 }, { "epoch": 9.59, "grad_norm": 1.0399140119552612, "learning_rate": 1.0410229088971763e-05, "loss": 2.2132, "step": 18000 }, { "epoch": 9.62, "grad_norm": 1.1613365411758423, "learning_rate": 1.0383590836441132e-05, "loss": 2.1946, "step": 18050 }, { "epoch": 9.64, "grad_norm": 0.9431652426719666, "learning_rate": 1.0356952583910495e-05, "loss": 2.1825, "step": 18100 }, { "epoch": 9.67, "grad_norm": 1.1334350109100342, "learning_rate": 1.0330314331379863e-05, "loss": 2.1937, "step": 18150 }, { "epoch": 9.7, "grad_norm": 1.1028201580047607, "learning_rate": 1.0303676078849227e-05, "loss": 2.2181, "step": 18200 }, { "epoch": 9.72, "grad_norm": 0.9916718602180481, "learning_rate": 1.0277037826318595e-05, "loss": 2.174, "step": 18250 }, { "epoch": 9.75, "grad_norm": 1.0355281829833984, "learning_rate": 1.0250399573787962e-05, "loss": 2.1565, "step": 18300 }, { "epoch": 9.78, "grad_norm": 0.901858925819397, "learning_rate": 1.0223761321257327e-05, "loss": 2.1732, "step": 18350 }, { "epoch": 9.8, "grad_norm": 1.1691358089447021, "learning_rate": 1.0197123068726694e-05, "loss": 2.1854, "step": 18400 }, { "epoch": 9.83, "grad_norm": 1.1045140027999878, "learning_rate": 1.0170484816196057e-05, "loss": 2.1906, "step": 18450 }, { "epoch": 9.86, "grad_norm": 1.4803402423858643, "learning_rate": 1.0143846563665425e-05, "loss": 2.1744, "step": 18500 }, { "epoch": 9.88, "grad_norm": 1.0411149263381958, "learning_rate": 1.0117208311134789e-05, "loss": 2.2064, "step": 18550 }, { "epoch": 9.91, "grad_norm": 0.9394697546958923, "learning_rate": 1.0090570058604157e-05, "loss": 2.1646, "step": 18600 }, { "epoch": 9.94, "grad_norm": 1.0079654455184937, "learning_rate": 1.006393180607352e-05, "loss": 2.1638, "step": 18650 }, { "epoch": 9.96, "grad_norm": 0.9374598264694214, "learning_rate": 1.0037293553542889e-05, "loss": 2.2037, "step": 18700 }, { "epoch": 9.99, "grad_norm": 0.9795972108840942, "learning_rate": 1.0010655301012256e-05, "loss": 2.2119, "step": 18750 }, { "epoch": 10.0, "eval_bert-score-f1": 0.8809602989904121, "eval_bert-score-precision": 0.8852206096635378, "eval_bert-score-recall": 0.8770327499302876, "eval_gen_len": 47.48157921445245, "eval_loss": 2.0146677494049072, "eval_rouge1": 0.3344, "eval_rouge2": 0.1266, "eval_rougeL": 0.2474, "eval_rougeLsum": 0.2475, "eval_runtime": 1332.7051, "eval_samples_per_second": 14.786, "eval_steps_per_second": 0.353, "step": 18770 }, { "epoch": 10.02, "grad_norm": 1.0391963720321655, "learning_rate": 9.984017048481621e-06, "loss": 2.1618, "step": 18800 }, { "epoch": 10.04, "grad_norm": 0.9638227820396423, "learning_rate": 9.957378795950986e-06, "loss": 2.1847, "step": 18850 }, { "epoch": 10.07, "grad_norm": 0.9130365252494812, "learning_rate": 9.930740543420353e-06, "loss": 2.1409, "step": 18900 }, { "epoch": 10.1, "grad_norm": 0.97170490026474, "learning_rate": 9.90410229088972e-06, "loss": 2.2007, "step": 18950 }, { "epoch": 10.12, "grad_norm": 0.9903939962387085, "learning_rate": 9.877464038359084e-06, "loss": 2.1873, "step": 19000 }, { "epoch": 10.15, "grad_norm": 1.0155619382858276, "learning_rate": 9.850825785828451e-06, "loss": 2.2185, "step": 19050 }, { "epoch": 10.18, "grad_norm": 1.0440953969955444, "learning_rate": 9.824187533297816e-06, "loss": 2.183, "step": 19100 }, { "epoch": 10.2, "grad_norm": 1.002216100692749, "learning_rate": 9.797549280767183e-06, "loss": 2.1827, "step": 19150 }, { "epoch": 10.23, "grad_norm": 0.9036744236946106, "learning_rate": 9.770911028236548e-06, "loss": 2.157, "step": 19200 }, { "epoch": 10.26, "grad_norm": 1.0186132192611694, "learning_rate": 9.744272775705915e-06, "loss": 2.1458, "step": 19250 }, { "epoch": 10.28, "grad_norm": 1.157223105430603, "learning_rate": 9.71763452317528e-06, "loss": 2.1578, "step": 19300 }, { "epoch": 10.31, "grad_norm": 1.0317802429199219, "learning_rate": 9.690996270644647e-06, "loss": 2.1852, "step": 19350 }, { "epoch": 10.34, "grad_norm": 0.9137316942214966, "learning_rate": 9.664358018114013e-06, "loss": 2.228, "step": 19400 }, { "epoch": 10.36, "grad_norm": 0.8266160488128662, "learning_rate": 9.637719765583378e-06, "loss": 2.2115, "step": 19450 }, { "epoch": 10.39, "grad_norm": 0.9575254917144775, "learning_rate": 9.611081513052745e-06, "loss": 2.1671, "step": 19500 }, { "epoch": 10.42, "grad_norm": 1.1885944604873657, "learning_rate": 9.58444326052211e-06, "loss": 2.1953, "step": 19550 }, { "epoch": 10.44, "grad_norm": 0.8843189477920532, "learning_rate": 9.557805007991477e-06, "loss": 2.1859, "step": 19600 }, { "epoch": 10.47, "grad_norm": 1.5487014055252075, "learning_rate": 9.531166755460842e-06, "loss": 2.1678, "step": 19650 }, { "epoch": 10.5, "grad_norm": 1.1339489221572876, "learning_rate": 9.504528502930209e-06, "loss": 2.1917, "step": 19700 }, { "epoch": 10.52, "grad_norm": 0.973892331123352, "learning_rate": 9.477890250399575e-06, "loss": 2.1741, "step": 19750 }, { "epoch": 10.55, "grad_norm": 1.0158993005752563, "learning_rate": 9.45125199786894e-06, "loss": 2.1949, "step": 19800 }, { "epoch": 10.58, "grad_norm": 0.8538472056388855, "learning_rate": 9.424613745338307e-06, "loss": 2.1792, "step": 19850 }, { "epoch": 10.6, "grad_norm": 1.1701879501342773, "learning_rate": 9.397975492807672e-06, "loss": 2.1807, "step": 19900 }, { "epoch": 10.63, "grad_norm": 1.2362642288208008, "learning_rate": 9.371337240277039e-06, "loss": 2.1767, "step": 19950 }, { "epoch": 10.66, "grad_norm": 1.123152494430542, "learning_rate": 9.344698987746404e-06, "loss": 2.1598, "step": 20000 }, { "epoch": 10.68, "grad_norm": 0.953671932220459, "learning_rate": 9.31806073521577e-06, "loss": 2.1543, "step": 20050 }, { "epoch": 10.71, "grad_norm": 1.0523099899291992, "learning_rate": 9.291422482685136e-06, "loss": 2.2153, "step": 20100 }, { "epoch": 10.74, "grad_norm": 1.3634711503982544, "learning_rate": 9.264784230154503e-06, "loss": 2.1659, "step": 20150 }, { "epoch": 10.76, "grad_norm": 0.9536843299865723, "learning_rate": 9.23814597762387e-06, "loss": 2.1771, "step": 20200 }, { "epoch": 10.79, "grad_norm": 0.9196369647979736, "learning_rate": 9.211507725093234e-06, "loss": 2.1675, "step": 20250 }, { "epoch": 10.82, "grad_norm": 0.846582293510437, "learning_rate": 9.184869472562601e-06, "loss": 2.1461, "step": 20300 }, { "epoch": 10.84, "grad_norm": 0.9845914840698242, "learning_rate": 9.158231220031966e-06, "loss": 2.2221, "step": 20350 }, { "epoch": 10.87, "grad_norm": 1.0751748085021973, "learning_rate": 9.131592967501333e-06, "loss": 2.1662, "step": 20400 }, { "epoch": 10.9, "grad_norm": 0.9254376292228699, "learning_rate": 9.104954714970698e-06, "loss": 2.1693, "step": 20450 }, { "epoch": 10.92, "grad_norm": 0.9914495348930359, "learning_rate": 9.078316462440065e-06, "loss": 2.1774, "step": 20500 }, { "epoch": 10.95, "grad_norm": 1.060456395149231, "learning_rate": 9.05167820990943e-06, "loss": 2.1928, "step": 20550 }, { "epoch": 10.97, "grad_norm": 0.9841185212135315, "learning_rate": 9.025039957378797e-06, "loss": 2.1774, "step": 20600 }, { "epoch": 11.0, "eval_bert-score-f1": 0.8810000661683061, "eval_bert-score-precision": 0.8851854187195088, "eval_bert-score-recall": 0.8771485046185927, "eval_gen_len": 47.51730437430224, "eval_loss": 2.013378381729126, "eval_rouge1": 0.3348, "eval_rouge2": 0.1268, "eval_rougeL": 0.2476, "eval_rougeLsum": 0.2477, "eval_runtime": 1329.4509, "eval_samples_per_second": 14.823, "eval_steps_per_second": 0.354, "step": 20647 }, { "epoch": 11.0, "grad_norm": 0.9042637944221497, "learning_rate": 8.998401704848163e-06, "loss": 2.1635, "step": 20650 }, { "epoch": 11.03, "grad_norm": 0.9590170979499817, "learning_rate": 8.971763452317528e-06, "loss": 2.165, "step": 20700 }, { "epoch": 11.05, "grad_norm": 0.8360864520072937, "learning_rate": 8.945125199786895e-06, "loss": 2.1263, "step": 20750 }, { "epoch": 11.08, "grad_norm": 1.0940536260604858, "learning_rate": 8.91848694725626e-06, "loss": 2.1797, "step": 20800 }, { "epoch": 11.11, "grad_norm": 0.9590544104576111, "learning_rate": 8.891848694725627e-06, "loss": 2.1848, "step": 20850 }, { "epoch": 11.13, "grad_norm": 1.009817361831665, "learning_rate": 8.865210442194992e-06, "loss": 2.1582, "step": 20900 }, { "epoch": 11.16, "grad_norm": 0.9471531510353088, "learning_rate": 8.838572189664359e-06, "loss": 2.193, "step": 20950 }, { "epoch": 11.19, "grad_norm": 1.0452817678451538, "learning_rate": 8.811933937133724e-06, "loss": 2.1514, "step": 21000 }, { "epoch": 11.21, "grad_norm": 1.0049973726272583, "learning_rate": 8.78529568460309e-06, "loss": 2.1969, "step": 21050 }, { "epoch": 11.24, "grad_norm": 1.1841228008270264, "learning_rate": 8.758657432072457e-06, "loss": 2.1479, "step": 21100 }, { "epoch": 11.27, "grad_norm": 0.9284167289733887, "learning_rate": 8.732019179541822e-06, "loss": 2.1597, "step": 21150 }, { "epoch": 11.29, "grad_norm": 1.0026378631591797, "learning_rate": 8.705380927011189e-06, "loss": 2.1856, "step": 21200 }, { "epoch": 11.32, "grad_norm": 1.1704638004302979, "learning_rate": 8.678742674480554e-06, "loss": 2.1991, "step": 21250 }, { "epoch": 11.35, "grad_norm": 0.9646838307380676, "learning_rate": 8.65210442194992e-06, "loss": 2.1673, "step": 21300 }, { "epoch": 11.37, "grad_norm": 0.9169173240661621, "learning_rate": 8.625466169419286e-06, "loss": 2.1879, "step": 21350 }, { "epoch": 11.4, "grad_norm": 1.2698827981948853, "learning_rate": 8.598827916888653e-06, "loss": 2.1711, "step": 21400 }, { "epoch": 11.43, "grad_norm": 1.0014339685440063, "learning_rate": 8.57218966435802e-06, "loss": 2.1574, "step": 21450 }, { "epoch": 11.45, "grad_norm": 1.141926884651184, "learning_rate": 8.545551411827386e-06, "loss": 2.181, "step": 21500 }, { "epoch": 11.48, "grad_norm": 0.9993128180503845, "learning_rate": 8.518913159296751e-06, "loss": 2.1954, "step": 21550 }, { "epoch": 11.51, "grad_norm": 0.8284913301467896, "learning_rate": 8.492274906766118e-06, "loss": 2.1507, "step": 21600 }, { "epoch": 11.53, "grad_norm": 0.9283605217933655, "learning_rate": 8.465636654235483e-06, "loss": 2.164, "step": 21650 }, { "epoch": 11.56, "grad_norm": 1.0683674812316895, "learning_rate": 8.438998401704848e-06, "loss": 2.1689, "step": 21700 }, { "epoch": 11.59, "grad_norm": 0.9687473177909851, "learning_rate": 8.412360149174215e-06, "loss": 2.1447, "step": 21750 }, { "epoch": 11.61, "grad_norm": 1.2580022811889648, "learning_rate": 8.38572189664358e-06, "loss": 2.1895, "step": 21800 }, { "epoch": 11.64, "grad_norm": 0.9309559464454651, "learning_rate": 8.359083644112946e-06, "loss": 2.178, "step": 21850 }, { "epoch": 11.67, "grad_norm": 0.9081181883811951, "learning_rate": 8.332445391582313e-06, "loss": 2.1827, "step": 21900 }, { "epoch": 11.69, "grad_norm": 0.9058334231376648, "learning_rate": 8.30580713905168e-06, "loss": 2.1636, "step": 21950 }, { "epoch": 11.72, "grad_norm": 0.9327389597892761, "learning_rate": 8.279168886521045e-06, "loss": 2.175, "step": 22000 }, { "epoch": 11.75, "grad_norm": 1.3059749603271484, "learning_rate": 8.252530633990412e-06, "loss": 2.1655, "step": 22050 }, { "epoch": 11.77, "grad_norm": 0.9360121488571167, "learning_rate": 8.225892381459777e-06, "loss": 2.1759, "step": 22100 }, { "epoch": 11.8, "grad_norm": 2.760277032852173, "learning_rate": 8.199254128929144e-06, "loss": 2.1784, "step": 22150 }, { "epoch": 11.83, "grad_norm": 0.9712272882461548, "learning_rate": 8.172615876398509e-06, "loss": 2.1736, "step": 22200 }, { "epoch": 11.85, "grad_norm": 0.8907904028892517, "learning_rate": 8.145977623867875e-06, "loss": 2.1918, "step": 22250 }, { "epoch": 11.88, "grad_norm": 1.1707508563995361, "learning_rate": 8.119339371337242e-06, "loss": 2.1679, "step": 22300 }, { "epoch": 11.91, "grad_norm": 0.8738240003585815, "learning_rate": 8.092701118806607e-06, "loss": 2.1831, "step": 22350 }, { "epoch": 11.93, "grad_norm": 1.0194109678268433, "learning_rate": 8.066062866275974e-06, "loss": 2.1654, "step": 22400 }, { "epoch": 11.96, "grad_norm": 2.5614118576049805, "learning_rate": 8.039424613745339e-06, "loss": 2.1985, "step": 22450 }, { "epoch": 11.99, "grad_norm": 1.0855169296264648, "learning_rate": 8.012786361214706e-06, "loss": 2.2065, "step": 22500 }, { "epoch": 12.0, "eval_bert-score-f1": 0.8812093867313604, "eval_bert-score-precision": 0.8855559532955481, "eval_bert-score-recall": 0.8771963626476008, "eval_gen_len": 47.54465644981224, "eval_loss": 2.0089080333709717, "eval_rouge1": 0.3353, "eval_rouge2": 0.1269, "eval_rougeL": 0.2482, "eval_rougeLsum": 0.2483, "eval_runtime": 1322.3144, "eval_samples_per_second": 14.903, "eval_steps_per_second": 0.355, "step": 22524 }, { "epoch": 12.01, "grad_norm": 0.9317869544029236, "learning_rate": 7.98614810868407e-06, "loss": 2.1648, "step": 22550 }, { "epoch": 12.04, "grad_norm": 0.9064331650733948, "learning_rate": 7.959509856153437e-06, "loss": 2.1624, "step": 22600 }, { "epoch": 12.07, "grad_norm": 0.867645263671875, "learning_rate": 7.932871603622802e-06, "loss": 2.135, "step": 22650 }, { "epoch": 12.09, "grad_norm": 0.9419721364974976, "learning_rate": 7.90623335109217e-06, "loss": 2.1511, "step": 22700 }, { "epoch": 12.12, "grad_norm": 1.0487383604049683, "learning_rate": 7.879595098561536e-06, "loss": 2.2027, "step": 22750 }, { "epoch": 12.15, "grad_norm": 0.9865580797195435, "learning_rate": 7.852956846030901e-06, "loss": 2.1988, "step": 22800 }, { "epoch": 12.17, "grad_norm": 0.8484292030334473, "learning_rate": 7.826318593500268e-06, "loss": 2.1662, "step": 22850 }, { "epoch": 12.2, "grad_norm": 1.0033077001571655, "learning_rate": 7.799680340969633e-06, "loss": 2.1494, "step": 22900 }, { "epoch": 12.23, "grad_norm": 0.967682957649231, "learning_rate": 7.773042088439e-06, "loss": 2.149, "step": 22950 }, { "epoch": 12.25, "grad_norm": 1.0493502616882324, "learning_rate": 7.746403835908365e-06, "loss": 2.1783, "step": 23000 }, { "epoch": 12.28, "grad_norm": 1.7216830253601074, "learning_rate": 7.719765583377731e-06, "loss": 2.2004, "step": 23050 }, { "epoch": 12.31, "grad_norm": 0.8698610067367554, "learning_rate": 7.693127330847096e-06, "loss": 2.1873, "step": 23100 }, { "epoch": 12.33, "grad_norm": 0.9329569935798645, "learning_rate": 7.666489078316463e-06, "loss": 2.1386, "step": 23150 }, { "epoch": 12.36, "grad_norm": 1.0915803909301758, "learning_rate": 7.63985082578583e-06, "loss": 2.1847, "step": 23200 }, { "epoch": 12.39, "grad_norm": 0.945863664150238, "learning_rate": 7.613212573255196e-06, "loss": 2.1515, "step": 23250 }, { "epoch": 12.41, "grad_norm": 1.3315609693527222, "learning_rate": 7.586574320724561e-06, "loss": 2.1758, "step": 23300 }, { "epoch": 12.44, "grad_norm": 0.9587283134460449, "learning_rate": 7.559936068193927e-06, "loss": 2.1817, "step": 23350 }, { "epoch": 12.47, "grad_norm": 1.0035264492034912, "learning_rate": 7.533297815663293e-06, "loss": 2.2093, "step": 23400 }, { "epoch": 12.49, "grad_norm": 0.9835115671157837, "learning_rate": 7.5066595631326585e-06, "loss": 2.1545, "step": 23450 }, { "epoch": 12.52, "grad_norm": 1.103174090385437, "learning_rate": 7.480021310602024e-06, "loss": 2.1901, "step": 23500 }, { "epoch": 12.55, "grad_norm": 0.9141381978988647, "learning_rate": 7.45338305807139e-06, "loss": 2.1721, "step": 23550 }, { "epoch": 12.57, "grad_norm": 0.9373638033866882, "learning_rate": 7.426744805540758e-06, "loss": 2.1484, "step": 23600 }, { "epoch": 12.6, "grad_norm": 0.8388597369194031, "learning_rate": 7.400106553010124e-06, "loss": 2.1476, "step": 23650 }, { "epoch": 12.63, "grad_norm": 1.1504141092300415, "learning_rate": 7.37346830047949e-06, "loss": 2.1471, "step": 23700 }, { "epoch": 12.65, "grad_norm": 1.0790530443191528, "learning_rate": 7.3468300479488556e-06, "loss": 2.1905, "step": 23750 }, { "epoch": 12.68, "grad_norm": 1.2390844821929932, "learning_rate": 7.3201917954182215e-06, "loss": 2.1447, "step": 23800 }, { "epoch": 12.71, "grad_norm": 1.1462445259094238, "learning_rate": 7.293553542887587e-06, "loss": 2.176, "step": 23850 }, { "epoch": 12.73, "grad_norm": 1.1283916234970093, "learning_rate": 7.266915290356953e-06, "loss": 2.2012, "step": 23900 }, { "epoch": 12.76, "grad_norm": 0.9534170031547546, "learning_rate": 7.240277037826319e-06, "loss": 2.1723, "step": 23950 }, { "epoch": 12.79, "grad_norm": 1.0652652978897095, "learning_rate": 7.213638785295686e-06, "loss": 2.1637, "step": 24000 }, { "epoch": 12.81, "grad_norm": 0.9238381385803223, "learning_rate": 7.187000532765052e-06, "loss": 2.1684, "step": 24050 }, { "epoch": 12.84, "grad_norm": 1.0384712219238281, "learning_rate": 7.160362280234418e-06, "loss": 2.1827, "step": 24100 }, { "epoch": 12.87, "grad_norm": 1.0068541765213013, "learning_rate": 7.133724027703784e-06, "loss": 2.1631, "step": 24150 }, { "epoch": 12.89, "grad_norm": 1.0497870445251465, "learning_rate": 7.1070857751731495e-06, "loss": 2.1397, "step": 24200 }, { "epoch": 12.92, "grad_norm": 0.9750410318374634, "learning_rate": 7.080447522642515e-06, "loss": 2.1806, "step": 24250 }, { "epoch": 12.95, "grad_norm": 1.383784532546997, "learning_rate": 7.053809270111881e-06, "loss": 2.1835, "step": 24300 }, { "epoch": 12.97, "grad_norm": 1.006396770477295, "learning_rate": 7.027171017581247e-06, "loss": 2.1553, "step": 24350 }, { "epoch": 13.0, "grad_norm": 1.2312555313110352, "learning_rate": 7.000532765050613e-06, "loss": 2.1748, "step": 24400 }, { "epoch": 13.0, "eval_bert-score-f1": 0.8812570747936678, "eval_bert-score-precision": 0.8856106671139539, "eval_bert-score-recall": 0.8772335333635659, "eval_gen_len": 47.40327818938395, "eval_loss": 2.0081796646118164, "eval_rouge1": 0.3355, "eval_rouge2": 0.1268, "eval_rougeL": 0.2484, "eval_rougeLsum": 0.2485, "eval_runtime": 1330.0155, "eval_samples_per_second": 14.816, "eval_steps_per_second": 0.353, "step": 24401 }, { "epoch": 13.03, "grad_norm": 0.9376835227012634, "learning_rate": 6.97389451251998e-06, "loss": 2.177, "step": 24450 }, { "epoch": 13.05, "grad_norm": 0.8431729078292847, "learning_rate": 6.947256259989346e-06, "loss": 2.151, "step": 24500 }, { "epoch": 13.08, "grad_norm": 0.9604516625404358, "learning_rate": 6.920618007458712e-06, "loss": 2.1812, "step": 24550 }, { "epoch": 13.11, "grad_norm": 1.005323886871338, "learning_rate": 6.8939797549280775e-06, "loss": 2.1375, "step": 24600 }, { "epoch": 13.13, "grad_norm": 0.8559876680374146, "learning_rate": 6.867341502397443e-06, "loss": 2.1757, "step": 24650 }, { "epoch": 13.16, "grad_norm": 0.9345346689224243, "learning_rate": 6.840703249866809e-06, "loss": 2.1612, "step": 24700 }, { "epoch": 13.19, "grad_norm": 0.8913391828536987, "learning_rate": 6.814064997336175e-06, "loss": 2.1644, "step": 24750 }, { "epoch": 13.21, "grad_norm": 1.023695468902588, "learning_rate": 6.787426744805541e-06, "loss": 2.1737, "step": 24800 }, { "epoch": 13.24, "grad_norm": 0.9663516879081726, "learning_rate": 6.760788492274908e-06, "loss": 2.194, "step": 24850 }, { "epoch": 13.27, "grad_norm": 2.1459882259368896, "learning_rate": 6.734150239744274e-06, "loss": 2.1383, "step": 24900 }, { "epoch": 13.29, "grad_norm": 1.0092273950576782, "learning_rate": 6.70751198721364e-06, "loss": 2.1482, "step": 24950 }, { "epoch": 13.32, "grad_norm": 0.9323935508728027, "learning_rate": 6.6808737346830055e-06, "loss": 2.1585, "step": 25000 }, { "epoch": 13.35, "grad_norm": 0.9155877828598022, "learning_rate": 6.654235482152371e-06, "loss": 2.1752, "step": 25050 }, { "epoch": 13.37, "grad_norm": 1.0836352109909058, "learning_rate": 6.627597229621737e-06, "loss": 2.1702, "step": 25100 }, { "epoch": 13.4, "grad_norm": 1.0014092922210693, "learning_rate": 6.600958977091103e-06, "loss": 2.1667, "step": 25150 }, { "epoch": 13.43, "grad_norm": 0.917218804359436, "learning_rate": 6.574320724560469e-06, "loss": 2.174, "step": 25200 }, { "epoch": 13.45, "grad_norm": 0.9018741846084595, "learning_rate": 6.547682472029835e-06, "loss": 2.1612, "step": 25250 }, { "epoch": 13.48, "grad_norm": 1.0210672616958618, "learning_rate": 6.521044219499202e-06, "loss": 2.1516, "step": 25300 }, { "epoch": 13.51, "grad_norm": 0.9759948253631592, "learning_rate": 6.494405966968568e-06, "loss": 2.1683, "step": 25350 }, { "epoch": 13.53, "grad_norm": 0.8901129961013794, "learning_rate": 6.4677677144379335e-06, "loss": 2.1777, "step": 25400 }, { "epoch": 13.56, "grad_norm": 0.9080635905265808, "learning_rate": 6.4411294619072994e-06, "loss": 2.1821, "step": 25450 }, { "epoch": 13.59, "grad_norm": 0.9286373257637024, "learning_rate": 6.414491209376665e-06, "loss": 2.1779, "step": 25500 }, { "epoch": 13.61, "grad_norm": 1.1481138467788696, "learning_rate": 6.387852956846031e-06, "loss": 2.1692, "step": 25550 }, { "epoch": 13.64, "grad_norm": 0.9707843661308289, "learning_rate": 6.361214704315397e-06, "loss": 2.1682, "step": 25600 }, { "epoch": 13.67, "grad_norm": 1.042262077331543, "learning_rate": 6.334576451784763e-06, "loss": 2.1584, "step": 25650 }, { "epoch": 13.69, "grad_norm": 0.9933112859725952, "learning_rate": 6.30793819925413e-06, "loss": 2.1558, "step": 25700 }, { "epoch": 13.72, "grad_norm": 0.8889453411102295, "learning_rate": 6.281299946723496e-06, "loss": 2.1592, "step": 25750 }, { "epoch": 13.75, "grad_norm": 0.8605052828788757, "learning_rate": 6.2546616941928615e-06, "loss": 2.1993, "step": 25800 }, { "epoch": 13.77, "grad_norm": 0.946685791015625, "learning_rate": 6.2280234416622274e-06, "loss": 2.1609, "step": 25850 }, { "epoch": 13.8, "grad_norm": 0.9031264185905457, "learning_rate": 6.201385189131593e-06, "loss": 2.1578, "step": 25900 }, { "epoch": 13.83, "grad_norm": 1.1098228693008423, "learning_rate": 6.174746936600959e-06, "loss": 2.1487, "step": 25950 }, { "epoch": 13.85, "grad_norm": 0.9173837900161743, "learning_rate": 6.148108684070325e-06, "loss": 2.1639, "step": 26000 }, { "epoch": 13.88, "grad_norm": 1.1397854089736938, "learning_rate": 6.121470431539691e-06, "loss": 2.165, "step": 26050 }, { "epoch": 13.91, "grad_norm": 1.0020257234573364, "learning_rate": 6.094832179009057e-06, "loss": 2.1713, "step": 26100 }, { "epoch": 13.93, "grad_norm": 0.9584590792655945, "learning_rate": 6.068193926478424e-06, "loss": 2.2049, "step": 26150 }, { "epoch": 13.96, "grad_norm": 1.1951069831848145, "learning_rate": 6.0415556739477896e-06, "loss": 2.1632, "step": 26200 }, { "epoch": 13.99, "grad_norm": 1.0568323135375977, "learning_rate": 6.0149174214171555e-06, "loss": 2.1792, "step": 26250 }, { "epoch": 14.0, "eval_bert-score-f1": 0.8812965099779855, "eval_bert-score-precision": 0.8854620127155008, "eval_bert-score-recall": 0.877461512582043, "eval_gen_len": 47.727494164213944, "eval_loss": 2.0084121227264404, "eval_rouge1": 0.3362, "eval_rouge2": 0.1273, "eval_rougeL": 0.2487, "eval_rougeLsum": 0.2487, "eval_runtime": 1350.2387, "eval_samples_per_second": 14.594, "eval_steps_per_second": 0.348, "step": 26278 }, { "epoch": 14.01, "grad_norm": 0.9785760045051575, "learning_rate": 5.988279168886521e-06, "loss": 2.1468, "step": 26300 }, { "epoch": 14.04, "grad_norm": 1.0162745714187622, "learning_rate": 5.961640916355887e-06, "loss": 2.1717, "step": 26350 }, { "epoch": 14.06, "grad_norm": 1.0883443355560303, "learning_rate": 5.935002663825253e-06, "loss": 2.1565, "step": 26400 }, { "epoch": 14.09, "grad_norm": 1.0060546398162842, "learning_rate": 5.908364411294619e-06, "loss": 2.1691, "step": 26450 }, { "epoch": 14.12, "grad_norm": 0.9702796936035156, "learning_rate": 5.881726158763985e-06, "loss": 2.1747, "step": 26500 }, { "epoch": 14.14, "grad_norm": 0.9459964632987976, "learning_rate": 5.8550879062333525e-06, "loss": 2.1781, "step": 26550 }, { "epoch": 14.17, "grad_norm": 0.8751854300498962, "learning_rate": 5.8284496537027176e-06, "loss": 2.1531, "step": 26600 }, { "epoch": 14.2, "grad_norm": 0.9021329879760742, "learning_rate": 5.8018114011720835e-06, "loss": 2.1741, "step": 26650 }, { "epoch": 14.22, "grad_norm": 0.9062153100967407, "learning_rate": 5.775173148641449e-06, "loss": 2.1424, "step": 26700 }, { "epoch": 14.25, "grad_norm": 0.8709902167320251, "learning_rate": 5.748534896110815e-06, "loss": 2.1458, "step": 26750 }, { "epoch": 14.28, "grad_norm": 0.8705712556838989, "learning_rate": 5.721896643580181e-06, "loss": 2.113, "step": 26800 }, { "epoch": 14.3, "grad_norm": 0.880886435508728, "learning_rate": 5.695258391049547e-06, "loss": 2.2009, "step": 26850 }, { "epoch": 14.33, "grad_norm": 1.1115506887435913, "learning_rate": 5.668620138518913e-06, "loss": 2.1809, "step": 26900 }, { "epoch": 14.36, "grad_norm": 0.9538648128509521, "learning_rate": 5.641981885988279e-06, "loss": 2.1776, "step": 26950 }, { "epoch": 14.38, "grad_norm": 0.8338669538497925, "learning_rate": 5.6153436334576464e-06, "loss": 2.1357, "step": 27000 }, { "epoch": 14.41, "grad_norm": 0.9421921372413635, "learning_rate": 5.588705380927012e-06, "loss": 2.1514, "step": 27050 }, { "epoch": 14.44, "grad_norm": 0.8788719177246094, "learning_rate": 5.562067128396378e-06, "loss": 2.1643, "step": 27100 }, { "epoch": 14.46, "grad_norm": 0.9146755337715149, "learning_rate": 5.535428875865744e-06, "loss": 2.1334, "step": 27150 }, { "epoch": 14.49, "grad_norm": 1.042197346687317, "learning_rate": 5.50879062333511e-06, "loss": 2.1927, "step": 27200 }, { "epoch": 14.52, "grad_norm": 0.9642235636711121, "learning_rate": 5.482152370804475e-06, "loss": 2.1628, "step": 27250 }, { "epoch": 14.54, "grad_norm": 1.0911927223205566, "learning_rate": 5.455514118273841e-06, "loss": 2.1643, "step": 27300 }, { "epoch": 14.57, "grad_norm": 1.1119507551193237, "learning_rate": 5.428875865743207e-06, "loss": 2.1757, "step": 27350 }, { "epoch": 14.6, "grad_norm": 0.9982576370239258, "learning_rate": 5.4022376132125744e-06, "loss": 2.1912, "step": 27400 }, { "epoch": 14.62, "grad_norm": 0.9314507842063904, "learning_rate": 5.37559936068194e-06, "loss": 2.171, "step": 27450 }, { "epoch": 14.65, "grad_norm": 1.2115583419799805, "learning_rate": 5.348961108151306e-06, "loss": 2.16, "step": 27500 }, { "epoch": 14.68, "grad_norm": 0.9271109700202942, "learning_rate": 5.322322855620672e-06, "loss": 2.1908, "step": 27550 }, { "epoch": 14.7, "grad_norm": 0.8988925218582153, "learning_rate": 5.295684603090038e-06, "loss": 2.1852, "step": 27600 }, { "epoch": 14.73, "grad_norm": 0.9780814051628113, "learning_rate": 5.269046350559404e-06, "loss": 2.1417, "step": 27650 }, { "epoch": 14.76, "grad_norm": 1.1456106901168823, "learning_rate": 5.24240809802877e-06, "loss": 2.1775, "step": 27700 }, { "epoch": 14.78, "grad_norm": 0.813176155090332, "learning_rate": 5.215769845498136e-06, "loss": 2.1916, "step": 27750 }, { "epoch": 14.81, "grad_norm": 0.9489790797233582, "learning_rate": 5.189131592967502e-06, "loss": 2.1561, "step": 27800 }, { "epoch": 14.84, "grad_norm": 0.9345203042030334, "learning_rate": 5.162493340436868e-06, "loss": 2.1511, "step": 27850 }, { "epoch": 14.86, "grad_norm": 0.7995460033416748, "learning_rate": 5.135855087906234e-06, "loss": 2.1918, "step": 27900 }, { "epoch": 14.89, "grad_norm": 0.8729720711708069, "learning_rate": 5.1092168353756e-06, "loss": 2.1598, "step": 27950 }, { "epoch": 14.92, "grad_norm": 0.928892970085144, "learning_rate": 5.082578582844966e-06, "loss": 2.1602, "step": 28000 }, { "epoch": 14.94, "grad_norm": 0.9653081297874451, "learning_rate": 5.055940330314332e-06, "loss": 2.1591, "step": 28050 }, { "epoch": 14.97, "grad_norm": 1.006082534790039, "learning_rate": 5.029302077783698e-06, "loss": 2.1554, "step": 28100 }, { "epoch": 15.0, "grad_norm": 0.9210333228111267, "learning_rate": 5.002663825253064e-06, "loss": 2.1609, "step": 28150 }, { "epoch": 15.0, "eval_bert-score-f1": 0.8813901242448342, "eval_bert-score-precision": 0.8855356388553972, "eval_bert-score-recall": 0.8775755109870953, "eval_gen_len": 47.89211407693088, "eval_loss": 2.008052349090576, "eval_rouge1": 0.3364, "eval_rouge2": 0.1275, "eval_rougeL": 0.249, "eval_rougeLsum": 0.2491, "eval_runtime": 1342.1803, "eval_samples_per_second": 14.682, "eval_steps_per_second": 0.35, "step": 28155 }, { "epoch": 15.02, "grad_norm": 0.9067476987838745, "learning_rate": 4.97602557272243e-06, "loss": 2.1855, "step": 28200 }, { "epoch": 15.05, "grad_norm": 0.9924964904785156, "learning_rate": 4.9493873201917955e-06, "loss": 2.1434, "step": 28250 }, { "epoch": 15.08, "grad_norm": 0.9526183009147644, "learning_rate": 4.922749067661161e-06, "loss": 2.1306, "step": 28300 }, { "epoch": 15.1, "grad_norm": 0.8999783396720886, "learning_rate": 4.896110815130528e-06, "loss": 2.1683, "step": 28350 }, { "epoch": 15.13, "grad_norm": 1.3067386150360107, "learning_rate": 4.869472562599894e-06, "loss": 2.175, "step": 28400 }, { "epoch": 15.16, "grad_norm": 0.930998682975769, "learning_rate": 4.84283431006926e-06, "loss": 2.1571, "step": 28450 }, { "epoch": 15.18, "grad_norm": 1.0874801874160767, "learning_rate": 4.816196057538626e-06, "loss": 2.1648, "step": 28500 }, { "epoch": 15.21, "grad_norm": 0.8937533497810364, "learning_rate": 4.789557805007992e-06, "loss": 2.1593, "step": 28550 }, { "epoch": 15.24, "grad_norm": 1.1147658824920654, "learning_rate": 4.762919552477358e-06, "loss": 2.1554, "step": 28600 }, { "epoch": 15.26, "grad_norm": 1.2026888132095337, "learning_rate": 4.7362812999467235e-06, "loss": 2.1702, "step": 28650 }, { "epoch": 15.29, "grad_norm": 1.02727210521698, "learning_rate": 4.7096430474160894e-06, "loss": 2.1316, "step": 28700 }, { "epoch": 15.32, "grad_norm": 0.9779881834983826, "learning_rate": 4.683004794885455e-06, "loss": 2.1673, "step": 28750 }, { "epoch": 15.34, "grad_norm": 1.0025492906570435, "learning_rate": 4.656366542354822e-06, "loss": 2.1949, "step": 28800 }, { "epoch": 15.37, "grad_norm": 1.1579878330230713, "learning_rate": 4.629728289824188e-06, "loss": 2.161, "step": 28850 }, { "epoch": 15.4, "grad_norm": 0.9376475811004639, "learning_rate": 4.603090037293554e-06, "loss": 2.1849, "step": 28900 }, { "epoch": 15.42, "grad_norm": 1.239538550376892, "learning_rate": 4.57645178476292e-06, "loss": 2.148, "step": 28950 }, { "epoch": 15.45, "grad_norm": 0.9662672281265259, "learning_rate": 4.5498135322322865e-06, "loss": 2.1917, "step": 29000 }, { "epoch": 15.48, "grad_norm": 0.999487578868866, "learning_rate": 4.523175279701652e-06, "loss": 2.1746, "step": 29050 }, { "epoch": 15.5, "grad_norm": 1.1087113618850708, "learning_rate": 4.496537027171018e-06, "loss": 2.1756, "step": 29100 }, { "epoch": 15.53, "grad_norm": 0.9873372316360474, "learning_rate": 4.469898774640384e-06, "loss": 2.1333, "step": 29150 }, { "epoch": 15.56, "grad_norm": 0.9368143677711487, "learning_rate": 4.44326052210975e-06, "loss": 2.1784, "step": 29200 }, { "epoch": 15.58, "grad_norm": 1.0396299362182617, "learning_rate": 4.416622269579116e-06, "loss": 2.1786, "step": 29250 }, { "epoch": 15.61, "grad_norm": 0.9846010804176331, "learning_rate": 4.389984017048482e-06, "loss": 2.1886, "step": 29300 }, { "epoch": 15.64, "grad_norm": 0.7157499194145203, "learning_rate": 4.363345764517848e-06, "loss": 2.1454, "step": 29350 }, { "epoch": 15.66, "grad_norm": 0.8496772050857544, "learning_rate": 4.336707511987214e-06, "loss": 2.1456, "step": 29400 }, { "epoch": 15.69, "grad_norm": 1.0150104761123657, "learning_rate": 4.31006925945658e-06, "loss": 2.1725, "step": 29450 }, { "epoch": 15.72, "grad_norm": 0.9437685012817383, "learning_rate": 4.283431006925946e-06, "loss": 2.1655, "step": 29500 }, { "epoch": 15.74, "grad_norm": 0.978090226650238, "learning_rate": 4.256792754395312e-06, "loss": 2.1476, "step": 29550 }, { "epoch": 15.77, "grad_norm": 0.8782944679260254, "learning_rate": 4.230154501864678e-06, "loss": 2.1482, "step": 29600 }, { "epoch": 15.8, "grad_norm": 0.830117404460907, "learning_rate": 4.203516249334044e-06, "loss": 2.118, "step": 29650 }, { "epoch": 15.82, "grad_norm": 0.9760297536849976, "learning_rate": 4.17687799680341e-06, "loss": 2.2007, "step": 29700 }, { "epoch": 15.85, "grad_norm": 0.8977949619293213, "learning_rate": 4.150239744272776e-06, "loss": 2.1008, "step": 29750 }, { "epoch": 15.88, "grad_norm": 1.0869261026382446, "learning_rate": 4.123601491742142e-06, "loss": 2.1915, "step": 29800 }, { "epoch": 15.9, "grad_norm": 1.1186174154281616, "learning_rate": 4.0969632392115084e-06, "loss": 2.1653, "step": 29850 }, { "epoch": 15.93, "grad_norm": 0.9164936542510986, "learning_rate": 4.070324986680874e-06, "loss": 2.1479, "step": 29900 }, { "epoch": 15.96, "grad_norm": 0.9677796959877014, "learning_rate": 4.04368673415024e-06, "loss": 2.1207, "step": 29950 }, { "epoch": 15.98, "grad_norm": 0.8449875116348267, "learning_rate": 4.017048481619606e-06, "loss": 2.189, "step": 30000 }, { "epoch": 16.0, "eval_bert-score-f1": 0.8815181989403869, "eval_bert-score-precision": 0.8858505654647171, "eval_bert-score-recall": 0.8775155795209725, "eval_gen_len": 47.55181163097534, "eval_loss": 2.0050902366638184, "eval_rouge1": 0.3365, "eval_rouge2": 0.1273, "eval_rougeL": 0.249, "eval_rougeLsum": 0.2491, "eval_runtime": 1330.728, "eval_samples_per_second": 14.808, "eval_steps_per_second": 0.353, "step": 30032 }, { "epoch": 16.01, "grad_norm": 0.9980852603912354, "learning_rate": 3.990410229088972e-06, "loss": 2.1959, "step": 30050 }, { "epoch": 16.04, "grad_norm": 1.3163436651229858, "learning_rate": 3.963771976558338e-06, "loss": 2.173, "step": 30100 }, { "epoch": 16.06, "grad_norm": 0.9099076986312866, "learning_rate": 3.937133724027704e-06, "loss": 2.1724, "step": 30150 }, { "epoch": 16.09, "grad_norm": 1.0009883642196655, "learning_rate": 3.91049547149707e-06, "loss": 2.1544, "step": 30200 }, { "epoch": 16.12, "grad_norm": 0.9975098967552185, "learning_rate": 3.883857218966436e-06, "loss": 2.1458, "step": 30250 }, { "epoch": 16.14, "grad_norm": 0.8518524169921875, "learning_rate": 3.857218966435802e-06, "loss": 2.1569, "step": 30300 }, { "epoch": 16.17, "grad_norm": 0.9639623761177063, "learning_rate": 3.830580713905168e-06, "loss": 2.15, "step": 30350 }, { "epoch": 16.2, "grad_norm": 0.8993538022041321, "learning_rate": 3.803942461374534e-06, "loss": 2.1448, "step": 30400 }, { "epoch": 16.22, "grad_norm": 1.0397539138793945, "learning_rate": 3.7773042088439e-06, "loss": 2.1305, "step": 30450 }, { "epoch": 16.25, "grad_norm": 1.0676199197769165, "learning_rate": 3.7506659563132663e-06, "loss": 2.141, "step": 30500 }, { "epoch": 16.28, "grad_norm": 1.0054043531417847, "learning_rate": 3.7240277037826322e-06, "loss": 2.1595, "step": 30550 }, { "epoch": 16.3, "grad_norm": 0.949520468711853, "learning_rate": 3.697389451251998e-06, "loss": 2.2005, "step": 30600 }, { "epoch": 16.33, "grad_norm": 0.9753077030181885, "learning_rate": 3.670751198721364e-06, "loss": 2.1924, "step": 30650 }, { "epoch": 16.36, "grad_norm": 0.8877549767494202, "learning_rate": 3.6441129461907304e-06, "loss": 2.1553, "step": 30700 }, { "epoch": 16.38, "grad_norm": 1.0130952596664429, "learning_rate": 3.6174746936600963e-06, "loss": 2.1451, "step": 30750 }, { "epoch": 16.41, "grad_norm": 1.2479366064071655, "learning_rate": 3.590836441129462e-06, "loss": 2.168, "step": 30800 }, { "epoch": 16.44, "grad_norm": 1.0229461193084717, "learning_rate": 3.564198188598828e-06, "loss": 2.1642, "step": 30850 }, { "epoch": 16.46, "grad_norm": 0.9332823157310486, "learning_rate": 3.5375599360681944e-06, "loss": 2.1587, "step": 30900 }, { "epoch": 16.49, "grad_norm": 1.008484125137329, "learning_rate": 3.5109216835375603e-06, "loss": 2.1725, "step": 30950 }, { "epoch": 16.52, "grad_norm": 0.9926919341087341, "learning_rate": 3.484283431006926e-06, "loss": 2.1694, "step": 31000 }, { "epoch": 16.54, "grad_norm": 0.9659560322761536, "learning_rate": 3.457645178476292e-06, "loss": 2.1787, "step": 31050 }, { "epoch": 16.57, "grad_norm": 0.9388399720191956, "learning_rate": 3.431006925945658e-06, "loss": 2.1726, "step": 31100 }, { "epoch": 16.6, "grad_norm": 0.8616447448730469, "learning_rate": 3.4043686734150243e-06, "loss": 2.1566, "step": 31150 }, { "epoch": 16.62, "grad_norm": 0.8849464058876038, "learning_rate": 3.37773042088439e-06, "loss": 2.1769, "step": 31200 }, { "epoch": 16.65, "grad_norm": 0.9730740785598755, "learning_rate": 3.351092168353756e-06, "loss": 2.1858, "step": 31250 }, { "epoch": 16.68, "grad_norm": 1.1059538125991821, "learning_rate": 3.324453915823122e-06, "loss": 2.1722, "step": 31300 }, { "epoch": 16.7, "grad_norm": 1.2941957712173462, "learning_rate": 3.2978156632924883e-06, "loss": 2.1652, "step": 31350 }, { "epoch": 16.73, "grad_norm": 0.9166463017463684, "learning_rate": 3.271177410761854e-06, "loss": 2.158, "step": 31400 }, { "epoch": 16.76, "grad_norm": 1.1810513734817505, "learning_rate": 3.24453915823122e-06, "loss": 2.1334, "step": 31450 }, { "epoch": 16.78, "grad_norm": 0.9561477899551392, "learning_rate": 3.217900905700586e-06, "loss": 2.1354, "step": 31500 }, { "epoch": 16.81, "grad_norm": 0.8539523482322693, "learning_rate": 3.1912626531699527e-06, "loss": 2.1458, "step": 31550 }, { "epoch": 16.84, "grad_norm": 1.099672794342041, "learning_rate": 3.1646244006393186e-06, "loss": 2.1784, "step": 31600 }, { "epoch": 16.86, "grad_norm": 0.8352581858634949, "learning_rate": 3.1379861481086845e-06, "loss": 2.1348, "step": 31650 }, { "epoch": 16.89, "grad_norm": 1.0823798179626465, "learning_rate": 3.11134789557805e-06, "loss": 2.1657, "step": 31700 }, { "epoch": 16.92, "grad_norm": 0.9096495509147644, "learning_rate": 3.0847096430474167e-06, "loss": 2.1526, "step": 31750 }, { "epoch": 16.94, "grad_norm": 0.9881473779678345, "learning_rate": 3.0580713905167826e-06, "loss": 2.1759, "step": 31800 }, { "epoch": 16.97, "grad_norm": 1.2694969177246094, "learning_rate": 3.0314331379861485e-06, "loss": 2.1896, "step": 31850 }, { "epoch": 17.0, "grad_norm": 1.1195552349090576, "learning_rate": 3.0047948854555144e-06, "loss": 2.1417, "step": 31900 }, { "epoch": 17.0, "eval_bert-score-f1": 0.8814209539291642, "eval_bert-score-precision": 0.8856624760483045, "eval_bert-score-recall": 0.8775075128371735, "eval_gen_len": 47.645996143306604, "eval_loss": 2.005204439163208, "eval_rouge1": 0.3363, "eval_rouge2": 0.1273, "eval_rougeL": 0.2488, "eval_rougeLsum": 0.2489, "eval_runtime": 1335.7013, "eval_samples_per_second": 14.753, "eval_steps_per_second": 0.352, "step": 31909 }, { "epoch": 17.02, "grad_norm": 0.8519843816757202, "learning_rate": 2.9781566329248803e-06, "loss": 2.139, "step": 31950 }, { "epoch": 17.05, "grad_norm": 0.9282755255699158, "learning_rate": 2.9515183803942466e-06, "loss": 2.1555, "step": 32000 }, { "epoch": 17.08, "grad_norm": 1.0514159202575684, "learning_rate": 2.9248801278636125e-06, "loss": 2.1316, "step": 32050 }, { "epoch": 17.1, "grad_norm": 1.034957766532898, "learning_rate": 2.8982418753329784e-06, "loss": 2.2094, "step": 32100 }, { "epoch": 17.13, "grad_norm": 1.0574824810028076, "learning_rate": 2.8716036228023443e-06, "loss": 2.1641, "step": 32150 }, { "epoch": 17.16, "grad_norm": 0.9753189086914062, "learning_rate": 2.8449653702717106e-06, "loss": 2.1442, "step": 32200 }, { "epoch": 17.18, "grad_norm": 1.2037853002548218, "learning_rate": 2.8183271177410765e-06, "loss": 2.147, "step": 32250 }, { "epoch": 17.21, "grad_norm": 0.8287584185600281, "learning_rate": 2.7916888652104424e-06, "loss": 2.1765, "step": 32300 }, { "epoch": 17.23, "grad_norm": 1.0360517501831055, "learning_rate": 2.7650506126798083e-06, "loss": 2.1864, "step": 32350 }, { "epoch": 17.26, "grad_norm": 1.1471740007400513, "learning_rate": 2.7384123601491746e-06, "loss": 2.1629, "step": 32400 }, { "epoch": 17.29, "grad_norm": 0.9185000658035278, "learning_rate": 2.7117741076185405e-06, "loss": 2.1639, "step": 32450 }, { "epoch": 17.31, "grad_norm": 1.1663320064544678, "learning_rate": 2.6851358550879064e-06, "loss": 2.1174, "step": 32500 }, { "epoch": 17.34, "grad_norm": 0.980441689491272, "learning_rate": 2.6584976025572723e-06, "loss": 2.1397, "step": 32550 }, { "epoch": 17.37, "grad_norm": 1.1273858547210693, "learning_rate": 2.6318593500266386e-06, "loss": 2.1462, "step": 32600 }, { "epoch": 17.39, "grad_norm": 1.146296501159668, "learning_rate": 2.6052210974960045e-06, "loss": 2.1797, "step": 32650 }, { "epoch": 17.42, "grad_norm": 0.9205301403999329, "learning_rate": 2.5785828449653704e-06, "loss": 2.1909, "step": 32700 }, { "epoch": 17.45, "grad_norm": 1.103619933128357, "learning_rate": 2.5519445924347363e-06, "loss": 2.1637, "step": 32750 }, { "epoch": 17.47, "grad_norm": 0.8971651196479797, "learning_rate": 2.5253063399041022e-06, "loss": 2.1693, "step": 32800 }, { "epoch": 17.5, "grad_norm": 1.088956356048584, "learning_rate": 2.4986680873734685e-06, "loss": 2.151, "step": 32850 }, { "epoch": 17.53, "grad_norm": 0.9202156066894531, "learning_rate": 2.4720298348428344e-06, "loss": 2.1679, "step": 32900 }, { "epoch": 17.55, "grad_norm": 0.9409503936767578, "learning_rate": 2.4453915823122003e-06, "loss": 2.157, "step": 32950 }, { "epoch": 17.58, "grad_norm": 1.0779467821121216, "learning_rate": 2.4187533297815667e-06, "loss": 2.1291, "step": 33000 }, { "epoch": 17.61, "grad_norm": 0.918696403503418, "learning_rate": 2.3921150772509325e-06, "loss": 2.1844, "step": 33050 }, { "epoch": 17.63, "grad_norm": 1.5606318712234497, "learning_rate": 2.3654768247202984e-06, "loss": 2.1489, "step": 33100 }, { "epoch": 17.66, "grad_norm": 1.205295443534851, "learning_rate": 2.3388385721896643e-06, "loss": 2.1667, "step": 33150 }, { "epoch": 17.69, "grad_norm": 1.052687644958496, "learning_rate": 2.3122003196590307e-06, "loss": 2.1258, "step": 33200 }, { "epoch": 17.71, "grad_norm": 0.9863780736923218, "learning_rate": 2.2855620671283966e-06, "loss": 2.1722, "step": 33250 }, { "epoch": 17.74, "grad_norm": 0.9677025079727173, "learning_rate": 2.258923814597763e-06, "loss": 2.1813, "step": 33300 }, { "epoch": 17.77, "grad_norm": 1.0325006246566772, "learning_rate": 2.2322855620671283e-06, "loss": 2.1258, "step": 33350 }, { "epoch": 17.79, "grad_norm": 1.2607378959655762, "learning_rate": 2.2056473095364947e-06, "loss": 2.1531, "step": 33400 }, { "epoch": 17.82, "grad_norm": 0.8155959844589233, "learning_rate": 2.1790090570058606e-06, "loss": 2.1356, "step": 33450 }, { "epoch": 17.85, "grad_norm": 1.000097632408142, "learning_rate": 2.152370804475227e-06, "loss": 2.1564, "step": 33500 }, { "epoch": 17.87, "grad_norm": 1.0435174703598022, "learning_rate": 2.1257325519445928e-06, "loss": 2.1442, "step": 33550 }, { "epoch": 17.9, "grad_norm": 0.8841784000396729, "learning_rate": 2.0990942994139587e-06, "loss": 2.1775, "step": 33600 }, { "epoch": 17.93, "grad_norm": 0.971834123134613, "learning_rate": 2.0724560468833246e-06, "loss": 2.1737, "step": 33650 }, { "epoch": 17.95, "grad_norm": 1.0215661525726318, "learning_rate": 2.0458177943526905e-06, "loss": 2.1105, "step": 33700 }, { "epoch": 17.98, "grad_norm": 0.964297890663147, "learning_rate": 2.0191795418220568e-06, "loss": 2.1731, "step": 33750 }, { "epoch": 18.0, "eval_bert-score-f1": 0.8815276650811141, "eval_bert-score-precision": 0.8858032303631735, "eval_bert-score-recall": 0.8775806915883447, "eval_gen_len": 47.6104739673196, "eval_loss": 2.004894495010376, "eval_rouge1": 0.3366, "eval_rouge2": 0.1273, "eval_rougeL": 0.249, "eval_rougeLsum": 0.2491, "eval_runtime": 1338.1647, "eval_samples_per_second": 14.726, "eval_steps_per_second": 0.351, "step": 33786 } ], "logging_steps": 50, "max_steps": 37540, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.840531477455831e+17, "train_batch_size": 42, "trial_name": null, "trial_params": null }