{
"best_metric": 0.249,
"best_model_checkpoint": "logs/google-t5/t5-small/checkpoint-28155",
"epoch": 18.0,
"eval_steps": 500,
"global_step": 33786,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 1.3638477325439453,
"learning_rate": 1.997336174746937e-05,
"loss": 2.8409,
"step": 50
},
{
"epoch": 0.05,
"grad_norm": 1.420181155204773,
"learning_rate": 1.9946723494938732e-05,
"loss": 2.538,
"step": 100
},
{
"epoch": 0.08,
"grad_norm": 1.119789719581604,
"learning_rate": 1.9920085242408102e-05,
"loss": 2.4663,
"step": 150
},
{
"epoch": 0.11,
"grad_norm": 1.1085336208343506,
"learning_rate": 1.9893446989877465e-05,
"loss": 2.4737,
"step": 200
},
{
"epoch": 0.13,
"grad_norm": 0.8778786659240723,
"learning_rate": 1.9866808737346832e-05,
"loss": 2.4436,
"step": 250
},
{
"epoch": 0.16,
"grad_norm": 1.1101582050323486,
"learning_rate": 1.9840170484816195e-05,
"loss": 2.4545,
"step": 300
},
{
"epoch": 0.19,
"grad_norm": 1.1823639869689941,
"learning_rate": 1.9813532232285565e-05,
"loss": 2.3472,
"step": 350
},
{
"epoch": 0.21,
"grad_norm": 1.0613980293273926,
"learning_rate": 1.978689397975493e-05,
"loss": 2.3945,
"step": 400
},
{
"epoch": 0.24,
"grad_norm": 1.1558998823165894,
"learning_rate": 1.9760255727224296e-05,
"loss": 2.4172,
"step": 450
},
{
"epoch": 0.27,
"grad_norm": 1.098973274230957,
"learning_rate": 1.9733617474693662e-05,
"loss": 2.3629,
"step": 500
},
{
"epoch": 0.29,
"grad_norm": 1.3629950284957886,
"learning_rate": 1.970697922216303e-05,
"loss": 2.3975,
"step": 550
},
{
"epoch": 0.32,
"grad_norm": 1.1299411058425903,
"learning_rate": 1.9680340969632396e-05,
"loss": 2.3322,
"step": 600
},
{
"epoch": 0.35,
"grad_norm": 0.9830572605133057,
"learning_rate": 1.965370271710176e-05,
"loss": 2.379,
"step": 650
},
{
"epoch": 0.37,
"grad_norm": 1.0205196142196655,
"learning_rate": 1.9627064464571126e-05,
"loss": 2.3454,
"step": 700
},
{
"epoch": 0.4,
"grad_norm": 1.230190396308899,
"learning_rate": 1.960042621204049e-05,
"loss": 2.3045,
"step": 750
},
{
"epoch": 0.43,
"grad_norm": 1.115490436553955,
"learning_rate": 1.957378795950986e-05,
"loss": 2.3676,
"step": 800
},
{
"epoch": 0.45,
"grad_norm": 0.9047977924346924,
"learning_rate": 1.9547149706979226e-05,
"loss": 2.3871,
"step": 850
},
{
"epoch": 0.48,
"grad_norm": 1.0352205038070679,
"learning_rate": 1.952051145444859e-05,
"loss": 2.3481,
"step": 900
},
{
"epoch": 0.51,
"grad_norm": 0.864746630191803,
"learning_rate": 1.9493873201917956e-05,
"loss": 2.3384,
"step": 950
},
{
"epoch": 0.53,
"grad_norm": 1.3858081102371216,
"learning_rate": 1.9467234949387323e-05,
"loss": 2.3271,
"step": 1000
},
{
"epoch": 0.56,
"grad_norm": 0.9969013333320618,
"learning_rate": 1.944059669685669e-05,
"loss": 2.3267,
"step": 1050
},
{
"epoch": 0.59,
"grad_norm": 1.035396933555603,
"learning_rate": 1.9413958444326053e-05,
"loss": 2.349,
"step": 1100
},
{
"epoch": 0.61,
"grad_norm": 1.235714316368103,
"learning_rate": 1.938732019179542e-05,
"loss": 2.3442,
"step": 1150
},
{
"epoch": 0.64,
"grad_norm": 0.9119770526885986,
"learning_rate": 1.9360681939264787e-05,
"loss": 2.3246,
"step": 1200
},
{
"epoch": 0.67,
"grad_norm": 0.8794578909873962,
"learning_rate": 1.9334043686734153e-05,
"loss": 2.3633,
"step": 1250
},
{
"epoch": 0.69,
"grad_norm": 0.9355188012123108,
"learning_rate": 1.930740543420352e-05,
"loss": 2.339,
"step": 1300
},
{
"epoch": 0.72,
"grad_norm": 0.9303766489028931,
"learning_rate": 1.9280767181672883e-05,
"loss": 2.3174,
"step": 1350
},
{
"epoch": 0.75,
"grad_norm": 1.3199224472045898,
"learning_rate": 1.925412892914225e-05,
"loss": 2.3321,
"step": 1400
},
{
"epoch": 0.77,
"grad_norm": 1.0782549381256104,
"learning_rate": 1.9227490676611617e-05,
"loss": 2.3348,
"step": 1450
},
{
"epoch": 0.8,
"grad_norm": 0.9403214454650879,
"learning_rate": 1.9200852424080984e-05,
"loss": 2.3133,
"step": 1500
},
{
"epoch": 0.83,
"grad_norm": 0.9809348583221436,
"learning_rate": 1.9174214171550347e-05,
"loss": 2.3177,
"step": 1550
},
{
"epoch": 0.85,
"grad_norm": 0.9974561333656311,
"learning_rate": 1.9147575919019714e-05,
"loss": 2.311,
"step": 1600
},
{
"epoch": 0.88,
"grad_norm": 0.9475740194320679,
"learning_rate": 1.912093766648908e-05,
"loss": 2.3208,
"step": 1650
},
{
"epoch": 0.91,
"grad_norm": 1.0113029479980469,
"learning_rate": 1.9094299413958447e-05,
"loss": 2.3031,
"step": 1700
},
{
"epoch": 0.93,
"grad_norm": 0.920647144317627,
"learning_rate": 1.9067661161427814e-05,
"loss": 2.3268,
"step": 1750
},
{
"epoch": 0.96,
"grad_norm": 0.9876402020454407,
"learning_rate": 1.9041022908897177e-05,
"loss": 2.3,
"step": 1800
},
{
"epoch": 0.99,
"grad_norm": 1.0489838123321533,
"learning_rate": 1.9014384656366544e-05,
"loss": 2.305,
"step": 1850
},
{
"epoch": 1.0,
"eval_bert-score-f1": 0.8753073360155793,
"eval_bert-score-precision": 0.8779077029804112,
"eval_bert-score-recall": 0.873077545509742,
"eval_gen_len": 49.607327717446466,
"eval_loss": 2.088743209838867,
"eval_rouge1": 0.3105,
"eval_rouge2": 0.1136,
"eval_rougeL": 0.2275,
"eval_rougeLsum": 0.2276,
"eval_runtime": 1364.7929,
"eval_samples_per_second": 14.439,
"eval_steps_per_second": 0.344,
"step": 1877
},
{
"epoch": 1.01,
"grad_norm": 0.8712087869644165,
"learning_rate": 1.898774640383591e-05,
"loss": 2.2941,
"step": 1900
},
{
"epoch": 1.04,
"grad_norm": 0.8705251812934875,
"learning_rate": 1.8961108151305278e-05,
"loss": 2.2953,
"step": 1950
},
{
"epoch": 1.07,
"grad_norm": 1.098132610321045,
"learning_rate": 1.893446989877464e-05,
"loss": 2.2976,
"step": 2000
},
{
"epoch": 1.09,
"grad_norm": 1.1778416633605957,
"learning_rate": 1.8907831646244008e-05,
"loss": 2.2866,
"step": 2050
},
{
"epoch": 1.12,
"grad_norm": 0.7667921185493469,
"learning_rate": 1.8881193393713374e-05,
"loss": 2.2641,
"step": 2100
},
{
"epoch": 1.15,
"grad_norm": 0.9039889574050903,
"learning_rate": 1.885455514118274e-05,
"loss": 2.2788,
"step": 2150
},
{
"epoch": 1.17,
"grad_norm": 0.8681074976921082,
"learning_rate": 1.8827916888652108e-05,
"loss": 2.3297,
"step": 2200
},
{
"epoch": 1.2,
"grad_norm": 1.0154231786727905,
"learning_rate": 1.880127863612147e-05,
"loss": 2.3267,
"step": 2250
},
{
"epoch": 1.23,
"grad_norm": 1.5686161518096924,
"learning_rate": 1.8774640383590838e-05,
"loss": 2.2867,
"step": 2300
},
{
"epoch": 1.25,
"grad_norm": 1.0235270261764526,
"learning_rate": 1.8748002131060205e-05,
"loss": 2.3132,
"step": 2350
},
{
"epoch": 1.28,
"grad_norm": 1.1120573282241821,
"learning_rate": 1.872136387852957e-05,
"loss": 2.2774,
"step": 2400
},
{
"epoch": 1.31,
"grad_norm": 0.9750345349311829,
"learning_rate": 1.8694725625998935e-05,
"loss": 2.2903,
"step": 2450
},
{
"epoch": 1.33,
"grad_norm": 0.8807668089866638,
"learning_rate": 1.86680873734683e-05,
"loss": 2.2923,
"step": 2500
},
{
"epoch": 1.36,
"grad_norm": 0.9335638284683228,
"learning_rate": 1.8641449120937668e-05,
"loss": 2.3168,
"step": 2550
},
{
"epoch": 1.39,
"grad_norm": 0.8730989694595337,
"learning_rate": 1.8614810868407035e-05,
"loss": 2.3007,
"step": 2600
},
{
"epoch": 1.41,
"grad_norm": 1.0019006729125977,
"learning_rate": 1.8588172615876402e-05,
"loss": 2.3087,
"step": 2650
},
{
"epoch": 1.44,
"grad_norm": 0.9429858922958374,
"learning_rate": 1.8561534363345765e-05,
"loss": 2.2802,
"step": 2700
},
{
"epoch": 1.47,
"grad_norm": 1.0355122089385986,
"learning_rate": 1.8534896110815132e-05,
"loss": 2.2801,
"step": 2750
},
{
"epoch": 1.49,
"grad_norm": 1.3223506212234497,
"learning_rate": 1.85082578582845e-05,
"loss": 2.3245,
"step": 2800
},
{
"epoch": 1.52,
"grad_norm": 0.9225859642028809,
"learning_rate": 1.8481619605753865e-05,
"loss": 2.2838,
"step": 2850
},
{
"epoch": 1.55,
"grad_norm": 0.9716720581054688,
"learning_rate": 1.845498135322323e-05,
"loss": 2.296,
"step": 2900
},
{
"epoch": 1.57,
"grad_norm": 0.947675883769989,
"learning_rate": 1.8428343100692595e-05,
"loss": 2.2768,
"step": 2950
},
{
"epoch": 1.6,
"grad_norm": 0.9065707921981812,
"learning_rate": 1.8401704848161962e-05,
"loss": 2.2767,
"step": 3000
},
{
"epoch": 1.62,
"grad_norm": 1.0325031280517578,
"learning_rate": 1.837506659563133e-05,
"loss": 2.262,
"step": 3050
},
{
"epoch": 1.65,
"grad_norm": 0.9252289533615112,
"learning_rate": 1.8348428343100696e-05,
"loss": 2.2824,
"step": 3100
},
{
"epoch": 1.68,
"grad_norm": 0.8026869297027588,
"learning_rate": 1.832179009057006e-05,
"loss": 2.2762,
"step": 3150
},
{
"epoch": 1.7,
"grad_norm": 1.017001748085022,
"learning_rate": 1.8295151838039426e-05,
"loss": 2.2742,
"step": 3200
},
{
"epoch": 1.73,
"grad_norm": 0.8680307269096375,
"learning_rate": 1.8268513585508792e-05,
"loss": 2.252,
"step": 3250
},
{
"epoch": 1.76,
"grad_norm": 0.9704865217208862,
"learning_rate": 1.824187533297816e-05,
"loss": 2.2627,
"step": 3300
},
{
"epoch": 1.78,
"grad_norm": 1.1407126188278198,
"learning_rate": 1.8215237080447523e-05,
"loss": 2.305,
"step": 3350
},
{
"epoch": 1.81,
"grad_norm": 1.0476961135864258,
"learning_rate": 1.818859882791689e-05,
"loss": 2.2659,
"step": 3400
},
{
"epoch": 1.84,
"grad_norm": 0.9194826483726501,
"learning_rate": 1.8161960575386256e-05,
"loss": 2.2728,
"step": 3450
},
{
"epoch": 1.86,
"grad_norm": 1.061948299407959,
"learning_rate": 1.8135322322855623e-05,
"loss": 2.262,
"step": 3500
},
{
"epoch": 1.89,
"grad_norm": 0.8690770864486694,
"learning_rate": 1.810868407032499e-05,
"loss": 2.2611,
"step": 3550
},
{
"epoch": 1.92,
"grad_norm": 1.000588297843933,
"learning_rate": 1.8082045817794353e-05,
"loss": 2.2707,
"step": 3600
},
{
"epoch": 1.94,
"grad_norm": 1.2689441442489624,
"learning_rate": 1.805540756526372e-05,
"loss": 2.2688,
"step": 3650
},
{
"epoch": 1.97,
"grad_norm": 0.9255685210227966,
"learning_rate": 1.8028769312733086e-05,
"loss": 2.2559,
"step": 3700
},
{
"epoch": 2.0,
"grad_norm": 1.0140782594680786,
"learning_rate": 1.8002131060202453e-05,
"loss": 2.2838,
"step": 3750
},
{
"epoch": 2.0,
"eval_bert-score-f1": 0.8773485723212565,
"eval_bert-score-precision": 0.8805842198545674,
"eval_bert-score-recall": 0.8744701069949739,
"eval_gen_len": 49.16461991271694,
"eval_loss": 2.062197685241699,
"eval_rouge1": 0.3196,
"eval_rouge2": 0.1183,
"eval_rougeL": 0.2349,
"eval_rougeLsum": 0.235,
"eval_runtime": 1347.8022,
"eval_samples_per_second": 14.621,
"eval_steps_per_second": 0.349,
"step": 3754
},
{
"epoch": 2.02,
"grad_norm": 0.8860589861869812,
"learning_rate": 1.7975492807671817e-05,
"loss": 2.2635,
"step": 3800
},
{
"epoch": 2.05,
"grad_norm": 0.9218833446502686,
"learning_rate": 1.7948854555141183e-05,
"loss": 2.2621,
"step": 3850
},
{
"epoch": 2.08,
"grad_norm": 0.9549726247787476,
"learning_rate": 1.792221630261055e-05,
"loss": 2.2513,
"step": 3900
},
{
"epoch": 2.1,
"grad_norm": 1.135712742805481,
"learning_rate": 1.7895578050079917e-05,
"loss": 2.2721,
"step": 3950
},
{
"epoch": 2.13,
"grad_norm": 1.056344985961914,
"learning_rate": 1.7868939797549283e-05,
"loss": 2.2601,
"step": 4000
},
{
"epoch": 2.16,
"grad_norm": 0.8976427316665649,
"learning_rate": 1.7842301545018647e-05,
"loss": 2.2441,
"step": 4050
},
{
"epoch": 2.18,
"grad_norm": 1.0189875364303589,
"learning_rate": 1.7815663292488014e-05,
"loss": 2.2441,
"step": 4100
},
{
"epoch": 2.21,
"grad_norm": 1.1941207647323608,
"learning_rate": 1.778902503995738e-05,
"loss": 2.2518,
"step": 4150
},
{
"epoch": 2.24,
"grad_norm": 0.9858642816543579,
"learning_rate": 1.7762386787426747e-05,
"loss": 2.2417,
"step": 4200
},
{
"epoch": 2.26,
"grad_norm": 0.8909502625465393,
"learning_rate": 1.7735748534896114e-05,
"loss": 2.2741,
"step": 4250
},
{
"epoch": 2.29,
"grad_norm": 1.1010240316390991,
"learning_rate": 1.7709110282365477e-05,
"loss": 2.2538,
"step": 4300
},
{
"epoch": 2.32,
"grad_norm": 1.1926771402359009,
"learning_rate": 1.7682472029834844e-05,
"loss": 2.2844,
"step": 4350
},
{
"epoch": 2.34,
"grad_norm": 1.048973798751831,
"learning_rate": 1.765583377730421e-05,
"loss": 2.2677,
"step": 4400
},
{
"epoch": 2.37,
"grad_norm": 1.0072720050811768,
"learning_rate": 1.7629195524773577e-05,
"loss": 2.2616,
"step": 4450
},
{
"epoch": 2.4,
"grad_norm": 0.854369044303894,
"learning_rate": 1.760255727224294e-05,
"loss": 2.2498,
"step": 4500
},
{
"epoch": 2.42,
"grad_norm": 0.9605410099029541,
"learning_rate": 1.7575919019712307e-05,
"loss": 2.2541,
"step": 4550
},
{
"epoch": 2.45,
"grad_norm": 0.9667823314666748,
"learning_rate": 1.7549280767181674e-05,
"loss": 2.26,
"step": 4600
},
{
"epoch": 2.48,
"grad_norm": 2.42110013961792,
"learning_rate": 1.752264251465104e-05,
"loss": 2.2585,
"step": 4650
},
{
"epoch": 2.5,
"grad_norm": 1.0035040378570557,
"learning_rate": 1.7496004262120408e-05,
"loss": 2.2538,
"step": 4700
},
{
"epoch": 2.53,
"grad_norm": 1.1149507761001587,
"learning_rate": 1.746936600958977e-05,
"loss": 2.2643,
"step": 4750
},
{
"epoch": 2.56,
"grad_norm": 1.4942309856414795,
"learning_rate": 1.7442727757059138e-05,
"loss": 2.3053,
"step": 4800
},
{
"epoch": 2.58,
"grad_norm": 1.016640305519104,
"learning_rate": 1.7416089504528505e-05,
"loss": 2.2325,
"step": 4850
},
{
"epoch": 2.61,
"grad_norm": 0.8914662003517151,
"learning_rate": 1.738945125199787e-05,
"loss": 2.26,
"step": 4900
},
{
"epoch": 2.64,
"grad_norm": 1.0502891540527344,
"learning_rate": 1.7362812999467235e-05,
"loss": 2.2357,
"step": 4950
},
{
"epoch": 2.66,
"grad_norm": 0.9784315824508667,
"learning_rate": 1.73361747469366e-05,
"loss": 2.2371,
"step": 5000
},
{
"epoch": 2.69,
"grad_norm": 0.9308114647865295,
"learning_rate": 1.7309536494405968e-05,
"loss": 2.2452,
"step": 5050
},
{
"epoch": 2.72,
"grad_norm": 0.9364565014839172,
"learning_rate": 1.7282898241875335e-05,
"loss": 2.235,
"step": 5100
},
{
"epoch": 2.74,
"grad_norm": 0.8989120721817017,
"learning_rate": 1.72562599893447e-05,
"loss": 2.272,
"step": 5150
},
{
"epoch": 2.77,
"grad_norm": 1.2291622161865234,
"learning_rate": 1.7229621736814065e-05,
"loss": 2.278,
"step": 5200
},
{
"epoch": 2.8,
"grad_norm": 0.9619302749633789,
"learning_rate": 1.7202983484283432e-05,
"loss": 2.2554,
"step": 5250
},
{
"epoch": 2.82,
"grad_norm": 1.0430196523666382,
"learning_rate": 1.71763452317528e-05,
"loss": 2.2546,
"step": 5300
},
{
"epoch": 2.85,
"grad_norm": 1.0834633111953735,
"learning_rate": 1.7149706979222165e-05,
"loss": 2.2379,
"step": 5350
},
{
"epoch": 2.88,
"grad_norm": 1.0730029344558716,
"learning_rate": 1.712306872669153e-05,
"loss": 2.2789,
"step": 5400
},
{
"epoch": 2.9,
"grad_norm": 1.124229907989502,
"learning_rate": 1.7096430474160895e-05,
"loss": 2.2293,
"step": 5450
},
{
"epoch": 2.93,
"grad_norm": 0.9467495679855347,
"learning_rate": 1.7069792221630262e-05,
"loss": 2.2606,
"step": 5500
},
{
"epoch": 2.96,
"grad_norm": 0.8856106400489807,
"learning_rate": 1.704315396909963e-05,
"loss": 2.2713,
"step": 5550
},
{
"epoch": 2.98,
"grad_norm": 0.7882632613182068,
"learning_rate": 1.7016515716568996e-05,
"loss": 2.2508,
"step": 5600
},
{
"epoch": 3.0,
"eval_bert-score-f1": 0.8785184490539139,
"eval_bert-score-precision": 0.8821266702464495,
"eval_bert-score-recall": 0.8752563403945616,
"eval_gen_len": 48.600781487871714,
"eval_loss": 2.047697067260742,
"eval_rouge1": 0.324,
"eval_rouge2": 0.1204,
"eval_rougeL": 0.2387,
"eval_rougeLsum": 0.2389,
"eval_runtime": 1313.571,
"eval_samples_per_second": 15.002,
"eval_steps_per_second": 0.358,
"step": 5631
},
{
"epoch": 3.01,
"grad_norm": 0.9630438685417175,
"learning_rate": 1.698987746403836e-05,
"loss": 2.246,
"step": 5650
},
{
"epoch": 3.04,
"grad_norm": 0.8204315900802612,
"learning_rate": 1.6963239211507726e-05,
"loss": 2.225,
"step": 5700
},
{
"epoch": 3.06,
"grad_norm": 1.1320478916168213,
"learning_rate": 1.6936600958977092e-05,
"loss": 2.2206,
"step": 5750
},
{
"epoch": 3.09,
"grad_norm": 0.9559237360954285,
"learning_rate": 1.690996270644646e-05,
"loss": 2.2459,
"step": 5800
},
{
"epoch": 3.12,
"grad_norm": 0.9689226150512695,
"learning_rate": 1.6883324453915822e-05,
"loss": 2.2517,
"step": 5850
},
{
"epoch": 3.14,
"grad_norm": 0.9438573718070984,
"learning_rate": 1.685668620138519e-05,
"loss": 2.2579,
"step": 5900
},
{
"epoch": 3.17,
"grad_norm": 1.1130074262619019,
"learning_rate": 1.6830047948854556e-05,
"loss": 2.2387,
"step": 5950
},
{
"epoch": 3.2,
"grad_norm": 1.1083691120147705,
"learning_rate": 1.6803409696323923e-05,
"loss": 2.2558,
"step": 6000
},
{
"epoch": 3.22,
"grad_norm": 0.96318119764328,
"learning_rate": 1.677677144379329e-05,
"loss": 2.257,
"step": 6050
},
{
"epoch": 3.25,
"grad_norm": 0.8898953795433044,
"learning_rate": 1.6750133191262653e-05,
"loss": 2.2524,
"step": 6100
},
{
"epoch": 3.28,
"grad_norm": 1.2933117151260376,
"learning_rate": 1.672349493873202e-05,
"loss": 2.2551,
"step": 6150
},
{
"epoch": 3.3,
"grad_norm": 1.1499016284942627,
"learning_rate": 1.6696856686201386e-05,
"loss": 2.244,
"step": 6200
},
{
"epoch": 3.33,
"grad_norm": 0.9967105388641357,
"learning_rate": 1.6670218433670753e-05,
"loss": 2.2514,
"step": 6250
},
{
"epoch": 3.36,
"grad_norm": 1.0175275802612305,
"learning_rate": 1.6643580181140116e-05,
"loss": 2.2298,
"step": 6300
},
{
"epoch": 3.38,
"grad_norm": 0.8878999352455139,
"learning_rate": 1.6616941928609483e-05,
"loss": 2.2394,
"step": 6350
},
{
"epoch": 3.41,
"grad_norm": 1.1751534938812256,
"learning_rate": 1.6590303676078853e-05,
"loss": 2.2321,
"step": 6400
},
{
"epoch": 3.44,
"grad_norm": 1.0338590145111084,
"learning_rate": 1.6563665423548217e-05,
"loss": 2.2562,
"step": 6450
},
{
"epoch": 3.46,
"grad_norm": 0.8953673243522644,
"learning_rate": 1.6537027171017583e-05,
"loss": 2.2199,
"step": 6500
},
{
"epoch": 3.49,
"grad_norm": 1.24599027633667,
"learning_rate": 1.6510388918486947e-05,
"loss": 2.2346,
"step": 6550
},
{
"epoch": 3.52,
"grad_norm": 0.953091561794281,
"learning_rate": 1.6483750665956313e-05,
"loss": 2.246,
"step": 6600
},
{
"epoch": 3.54,
"grad_norm": 0.919922947883606,
"learning_rate": 1.645711241342568e-05,
"loss": 2.2288,
"step": 6650
},
{
"epoch": 3.57,
"grad_norm": 1.1812031269073486,
"learning_rate": 1.6430474160895047e-05,
"loss": 2.2419,
"step": 6700
},
{
"epoch": 3.6,
"grad_norm": 0.9377938508987427,
"learning_rate": 1.640383590836441e-05,
"loss": 2.2478,
"step": 6750
},
{
"epoch": 3.62,
"grad_norm": 1.247550368309021,
"learning_rate": 1.6377197655833777e-05,
"loss": 2.2495,
"step": 6800
},
{
"epoch": 3.65,
"grad_norm": 1.1216537952423096,
"learning_rate": 1.6350559403303147e-05,
"loss": 2.2241,
"step": 6850
},
{
"epoch": 3.68,
"grad_norm": 1.038252353668213,
"learning_rate": 1.632392115077251e-05,
"loss": 2.2346,
"step": 6900
},
{
"epoch": 3.7,
"grad_norm": 0.8921188116073608,
"learning_rate": 1.6297282898241877e-05,
"loss": 2.2488,
"step": 6950
},
{
"epoch": 3.73,
"grad_norm": 1.0422166585922241,
"learning_rate": 1.627064464571124e-05,
"loss": 2.2336,
"step": 7000
},
{
"epoch": 3.76,
"grad_norm": 1.0294251441955566,
"learning_rate": 1.624400639318061e-05,
"loss": 2.234,
"step": 7050
},
{
"epoch": 3.78,
"grad_norm": 0.8972188830375671,
"learning_rate": 1.6217368140649974e-05,
"loss": 2.2147,
"step": 7100
},
{
"epoch": 3.81,
"grad_norm": 1.1645957231521606,
"learning_rate": 1.619072988811934e-05,
"loss": 2.2336,
"step": 7150
},
{
"epoch": 3.84,
"grad_norm": 1.2658772468566895,
"learning_rate": 1.6164091635588704e-05,
"loss": 2.2322,
"step": 7200
},
{
"epoch": 3.86,
"grad_norm": 0.923834502696991,
"learning_rate": 1.613745338305807e-05,
"loss": 2.2348,
"step": 7250
},
{
"epoch": 3.89,
"grad_norm": 0.8885565996170044,
"learning_rate": 1.611081513052744e-05,
"loss": 2.2251,
"step": 7300
},
{
"epoch": 3.92,
"grad_norm": 1.1370388269424438,
"learning_rate": 1.6084176877996804e-05,
"loss": 2.2266,
"step": 7350
},
{
"epoch": 3.94,
"grad_norm": 0.9725862145423889,
"learning_rate": 1.605753862546617e-05,
"loss": 2.2243,
"step": 7400
},
{
"epoch": 3.97,
"grad_norm": 0.8951359987258911,
"learning_rate": 1.6030900372935535e-05,
"loss": 2.2278,
"step": 7450
},
{
"epoch": 4.0,
"grad_norm": 0.9756078720092773,
"learning_rate": 1.6004262120404905e-05,
"loss": 2.2591,
"step": 7500
},
{
"epoch": 4.0,
"eval_bert-score-f1": 0.8792554777484974,
"eval_bert-score-precision": 0.8833729287797178,
"eval_bert-score-recall": 0.8754834527081552,
"eval_gen_len": 47.54663554247438,
"eval_loss": 2.039776086807251,
"eval_rouge1": 0.3271,
"eval_rouge2": 0.1225,
"eval_rougeL": 0.2416,
"eval_rougeLsum": 0.2417,
"eval_runtime": 1382.3306,
"eval_samples_per_second": 14.256,
"eval_steps_per_second": 0.34,
"step": 7508
},
{
"epoch": 4.02,
"grad_norm": 0.9563359618186951,
"learning_rate": 1.5977623867874268e-05,
"loss": 2.2286,
"step": 7550
},
{
"epoch": 4.05,
"grad_norm": 0.9959126114845276,
"learning_rate": 1.5950985615343635e-05,
"loss": 2.2287,
"step": 7600
},
{
"epoch": 4.08,
"grad_norm": 0.9405544996261597,
"learning_rate": 1.5924347362813e-05,
"loss": 2.244,
"step": 7650
},
{
"epoch": 4.1,
"grad_norm": 1.0261281728744507,
"learning_rate": 1.5897709110282368e-05,
"loss": 2.2141,
"step": 7700
},
{
"epoch": 4.13,
"grad_norm": 0.8446668386459351,
"learning_rate": 1.5871070857751735e-05,
"loss": 2.2217,
"step": 7750
},
{
"epoch": 4.16,
"grad_norm": 0.981716513633728,
"learning_rate": 1.58444326052211e-05,
"loss": 2.2535,
"step": 7800
},
{
"epoch": 4.18,
"grad_norm": 0.9390552043914795,
"learning_rate": 1.5817794352690465e-05,
"loss": 2.1902,
"step": 7850
},
{
"epoch": 4.21,
"grad_norm": 0.9392485618591309,
"learning_rate": 1.5791156100159832e-05,
"loss": 2.2325,
"step": 7900
},
{
"epoch": 4.24,
"grad_norm": 0.8909654021263123,
"learning_rate": 1.57645178476292e-05,
"loss": 2.2106,
"step": 7950
},
{
"epoch": 4.26,
"grad_norm": 0.9464941620826721,
"learning_rate": 1.5737879595098562e-05,
"loss": 2.1985,
"step": 8000
},
{
"epoch": 4.29,
"grad_norm": 0.9561291337013245,
"learning_rate": 1.571124134256793e-05,
"loss": 2.2595,
"step": 8050
},
{
"epoch": 4.32,
"grad_norm": 1.0075396299362183,
"learning_rate": 1.5684603090037295e-05,
"loss": 2.2328,
"step": 8100
},
{
"epoch": 4.34,
"grad_norm": 1.0439025163650513,
"learning_rate": 1.5657964837506662e-05,
"loss": 2.2036,
"step": 8150
},
{
"epoch": 4.37,
"grad_norm": 0.9002524614334106,
"learning_rate": 1.563132658497603e-05,
"loss": 2.2157,
"step": 8200
},
{
"epoch": 4.4,
"grad_norm": 0.9860824346542358,
"learning_rate": 1.5604688332445392e-05,
"loss": 2.2526,
"step": 8250
},
{
"epoch": 4.42,
"grad_norm": 0.8125404715538025,
"learning_rate": 1.557805007991476e-05,
"loss": 2.2458,
"step": 8300
},
{
"epoch": 4.45,
"grad_norm": 0.9793021082878113,
"learning_rate": 1.5551411827384126e-05,
"loss": 2.2111,
"step": 8350
},
{
"epoch": 4.48,
"grad_norm": 0.9543974995613098,
"learning_rate": 1.5524773574853492e-05,
"loss": 2.2372,
"step": 8400
},
{
"epoch": 4.5,
"grad_norm": 1.0786656141281128,
"learning_rate": 1.5498135322322856e-05,
"loss": 2.2256,
"step": 8450
},
{
"epoch": 4.53,
"grad_norm": 1.1105990409851074,
"learning_rate": 1.5471497069792223e-05,
"loss": 2.2282,
"step": 8500
},
{
"epoch": 4.56,
"grad_norm": 1.4228782653808594,
"learning_rate": 1.544485881726159e-05,
"loss": 2.2482,
"step": 8550
},
{
"epoch": 4.58,
"grad_norm": 1.0002621412277222,
"learning_rate": 1.5418220564730956e-05,
"loss": 2.2104,
"step": 8600
},
{
"epoch": 4.61,
"grad_norm": 0.8620786666870117,
"learning_rate": 1.5391582312200323e-05,
"loss": 2.2201,
"step": 8650
},
{
"epoch": 4.64,
"grad_norm": 0.9315156936645508,
"learning_rate": 1.5364944059669686e-05,
"loss": 2.2519,
"step": 8700
},
{
"epoch": 4.66,
"grad_norm": 0.9382575750350952,
"learning_rate": 1.5338305807139053e-05,
"loss": 2.2439,
"step": 8750
},
{
"epoch": 4.69,
"grad_norm": 0.9228121042251587,
"learning_rate": 1.531166755460842e-05,
"loss": 2.2105,
"step": 8800
},
{
"epoch": 4.71,
"grad_norm": 0.9347973465919495,
"learning_rate": 1.5285029302077786e-05,
"loss": 2.2201,
"step": 8850
},
{
"epoch": 4.74,
"grad_norm": 1.0652745962142944,
"learning_rate": 1.525839104954715e-05,
"loss": 2.2117,
"step": 8900
},
{
"epoch": 4.77,
"grad_norm": 0.9884467720985413,
"learning_rate": 1.5231752797016516e-05,
"loss": 2.2058,
"step": 8950
},
{
"epoch": 4.79,
"grad_norm": 1.2926949262619019,
"learning_rate": 1.5205114544485883e-05,
"loss": 2.2062,
"step": 9000
},
{
"epoch": 4.82,
"grad_norm": 0.9000586867332458,
"learning_rate": 1.5178476291955248e-05,
"loss": 2.2578,
"step": 9050
},
{
"epoch": 4.85,
"grad_norm": 1.1215986013412476,
"learning_rate": 1.5151838039424615e-05,
"loss": 2.2035,
"step": 9100
},
{
"epoch": 4.87,
"grad_norm": 0.9411687254905701,
"learning_rate": 1.512519978689398e-05,
"loss": 2.2392,
"step": 9150
},
{
"epoch": 4.9,
"grad_norm": 0.9772723913192749,
"learning_rate": 1.5098561534363347e-05,
"loss": 2.2341,
"step": 9200
},
{
"epoch": 4.93,
"grad_norm": 0.9062642455101013,
"learning_rate": 1.5071923281832712e-05,
"loss": 2.2132,
"step": 9250
},
{
"epoch": 4.95,
"grad_norm": 0.9838491082191467,
"learning_rate": 1.5045285029302079e-05,
"loss": 2.2316,
"step": 9300
},
{
"epoch": 4.98,
"grad_norm": 0.7986493110656738,
"learning_rate": 1.5018646776771445e-05,
"loss": 2.2406,
"step": 9350
},
{
"epoch": 5.0,
"eval_bert-score-f1": 0.8796141604796214,
"eval_bert-score-precision": 0.883697147159108,
"eval_bert-score-recall": 0.8758704174409406,
"eval_gen_len": 47.69806150411042,
"eval_loss": 2.032108783721924,
"eval_rouge1": 0.3291,
"eval_rouge2": 0.1235,
"eval_rougeL": 0.2433,
"eval_rougeLsum": 0.2434,
"eval_runtime": 1320.3538,
"eval_samples_per_second": 14.925,
"eval_steps_per_second": 0.356,
"step": 9385
},
{
"epoch": 5.01,
"grad_norm": 1.181012511253357,
"learning_rate": 1.499200852424081e-05,
"loss": 2.205,
"step": 9400
},
{
"epoch": 5.03,
"grad_norm": 0.8982387185096741,
"learning_rate": 1.4965370271710177e-05,
"loss": 2.2044,
"step": 9450
},
{
"epoch": 5.06,
"grad_norm": 0.9843763709068298,
"learning_rate": 1.4938732019179542e-05,
"loss": 2.2087,
"step": 9500
},
{
"epoch": 5.09,
"grad_norm": 0.9226497411727905,
"learning_rate": 1.4912093766648909e-05,
"loss": 2.2019,
"step": 9550
},
{
"epoch": 5.11,
"grad_norm": 1.0900003910064697,
"learning_rate": 1.4885455514118274e-05,
"loss": 2.2219,
"step": 9600
},
{
"epoch": 5.14,
"grad_norm": 1.0898627042770386,
"learning_rate": 1.485881726158764e-05,
"loss": 2.2281,
"step": 9650
},
{
"epoch": 5.17,
"grad_norm": 1.1963268518447876,
"learning_rate": 1.4832179009057006e-05,
"loss": 2.2289,
"step": 9700
},
{
"epoch": 5.19,
"grad_norm": 0.9371781349182129,
"learning_rate": 1.4805540756526373e-05,
"loss": 2.2287,
"step": 9750
},
{
"epoch": 5.22,
"grad_norm": 0.9393157958984375,
"learning_rate": 1.4778902503995741e-05,
"loss": 2.1454,
"step": 9800
},
{
"epoch": 5.25,
"grad_norm": 1.1463308334350586,
"learning_rate": 1.4752264251465104e-05,
"loss": 2.2116,
"step": 9850
},
{
"epoch": 5.27,
"grad_norm": 1.232642412185669,
"learning_rate": 1.4725625998934471e-05,
"loss": 2.2056,
"step": 9900
},
{
"epoch": 5.3,
"grad_norm": 0.8566424250602722,
"learning_rate": 1.4698987746403836e-05,
"loss": 2.2077,
"step": 9950
},
{
"epoch": 5.33,
"grad_norm": 1.073114275932312,
"learning_rate": 1.4672349493873203e-05,
"loss": 2.2043,
"step": 10000
},
{
"epoch": 5.35,
"grad_norm": 1.100190281867981,
"learning_rate": 1.4645711241342568e-05,
"loss": 2.2105,
"step": 10050
},
{
"epoch": 5.38,
"grad_norm": 0.8915311694145203,
"learning_rate": 1.4619072988811935e-05,
"loss": 2.2263,
"step": 10100
},
{
"epoch": 5.41,
"grad_norm": 1.0838483572006226,
"learning_rate": 1.45924347362813e-05,
"loss": 2.2386,
"step": 10150
},
{
"epoch": 5.43,
"grad_norm": 1.0265840291976929,
"learning_rate": 1.4565796483750666e-05,
"loss": 2.2046,
"step": 10200
},
{
"epoch": 5.46,
"grad_norm": 1.012404441833496,
"learning_rate": 1.4539158231220035e-05,
"loss": 2.1914,
"step": 10250
},
{
"epoch": 5.49,
"grad_norm": 0.8332634568214417,
"learning_rate": 1.4512519978689398e-05,
"loss": 2.2424,
"step": 10300
},
{
"epoch": 5.51,
"grad_norm": 0.9419781565666199,
"learning_rate": 1.4485881726158767e-05,
"loss": 2.2081,
"step": 10350
},
{
"epoch": 5.54,
"grad_norm": 0.9537687301635742,
"learning_rate": 1.445924347362813e-05,
"loss": 2.2382,
"step": 10400
},
{
"epoch": 5.57,
"grad_norm": 0.8523421883583069,
"learning_rate": 1.4432605221097498e-05,
"loss": 2.2479,
"step": 10450
},
{
"epoch": 5.59,
"grad_norm": 0.9717277884483337,
"learning_rate": 1.4405966968566862e-05,
"loss": 2.2335,
"step": 10500
},
{
"epoch": 5.62,
"grad_norm": 1.2556852102279663,
"learning_rate": 1.4379328716036229e-05,
"loss": 2.207,
"step": 10550
},
{
"epoch": 5.65,
"grad_norm": 1.0949528217315674,
"learning_rate": 1.4352690463505594e-05,
"loss": 2.2454,
"step": 10600
},
{
"epoch": 5.67,
"grad_norm": 0.8859919309616089,
"learning_rate": 1.432605221097496e-05,
"loss": 2.1991,
"step": 10650
},
{
"epoch": 5.7,
"grad_norm": 1.0127480030059814,
"learning_rate": 1.4299413958444329e-05,
"loss": 2.2031,
"step": 10700
},
{
"epoch": 5.73,
"grad_norm": 1.0773361921310425,
"learning_rate": 1.4272775705913692e-05,
"loss": 2.2183,
"step": 10750
},
{
"epoch": 5.75,
"grad_norm": 1.2354990243911743,
"learning_rate": 1.424613745338306e-05,
"loss": 2.1787,
"step": 10800
},
{
"epoch": 5.78,
"grad_norm": 0.8633403182029724,
"learning_rate": 1.4219499200852424e-05,
"loss": 2.2058,
"step": 10850
},
{
"epoch": 5.81,
"grad_norm": 0.8349035382270813,
"learning_rate": 1.4192860948321792e-05,
"loss": 2.2102,
"step": 10900
},
{
"epoch": 5.83,
"grad_norm": 0.954465389251709,
"learning_rate": 1.4166222695791156e-05,
"loss": 2.2074,
"step": 10950
},
{
"epoch": 5.86,
"grad_norm": 1.0947058200836182,
"learning_rate": 1.4139584443260524e-05,
"loss": 2.2063,
"step": 11000
},
{
"epoch": 5.89,
"grad_norm": 1.0066620111465454,
"learning_rate": 1.4112946190729891e-05,
"loss": 2.2391,
"step": 11050
},
{
"epoch": 5.91,
"grad_norm": 1.3262407779693604,
"learning_rate": 1.4086307938199256e-05,
"loss": 2.1883,
"step": 11100
},
{
"epoch": 5.94,
"grad_norm": 0.898461639881134,
"learning_rate": 1.4059669685668623e-05,
"loss": 2.2254,
"step": 11150
},
{
"epoch": 5.97,
"grad_norm": 1.2728203535079956,
"learning_rate": 1.4033031433137986e-05,
"loss": 2.2002,
"step": 11200
},
{
"epoch": 5.99,
"grad_norm": 0.8816812038421631,
"learning_rate": 1.4006393180607354e-05,
"loss": 2.222,
"step": 11250
},
{
"epoch": 6.0,
"eval_bert-score-f1": 0.8800827935451446,
"eval_bert-score-precision": 0.884203289352576,
"eval_bert-score-recall": 0.8763012464314104,
"eval_gen_len": 47.73409113975439,
"eval_loss": 2.0270581245422363,
"eval_rouge1": 0.3308,
"eval_rouge2": 0.125,
"eval_rougeL": 0.2449,
"eval_rougeLsum": 0.245,
"eval_runtime": 1367.5469,
"eval_samples_per_second": 14.41,
"eval_steps_per_second": 0.344,
"step": 11262
},
{
"epoch": 6.02,
"grad_norm": 0.9405691623687744,
"learning_rate": 1.3979754928076718e-05,
"loss": 2.1931,
"step": 11300
},
{
"epoch": 6.05,
"grad_norm": 0.8556106686592102,
"learning_rate": 1.3953116675546086e-05,
"loss": 2.2086,
"step": 11350
},
{
"epoch": 6.07,
"grad_norm": 1.0254322290420532,
"learning_rate": 1.392647842301545e-05,
"loss": 2.2235,
"step": 11400
},
{
"epoch": 6.1,
"grad_norm": 0.9665789604187012,
"learning_rate": 1.3899840170484818e-05,
"loss": 2.1956,
"step": 11450
},
{
"epoch": 6.13,
"grad_norm": 1.1138479709625244,
"learning_rate": 1.3873201917954185e-05,
"loss": 2.2233,
"step": 11500
},
{
"epoch": 6.15,
"grad_norm": 0.9537137150764465,
"learning_rate": 1.384656366542355e-05,
"loss": 2.2047,
"step": 11550
},
{
"epoch": 6.18,
"grad_norm": 0.9105952978134155,
"learning_rate": 1.3819925412892917e-05,
"loss": 2.1739,
"step": 11600
},
{
"epoch": 6.21,
"grad_norm": 0.9230592250823975,
"learning_rate": 1.3793287160362282e-05,
"loss": 2.1911,
"step": 11650
},
{
"epoch": 6.23,
"grad_norm": 0.944442868232727,
"learning_rate": 1.3766648907831648e-05,
"loss": 2.2074,
"step": 11700
},
{
"epoch": 6.26,
"grad_norm": 0.9925301671028137,
"learning_rate": 1.3740010655301013e-05,
"loss": 2.1973,
"step": 11750
},
{
"epoch": 6.29,
"grad_norm": 1.0720447301864624,
"learning_rate": 1.371337240277038e-05,
"loss": 2.181,
"step": 11800
},
{
"epoch": 6.31,
"grad_norm": 0.9020980000495911,
"learning_rate": 1.3686734150239745e-05,
"loss": 2.193,
"step": 11850
},
{
"epoch": 6.34,
"grad_norm": 0.9980204701423645,
"learning_rate": 1.3660095897709112e-05,
"loss": 2.2342,
"step": 11900
},
{
"epoch": 6.37,
"grad_norm": 0.9240878820419312,
"learning_rate": 1.3633457645178479e-05,
"loss": 2.1855,
"step": 11950
},
{
"epoch": 6.39,
"grad_norm": 1.1149895191192627,
"learning_rate": 1.3606819392647844e-05,
"loss": 2.1918,
"step": 12000
},
{
"epoch": 6.42,
"grad_norm": 0.8434773683547974,
"learning_rate": 1.358018114011721e-05,
"loss": 2.2347,
"step": 12050
},
{
"epoch": 6.45,
"grad_norm": 0.8160800337791443,
"learning_rate": 1.3553542887586576e-05,
"loss": 2.2275,
"step": 12100
},
{
"epoch": 6.47,
"grad_norm": 1.0393248796463013,
"learning_rate": 1.3526904635055942e-05,
"loss": 2.2449,
"step": 12150
},
{
"epoch": 6.5,
"grad_norm": 0.9805082082748413,
"learning_rate": 1.3500266382525307e-05,
"loss": 2.2118,
"step": 12200
},
{
"epoch": 6.53,
"grad_norm": 0.8884342908859253,
"learning_rate": 1.3473628129994674e-05,
"loss": 2.2184,
"step": 12250
},
{
"epoch": 6.55,
"grad_norm": 1.1096868515014648,
"learning_rate": 1.3446989877464039e-05,
"loss": 2.231,
"step": 12300
},
{
"epoch": 6.58,
"grad_norm": 0.937127947807312,
"learning_rate": 1.3420351624933406e-05,
"loss": 2.2032,
"step": 12350
},
{
"epoch": 6.61,
"grad_norm": 1.0019285678863525,
"learning_rate": 1.3393713372402773e-05,
"loss": 2.2028,
"step": 12400
},
{
"epoch": 6.63,
"grad_norm": 1.070123553276062,
"learning_rate": 1.3367075119872138e-05,
"loss": 2.2673,
"step": 12450
},
{
"epoch": 6.66,
"grad_norm": 0.8974484801292419,
"learning_rate": 1.3340436867341504e-05,
"loss": 2.1909,
"step": 12500
},
{
"epoch": 6.69,
"grad_norm": 0.9534024596214294,
"learning_rate": 1.331379861481087e-05,
"loss": 2.2061,
"step": 12550
},
{
"epoch": 6.71,
"grad_norm": 0.8722013831138611,
"learning_rate": 1.3287160362280236e-05,
"loss": 2.2054,
"step": 12600
},
{
"epoch": 6.74,
"grad_norm": 1.222458004951477,
"learning_rate": 1.3260522109749601e-05,
"loss": 2.167,
"step": 12650
},
{
"epoch": 6.77,
"grad_norm": 0.8800060153007507,
"learning_rate": 1.3233883857218968e-05,
"loss": 2.1955,
"step": 12700
},
{
"epoch": 6.79,
"grad_norm": 1.005924105644226,
"learning_rate": 1.3207245604688335e-05,
"loss": 2.1644,
"step": 12750
},
{
"epoch": 6.82,
"grad_norm": 1.0622010231018066,
"learning_rate": 1.31806073521577e-05,
"loss": 2.226,
"step": 12800
},
{
"epoch": 6.85,
"grad_norm": 0.9073338508605957,
"learning_rate": 1.3153969099627067e-05,
"loss": 2.1781,
"step": 12850
},
{
"epoch": 6.87,
"grad_norm": 1.0695091485977173,
"learning_rate": 1.3127330847096432e-05,
"loss": 2.1965,
"step": 12900
},
{
"epoch": 6.9,
"grad_norm": 1.0539031028747559,
"learning_rate": 1.3100692594565798e-05,
"loss": 2.1864,
"step": 12950
},
{
"epoch": 6.93,
"grad_norm": 1.0355671644210815,
"learning_rate": 1.3074054342035163e-05,
"loss": 2.21,
"step": 13000
},
{
"epoch": 6.95,
"grad_norm": 0.9524690508842468,
"learning_rate": 1.304741608950453e-05,
"loss": 2.2146,
"step": 13050
},
{
"epoch": 6.98,
"grad_norm": 0.8750210404396057,
"learning_rate": 1.3020777836973895e-05,
"loss": 2.2185,
"step": 13100
},
{
"epoch": 7.0,
"eval_bert-score-f1": 0.8803590643584976,
"eval_bert-score-precision": 0.8845637915861083,
"eval_bert-score-recall": 0.8764868623034598,
"eval_gen_len": 47.655891606617274,
"eval_loss": 2.022202253341675,
"eval_rouge1": 0.3314,
"eval_rouge2": 0.1248,
"eval_rougeL": 0.2451,
"eval_rougeLsum": 0.2452,
"eval_runtime": 1324.8965,
"eval_samples_per_second": 14.874,
"eval_steps_per_second": 0.355,
"step": 13139
},
{
"epoch": 7.01,
"grad_norm": 1.8067052364349365,
"learning_rate": 1.2994139584443262e-05,
"loss": 2.2044,
"step": 13150
},
{
"epoch": 7.03,
"grad_norm": 1.0369492769241333,
"learning_rate": 1.2967501331912629e-05,
"loss": 2.2093,
"step": 13200
},
{
"epoch": 7.06,
"grad_norm": 0.7998984456062317,
"learning_rate": 1.2940863079381994e-05,
"loss": 2.1803,
"step": 13250
},
{
"epoch": 7.09,
"grad_norm": 0.8763892650604248,
"learning_rate": 1.291422482685136e-05,
"loss": 2.2028,
"step": 13300
},
{
"epoch": 7.11,
"grad_norm": 1.031553864479065,
"learning_rate": 1.2887586574320726e-05,
"loss": 2.1598,
"step": 13350
},
{
"epoch": 7.14,
"grad_norm": 1.2158304452896118,
"learning_rate": 1.2860948321790092e-05,
"loss": 2.2008,
"step": 13400
},
{
"epoch": 7.17,
"grad_norm": 1.0015206336975098,
"learning_rate": 1.2834310069259457e-05,
"loss": 2.2118,
"step": 13450
},
{
"epoch": 7.19,
"grad_norm": 1.3029738664627075,
"learning_rate": 1.2807671816728824e-05,
"loss": 2.163,
"step": 13500
},
{
"epoch": 7.22,
"grad_norm": 1.1144462823867798,
"learning_rate": 1.2781033564198189e-05,
"loss": 2.1821,
"step": 13550
},
{
"epoch": 7.25,
"grad_norm": 0.9808390736579895,
"learning_rate": 1.2754395311667556e-05,
"loss": 2.1844,
"step": 13600
},
{
"epoch": 7.27,
"grad_norm": 0.8838719129562378,
"learning_rate": 1.2727757059136923e-05,
"loss": 2.1936,
"step": 13650
},
{
"epoch": 7.3,
"grad_norm": 1.0219964981079102,
"learning_rate": 1.2701118806606288e-05,
"loss": 2.1864,
"step": 13700
},
{
"epoch": 7.33,
"grad_norm": 0.9708404541015625,
"learning_rate": 1.2674480554075654e-05,
"loss": 2.1734,
"step": 13750
},
{
"epoch": 7.35,
"grad_norm": 0.9595683217048645,
"learning_rate": 1.264784230154502e-05,
"loss": 2.2069,
"step": 13800
},
{
"epoch": 7.38,
"grad_norm": 1.2408117055892944,
"learning_rate": 1.2621204049014386e-05,
"loss": 2.1918,
"step": 13850
},
{
"epoch": 7.41,
"grad_norm": 0.9526282548904419,
"learning_rate": 1.2594565796483751e-05,
"loss": 2.2069,
"step": 13900
},
{
"epoch": 7.43,
"grad_norm": 0.9085534811019897,
"learning_rate": 1.2567927543953118e-05,
"loss": 2.1929,
"step": 13950
},
{
"epoch": 7.46,
"grad_norm": 0.9988005757331848,
"learning_rate": 1.2541289291422483e-05,
"loss": 2.212,
"step": 14000
},
{
"epoch": 7.49,
"grad_norm": 0.9731308221817017,
"learning_rate": 1.251465103889185e-05,
"loss": 2.2168,
"step": 14050
},
{
"epoch": 7.51,
"grad_norm": 1.2218581438064575,
"learning_rate": 1.2488012786361216e-05,
"loss": 2.2136,
"step": 14100
},
{
"epoch": 7.54,
"grad_norm": 1.3860177993774414,
"learning_rate": 1.2461374533830582e-05,
"loss": 2.1844,
"step": 14150
},
{
"epoch": 7.57,
"grad_norm": 1.3227049112319946,
"learning_rate": 1.2434736281299948e-05,
"loss": 2.2017,
"step": 14200
},
{
"epoch": 7.59,
"grad_norm": 0.9687981009483337,
"learning_rate": 1.2408098028769313e-05,
"loss": 2.2242,
"step": 14250
},
{
"epoch": 7.62,
"grad_norm": 1.0260825157165527,
"learning_rate": 1.238145977623868e-05,
"loss": 2.2096,
"step": 14300
},
{
"epoch": 7.65,
"grad_norm": 0.9353439211845398,
"learning_rate": 1.2354821523708045e-05,
"loss": 2.2122,
"step": 14350
},
{
"epoch": 7.67,
"grad_norm": 1.0099036693572998,
"learning_rate": 1.2328183271177412e-05,
"loss": 2.2187,
"step": 14400
},
{
"epoch": 7.7,
"grad_norm": 0.8156920075416565,
"learning_rate": 1.2301545018646779e-05,
"loss": 2.1853,
"step": 14450
},
{
"epoch": 7.73,
"grad_norm": 14.783089637756348,
"learning_rate": 1.2274906766116144e-05,
"loss": 2.1917,
"step": 14500
},
{
"epoch": 7.75,
"grad_norm": 0.9252942204475403,
"learning_rate": 1.224826851358551e-05,
"loss": 2.2074,
"step": 14550
},
{
"epoch": 7.78,
"grad_norm": 0.9822306632995605,
"learning_rate": 1.2221630261054875e-05,
"loss": 2.193,
"step": 14600
},
{
"epoch": 7.81,
"grad_norm": 1.1552455425262451,
"learning_rate": 1.2194992008524242e-05,
"loss": 2.1851,
"step": 14650
},
{
"epoch": 7.83,
"grad_norm": 1.0293680429458618,
"learning_rate": 1.2168353755993607e-05,
"loss": 2.2109,
"step": 14700
},
{
"epoch": 7.86,
"grad_norm": 1.0043119192123413,
"learning_rate": 1.2141715503462974e-05,
"loss": 2.2112,
"step": 14750
},
{
"epoch": 7.88,
"grad_norm": 0.9331013560295105,
"learning_rate": 1.2115077250932339e-05,
"loss": 2.1837,
"step": 14800
},
{
"epoch": 7.91,
"grad_norm": 0.9587385058403015,
"learning_rate": 1.2088438998401706e-05,
"loss": 2.1773,
"step": 14850
},
{
"epoch": 7.94,
"grad_norm": 1.1071295738220215,
"learning_rate": 1.2061800745871073e-05,
"loss": 2.2065,
"step": 14900
},
{
"epoch": 7.96,
"grad_norm": 1.34778892993927,
"learning_rate": 1.2035162493340438e-05,
"loss": 2.1854,
"step": 14950
},
{
"epoch": 7.99,
"grad_norm": 3.2136006355285645,
"learning_rate": 1.2008524240809804e-05,
"loss": 2.1886,
"step": 15000
},
{
"epoch": 8.0,
"eval_bert-score-f1": 0.8805449074880007,
"eval_bert-score-precision": 0.8848301609899627,
"eval_bert-score-recall": 0.8765954361686147,
"eval_gen_len": 47.339947224195676,
"eval_loss": 2.0204520225524902,
"eval_rouge1": 0.3323,
"eval_rouge2": 0.1257,
"eval_rougeL": 0.2461,
"eval_rougeLsum": 0.2462,
"eval_runtime": 1323.5674,
"eval_samples_per_second": 14.889,
"eval_steps_per_second": 0.355,
"step": 15016
},
{
"epoch": 8.02,
"grad_norm": 0.8995711803436279,
"learning_rate": 1.198188598827917e-05,
"loss": 2.1601,
"step": 15050
},
{
"epoch": 8.04,
"grad_norm": 1.0043758153915405,
"learning_rate": 1.1955247735748536e-05,
"loss": 2.2012,
"step": 15100
},
{
"epoch": 8.07,
"grad_norm": 0.9039593935012817,
"learning_rate": 1.1928609483217901e-05,
"loss": 2.2207,
"step": 15150
},
{
"epoch": 8.1,
"grad_norm": 0.9619396328926086,
"learning_rate": 1.1901971230687268e-05,
"loss": 2.1609,
"step": 15200
},
{
"epoch": 8.12,
"grad_norm": 1.0527337789535522,
"learning_rate": 1.1875332978156633e-05,
"loss": 2.221,
"step": 15250
},
{
"epoch": 8.15,
"grad_norm": 0.9329215288162231,
"learning_rate": 1.1848694725626e-05,
"loss": 2.1669,
"step": 15300
},
{
"epoch": 8.18,
"grad_norm": 1.0186532735824585,
"learning_rate": 1.1822056473095366e-05,
"loss": 2.1857,
"step": 15350
},
{
"epoch": 8.2,
"grad_norm": 1.1034983396530151,
"learning_rate": 1.1795418220564731e-05,
"loss": 2.1956,
"step": 15400
},
{
"epoch": 8.23,
"grad_norm": 1.0719212293624878,
"learning_rate": 1.1768779968034098e-05,
"loss": 2.2007,
"step": 15450
},
{
"epoch": 8.26,
"grad_norm": 1.5037603378295898,
"learning_rate": 1.1742141715503463e-05,
"loss": 2.1967,
"step": 15500
},
{
"epoch": 8.28,
"grad_norm": 0.9770453572273254,
"learning_rate": 1.171550346297283e-05,
"loss": 2.218,
"step": 15550
},
{
"epoch": 8.31,
"grad_norm": 1.0334933996200562,
"learning_rate": 1.1688865210442195e-05,
"loss": 2.1892,
"step": 15600
},
{
"epoch": 8.34,
"grad_norm": 0.9753350615501404,
"learning_rate": 1.1662226957911562e-05,
"loss": 2.2074,
"step": 15650
},
{
"epoch": 8.36,
"grad_norm": 0.9147941470146179,
"learning_rate": 1.1635588705380927e-05,
"loss": 2.2111,
"step": 15700
},
{
"epoch": 8.39,
"grad_norm": 1.0154210329055786,
"learning_rate": 1.1608950452850294e-05,
"loss": 2.1971,
"step": 15750
},
{
"epoch": 8.42,
"grad_norm": 1.0365736484527588,
"learning_rate": 1.158231220031966e-05,
"loss": 2.1874,
"step": 15800
},
{
"epoch": 8.44,
"grad_norm": 0.894719660282135,
"learning_rate": 1.1555673947789025e-05,
"loss": 2.2059,
"step": 15850
},
{
"epoch": 8.47,
"grad_norm": 0.9420655369758606,
"learning_rate": 1.1529035695258392e-05,
"loss": 2.1473,
"step": 15900
},
{
"epoch": 8.5,
"grad_norm": 1.0870007276535034,
"learning_rate": 1.1502397442727757e-05,
"loss": 2.1944,
"step": 15950
},
{
"epoch": 8.52,
"grad_norm": 1.072520136833191,
"learning_rate": 1.1475759190197124e-05,
"loss": 2.1914,
"step": 16000
},
{
"epoch": 8.55,
"grad_norm": 0.9754800200462341,
"learning_rate": 1.1449120937666489e-05,
"loss": 2.1726,
"step": 16050
},
{
"epoch": 8.58,
"grad_norm": 0.983051598072052,
"learning_rate": 1.1422482685135856e-05,
"loss": 2.1591,
"step": 16100
},
{
"epoch": 8.6,
"grad_norm": 0.9521353244781494,
"learning_rate": 1.1395844432605222e-05,
"loss": 2.1675,
"step": 16150
},
{
"epoch": 8.63,
"grad_norm": 1.016432762145996,
"learning_rate": 1.1369206180074587e-05,
"loss": 2.1821,
"step": 16200
},
{
"epoch": 8.66,
"grad_norm": 0.9886535406112671,
"learning_rate": 1.1342567927543954e-05,
"loss": 2.1711,
"step": 16250
},
{
"epoch": 8.68,
"grad_norm": 0.9348542094230652,
"learning_rate": 1.131592967501332e-05,
"loss": 2.2016,
"step": 16300
},
{
"epoch": 8.71,
"grad_norm": 0.8952618837356567,
"learning_rate": 1.1289291422482686e-05,
"loss": 2.1632,
"step": 16350
},
{
"epoch": 8.74,
"grad_norm": 0.9894302487373352,
"learning_rate": 1.1262653169952051e-05,
"loss": 2.1792,
"step": 16400
},
{
"epoch": 8.76,
"grad_norm": 0.899584949016571,
"learning_rate": 1.1236014917421418e-05,
"loss": 2.1698,
"step": 16450
},
{
"epoch": 8.79,
"grad_norm": 0.9734613299369812,
"learning_rate": 1.1209376664890783e-05,
"loss": 2.2253,
"step": 16500
},
{
"epoch": 8.82,
"grad_norm": 1.0697177648544312,
"learning_rate": 1.118273841236015e-05,
"loss": 2.2307,
"step": 16550
},
{
"epoch": 8.84,
"grad_norm": 0.827741801738739,
"learning_rate": 1.1156100159829516e-05,
"loss": 2.179,
"step": 16600
},
{
"epoch": 8.87,
"grad_norm": 0.8641231060028076,
"learning_rate": 1.1129461907298881e-05,
"loss": 2.1768,
"step": 16650
},
{
"epoch": 8.9,
"grad_norm": 0.9500383734703064,
"learning_rate": 1.1102823654768248e-05,
"loss": 2.185,
"step": 16700
},
{
"epoch": 8.92,
"grad_norm": 0.8771828413009644,
"learning_rate": 1.1076185402237613e-05,
"loss": 2.2018,
"step": 16750
},
{
"epoch": 8.95,
"grad_norm": 0.7786308526992798,
"learning_rate": 1.104954714970698e-05,
"loss": 2.1856,
"step": 16800
},
{
"epoch": 8.98,
"grad_norm": 0.9104407429695129,
"learning_rate": 1.1022908897176345e-05,
"loss": 2.2215,
"step": 16850
},
{
"epoch": 9.0,
"eval_bert-score-f1": 0.8808141563956924,
"eval_bert-score-precision": 0.8852914021691005,
"eval_bert-score-recall": 0.8766740019322894,
"eval_gen_len": 47.14802598193444,
"eval_loss": 2.0156819820404053,
"eval_rouge1": 0.333,
"eval_rouge2": 0.1262,
"eval_rougeL": 0.2467,
"eval_rougeLsum": 0.2467,
"eval_runtime": 1329.5202,
"eval_samples_per_second": 14.822,
"eval_steps_per_second": 0.354,
"step": 16893
},
{
"epoch": 9.0,
"grad_norm": 0.9584008455276489,
"learning_rate": 1.0996270644645712e-05,
"loss": 2.1627,
"step": 16900
},
{
"epoch": 9.03,
"grad_norm": 1.0817357301712036,
"learning_rate": 1.0969632392115077e-05,
"loss": 2.174,
"step": 16950
},
{
"epoch": 9.06,
"grad_norm": 1.1630058288574219,
"learning_rate": 1.0942994139584444e-05,
"loss": 2.1988,
"step": 17000
},
{
"epoch": 9.08,
"grad_norm": 1.065058946609497,
"learning_rate": 1.091635588705381e-05,
"loss": 2.1628,
"step": 17050
},
{
"epoch": 9.11,
"grad_norm": 1.3880870342254639,
"learning_rate": 1.0889717634523175e-05,
"loss": 2.177,
"step": 17100
},
{
"epoch": 9.14,
"grad_norm": 1.0271745920181274,
"learning_rate": 1.0863079381992542e-05,
"loss": 2.212,
"step": 17150
},
{
"epoch": 9.16,
"grad_norm": 0.9514613747596741,
"learning_rate": 1.0836441129461907e-05,
"loss": 2.1729,
"step": 17200
},
{
"epoch": 9.19,
"grad_norm": 0.9049180746078491,
"learning_rate": 1.0809802876931274e-05,
"loss": 2.1928,
"step": 17250
},
{
"epoch": 9.22,
"grad_norm": 1.0372991561889648,
"learning_rate": 1.0783164624400639e-05,
"loss": 2.207,
"step": 17300
},
{
"epoch": 9.24,
"grad_norm": 0.9088106751441956,
"learning_rate": 1.0756526371870006e-05,
"loss": 2.1818,
"step": 17350
},
{
"epoch": 9.27,
"grad_norm": 1.0153067111968994,
"learning_rate": 1.072988811933937e-05,
"loss": 2.1996,
"step": 17400
},
{
"epoch": 9.3,
"grad_norm": 1.0672743320465088,
"learning_rate": 1.0703249866808737e-05,
"loss": 2.2183,
"step": 17450
},
{
"epoch": 9.32,
"grad_norm": 1.0173332691192627,
"learning_rate": 1.0676611614278106e-05,
"loss": 2.2153,
"step": 17500
},
{
"epoch": 9.35,
"grad_norm": 1.1739368438720703,
"learning_rate": 1.064997336174747e-05,
"loss": 2.1694,
"step": 17550
},
{
"epoch": 9.38,
"grad_norm": 0.8909546136856079,
"learning_rate": 1.0623335109216838e-05,
"loss": 2.1908,
"step": 17600
},
{
"epoch": 9.4,
"grad_norm": 0.9530623555183411,
"learning_rate": 1.0596696856686201e-05,
"loss": 2.2027,
"step": 17650
},
{
"epoch": 9.43,
"grad_norm": 0.8588944673538208,
"learning_rate": 1.057005860415557e-05,
"loss": 2.143,
"step": 17700
},
{
"epoch": 9.46,
"grad_norm": 1.096243977546692,
"learning_rate": 1.0543420351624933e-05,
"loss": 2.1315,
"step": 17750
},
{
"epoch": 9.48,
"grad_norm": 0.888977587223053,
"learning_rate": 1.05167820990943e-05,
"loss": 2.1478,
"step": 17800
},
{
"epoch": 9.51,
"grad_norm": 1.374234676361084,
"learning_rate": 1.0490143846563668e-05,
"loss": 2.1564,
"step": 17850
},
{
"epoch": 9.54,
"grad_norm": 1.0646979808807373,
"learning_rate": 1.0463505594033031e-05,
"loss": 2.1963,
"step": 17900
},
{
"epoch": 9.56,
"grad_norm": 0.8910280466079712,
"learning_rate": 1.04368673415024e-05,
"loss": 2.1892,
"step": 17950
},
{
"epoch": 9.59,
"grad_norm": 1.0399140119552612,
"learning_rate": 1.0410229088971763e-05,
"loss": 2.2132,
"step": 18000
},
{
"epoch": 9.62,
"grad_norm": 1.1613365411758423,
"learning_rate": 1.0383590836441132e-05,
"loss": 2.1946,
"step": 18050
},
{
"epoch": 9.64,
"grad_norm": 0.9431652426719666,
"learning_rate": 1.0356952583910495e-05,
"loss": 2.1825,
"step": 18100
},
{
"epoch": 9.67,
"grad_norm": 1.1334350109100342,
"learning_rate": 1.0330314331379863e-05,
"loss": 2.1937,
"step": 18150
},
{
"epoch": 9.7,
"grad_norm": 1.1028201580047607,
"learning_rate": 1.0303676078849227e-05,
"loss": 2.2181,
"step": 18200
},
{
"epoch": 9.72,
"grad_norm": 0.9916718602180481,
"learning_rate": 1.0277037826318595e-05,
"loss": 2.174,
"step": 18250
},
{
"epoch": 9.75,
"grad_norm": 1.0355281829833984,
"learning_rate": 1.0250399573787962e-05,
"loss": 2.1565,
"step": 18300
},
{
"epoch": 9.78,
"grad_norm": 0.901858925819397,
"learning_rate": 1.0223761321257327e-05,
"loss": 2.1732,
"step": 18350
},
{
"epoch": 9.8,
"grad_norm": 1.1691358089447021,
"learning_rate": 1.0197123068726694e-05,
"loss": 2.1854,
"step": 18400
},
{
"epoch": 9.83,
"grad_norm": 1.1045140027999878,
"learning_rate": 1.0170484816196057e-05,
"loss": 2.1906,
"step": 18450
},
{
"epoch": 9.86,
"grad_norm": 1.4803402423858643,
"learning_rate": 1.0143846563665425e-05,
"loss": 2.1744,
"step": 18500
},
{
"epoch": 9.88,
"grad_norm": 1.0411149263381958,
"learning_rate": 1.0117208311134789e-05,
"loss": 2.2064,
"step": 18550
},
{
"epoch": 9.91,
"grad_norm": 0.9394697546958923,
"learning_rate": 1.0090570058604157e-05,
"loss": 2.1646,
"step": 18600
},
{
"epoch": 9.94,
"grad_norm": 1.0079654455184937,
"learning_rate": 1.006393180607352e-05,
"loss": 2.1638,
"step": 18650
},
{
"epoch": 9.96,
"grad_norm": 0.9374598264694214,
"learning_rate": 1.0037293553542889e-05,
"loss": 2.2037,
"step": 18700
},
{
"epoch": 9.99,
"grad_norm": 0.9795972108840942,
"learning_rate": 1.0010655301012256e-05,
"loss": 2.2119,
"step": 18750
},
{
"epoch": 10.0,
"eval_bert-score-f1": 0.8809602989904121,
"eval_bert-score-precision": 0.8852206096635378,
"eval_bert-score-recall": 0.8770327499302876,
"eval_gen_len": 47.48157921445245,
"eval_loss": 2.0146677494049072,
"eval_rouge1": 0.3344,
"eval_rouge2": 0.1266,
"eval_rougeL": 0.2474,
"eval_rougeLsum": 0.2475,
"eval_runtime": 1332.7051,
"eval_samples_per_second": 14.786,
"eval_steps_per_second": 0.353,
"step": 18770
},
{
"epoch": 10.02,
"grad_norm": 1.0391963720321655,
"learning_rate": 9.984017048481621e-06,
"loss": 2.1618,
"step": 18800
},
{
"epoch": 10.04,
"grad_norm": 0.9638227820396423,
"learning_rate": 9.957378795950986e-06,
"loss": 2.1847,
"step": 18850
},
{
"epoch": 10.07,
"grad_norm": 0.9130365252494812,
"learning_rate": 9.930740543420353e-06,
"loss": 2.1409,
"step": 18900
},
{
"epoch": 10.1,
"grad_norm": 0.97170490026474,
"learning_rate": 9.90410229088972e-06,
"loss": 2.2007,
"step": 18950
},
{
"epoch": 10.12,
"grad_norm": 0.9903939962387085,
"learning_rate": 9.877464038359084e-06,
"loss": 2.1873,
"step": 19000
},
{
"epoch": 10.15,
"grad_norm": 1.0155619382858276,
"learning_rate": 9.850825785828451e-06,
"loss": 2.2185,
"step": 19050
},
{
"epoch": 10.18,
"grad_norm": 1.0440953969955444,
"learning_rate": 9.824187533297816e-06,
"loss": 2.183,
"step": 19100
},
{
"epoch": 10.2,
"grad_norm": 1.002216100692749,
"learning_rate": 9.797549280767183e-06,
"loss": 2.1827,
"step": 19150
},
{
"epoch": 10.23,
"grad_norm": 0.9036744236946106,
"learning_rate": 9.770911028236548e-06,
"loss": 2.157,
"step": 19200
},
{
"epoch": 10.26,
"grad_norm": 1.0186132192611694,
"learning_rate": 9.744272775705915e-06,
"loss": 2.1458,
"step": 19250
},
{
"epoch": 10.28,
"grad_norm": 1.157223105430603,
"learning_rate": 9.71763452317528e-06,
"loss": 2.1578,
"step": 19300
},
{
"epoch": 10.31,
"grad_norm": 1.0317802429199219,
"learning_rate": 9.690996270644647e-06,
"loss": 2.1852,
"step": 19350
},
{
"epoch": 10.34,
"grad_norm": 0.9137316942214966,
"learning_rate": 9.664358018114013e-06,
"loss": 2.228,
"step": 19400
},
{
"epoch": 10.36,
"grad_norm": 0.8266160488128662,
"learning_rate": 9.637719765583378e-06,
"loss": 2.2115,
"step": 19450
},
{
"epoch": 10.39,
"grad_norm": 0.9575254917144775,
"learning_rate": 9.611081513052745e-06,
"loss": 2.1671,
"step": 19500
},
{
"epoch": 10.42,
"grad_norm": 1.1885944604873657,
"learning_rate": 9.58444326052211e-06,
"loss": 2.1953,
"step": 19550
},
{
"epoch": 10.44,
"grad_norm": 0.8843189477920532,
"learning_rate": 9.557805007991477e-06,
"loss": 2.1859,
"step": 19600
},
{
"epoch": 10.47,
"grad_norm": 1.5487014055252075,
"learning_rate": 9.531166755460842e-06,
"loss": 2.1678,
"step": 19650
},
{
"epoch": 10.5,
"grad_norm": 1.1339489221572876,
"learning_rate": 9.504528502930209e-06,
"loss": 2.1917,
"step": 19700
},
{
"epoch": 10.52,
"grad_norm": 0.973892331123352,
"learning_rate": 9.477890250399575e-06,
"loss": 2.1741,
"step": 19750
},
{
"epoch": 10.55,
"grad_norm": 1.0158993005752563,
"learning_rate": 9.45125199786894e-06,
"loss": 2.1949,
"step": 19800
},
{
"epoch": 10.58,
"grad_norm": 0.8538472056388855,
"learning_rate": 9.424613745338307e-06,
"loss": 2.1792,
"step": 19850
},
{
"epoch": 10.6,
"grad_norm": 1.1701879501342773,
"learning_rate": 9.397975492807672e-06,
"loss": 2.1807,
"step": 19900
},
{
"epoch": 10.63,
"grad_norm": 1.2362642288208008,
"learning_rate": 9.371337240277039e-06,
"loss": 2.1767,
"step": 19950
},
{
"epoch": 10.66,
"grad_norm": 1.123152494430542,
"learning_rate": 9.344698987746404e-06,
"loss": 2.1598,
"step": 20000
},
{
"epoch": 10.68,
"grad_norm": 0.953671932220459,
"learning_rate": 9.31806073521577e-06,
"loss": 2.1543,
"step": 20050
},
{
"epoch": 10.71,
"grad_norm": 1.0523099899291992,
"learning_rate": 9.291422482685136e-06,
"loss": 2.2153,
"step": 20100
},
{
"epoch": 10.74,
"grad_norm": 1.3634711503982544,
"learning_rate": 9.264784230154503e-06,
"loss": 2.1659,
"step": 20150
},
{
"epoch": 10.76,
"grad_norm": 0.9536843299865723,
"learning_rate": 9.23814597762387e-06,
"loss": 2.1771,
"step": 20200
},
{
"epoch": 10.79,
"grad_norm": 0.9196369647979736,
"learning_rate": 9.211507725093234e-06,
"loss": 2.1675,
"step": 20250
},
{
"epoch": 10.82,
"grad_norm": 0.846582293510437,
"learning_rate": 9.184869472562601e-06,
"loss": 2.1461,
"step": 20300
},
{
"epoch": 10.84,
"grad_norm": 0.9845914840698242,
"learning_rate": 9.158231220031966e-06,
"loss": 2.2221,
"step": 20350
},
{
"epoch": 10.87,
"grad_norm": 1.0751748085021973,
"learning_rate": 9.131592967501333e-06,
"loss": 2.1662,
"step": 20400
},
{
"epoch": 10.9,
"grad_norm": 0.9254376292228699,
"learning_rate": 9.104954714970698e-06,
"loss": 2.1693,
"step": 20450
},
{
"epoch": 10.92,
"grad_norm": 0.9914495348930359,
"learning_rate": 9.078316462440065e-06,
"loss": 2.1774,
"step": 20500
},
{
"epoch": 10.95,
"grad_norm": 1.060456395149231,
"learning_rate": 9.05167820990943e-06,
"loss": 2.1928,
"step": 20550
},
{
"epoch": 10.97,
"grad_norm": 0.9841185212135315,
"learning_rate": 9.025039957378797e-06,
"loss": 2.1774,
"step": 20600
},
{
"epoch": 11.0,
"eval_bert-score-f1": 0.8810000661683061,
"eval_bert-score-precision": 0.8851854187195088,
"eval_bert-score-recall": 0.8771485046185927,
"eval_gen_len": 47.51730437430224,
"eval_loss": 2.013378381729126,
"eval_rouge1": 0.3348,
"eval_rouge2": 0.1268,
"eval_rougeL": 0.2476,
"eval_rougeLsum": 0.2477,
"eval_runtime": 1329.4509,
"eval_samples_per_second": 14.823,
"eval_steps_per_second": 0.354,
"step": 20647
},
{
"epoch": 11.0,
"grad_norm": 0.9042637944221497,
"learning_rate": 8.998401704848163e-06,
"loss": 2.1635,
"step": 20650
},
{
"epoch": 11.03,
"grad_norm": 0.9590170979499817,
"learning_rate": 8.971763452317528e-06,
"loss": 2.165,
"step": 20700
},
{
"epoch": 11.05,
"grad_norm": 0.8360864520072937,
"learning_rate": 8.945125199786895e-06,
"loss": 2.1263,
"step": 20750
},
{
"epoch": 11.08,
"grad_norm": 1.0940536260604858,
"learning_rate": 8.91848694725626e-06,
"loss": 2.1797,
"step": 20800
},
{
"epoch": 11.11,
"grad_norm": 0.9590544104576111,
"learning_rate": 8.891848694725627e-06,
"loss": 2.1848,
"step": 20850
},
{
"epoch": 11.13,
"grad_norm": 1.009817361831665,
"learning_rate": 8.865210442194992e-06,
"loss": 2.1582,
"step": 20900
},
{
"epoch": 11.16,
"grad_norm": 0.9471531510353088,
"learning_rate": 8.838572189664359e-06,
"loss": 2.193,
"step": 20950
},
{
"epoch": 11.19,
"grad_norm": 1.0452817678451538,
"learning_rate": 8.811933937133724e-06,
"loss": 2.1514,
"step": 21000
},
{
"epoch": 11.21,
"grad_norm": 1.0049973726272583,
"learning_rate": 8.78529568460309e-06,
"loss": 2.1969,
"step": 21050
},
{
"epoch": 11.24,
"grad_norm": 1.1841228008270264,
"learning_rate": 8.758657432072457e-06,
"loss": 2.1479,
"step": 21100
},
{
"epoch": 11.27,
"grad_norm": 0.9284167289733887,
"learning_rate": 8.732019179541822e-06,
"loss": 2.1597,
"step": 21150
},
{
"epoch": 11.29,
"grad_norm": 1.0026378631591797,
"learning_rate": 8.705380927011189e-06,
"loss": 2.1856,
"step": 21200
},
{
"epoch": 11.32,
"grad_norm": 1.1704638004302979,
"learning_rate": 8.678742674480554e-06,
"loss": 2.1991,
"step": 21250
},
{
"epoch": 11.35,
"grad_norm": 0.9646838307380676,
"learning_rate": 8.65210442194992e-06,
"loss": 2.1673,
"step": 21300
},
{
"epoch": 11.37,
"grad_norm": 0.9169173240661621,
"learning_rate": 8.625466169419286e-06,
"loss": 2.1879,
"step": 21350
},
{
"epoch": 11.4,
"grad_norm": 1.2698827981948853,
"learning_rate": 8.598827916888653e-06,
"loss": 2.1711,
"step": 21400
},
{
"epoch": 11.43,
"grad_norm": 1.0014339685440063,
"learning_rate": 8.57218966435802e-06,
"loss": 2.1574,
"step": 21450
},
{
"epoch": 11.45,
"grad_norm": 1.141926884651184,
"learning_rate": 8.545551411827386e-06,
"loss": 2.181,
"step": 21500
},
{
"epoch": 11.48,
"grad_norm": 0.9993128180503845,
"learning_rate": 8.518913159296751e-06,
"loss": 2.1954,
"step": 21550
},
{
"epoch": 11.51,
"grad_norm": 0.8284913301467896,
"learning_rate": 8.492274906766118e-06,
"loss": 2.1507,
"step": 21600
},
{
"epoch": 11.53,
"grad_norm": 0.9283605217933655,
"learning_rate": 8.465636654235483e-06,
"loss": 2.164,
"step": 21650
},
{
"epoch": 11.56,
"grad_norm": 1.0683674812316895,
"learning_rate": 8.438998401704848e-06,
"loss": 2.1689,
"step": 21700
},
{
"epoch": 11.59,
"grad_norm": 0.9687473177909851,
"learning_rate": 8.412360149174215e-06,
"loss": 2.1447,
"step": 21750
},
{
"epoch": 11.61,
"grad_norm": 1.2580022811889648,
"learning_rate": 8.38572189664358e-06,
"loss": 2.1895,
"step": 21800
},
{
"epoch": 11.64,
"grad_norm": 0.9309559464454651,
"learning_rate": 8.359083644112946e-06,
"loss": 2.178,
"step": 21850
},
{
"epoch": 11.67,
"grad_norm": 0.9081181883811951,
"learning_rate": 8.332445391582313e-06,
"loss": 2.1827,
"step": 21900
},
{
"epoch": 11.69,
"grad_norm": 0.9058334231376648,
"learning_rate": 8.30580713905168e-06,
"loss": 2.1636,
"step": 21950
},
{
"epoch": 11.72,
"grad_norm": 0.9327389597892761,
"learning_rate": 8.279168886521045e-06,
"loss": 2.175,
"step": 22000
},
{
"epoch": 11.75,
"grad_norm": 1.3059749603271484,
"learning_rate": 8.252530633990412e-06,
"loss": 2.1655,
"step": 22050
},
{
"epoch": 11.77,
"grad_norm": 0.9360121488571167,
"learning_rate": 8.225892381459777e-06,
"loss": 2.1759,
"step": 22100
},
{
"epoch": 11.8,
"grad_norm": 2.760277032852173,
"learning_rate": 8.199254128929144e-06,
"loss": 2.1784,
"step": 22150
},
{
"epoch": 11.83,
"grad_norm": 0.9712272882461548,
"learning_rate": 8.172615876398509e-06,
"loss": 2.1736,
"step": 22200
},
{
"epoch": 11.85,
"grad_norm": 0.8907904028892517,
"learning_rate": 8.145977623867875e-06,
"loss": 2.1918,
"step": 22250
},
{
"epoch": 11.88,
"grad_norm": 1.1707508563995361,
"learning_rate": 8.119339371337242e-06,
"loss": 2.1679,
"step": 22300
},
{
"epoch": 11.91,
"grad_norm": 0.8738240003585815,
"learning_rate": 8.092701118806607e-06,
"loss": 2.1831,
"step": 22350
},
{
"epoch": 11.93,
"grad_norm": 1.0194109678268433,
"learning_rate": 8.066062866275974e-06,
"loss": 2.1654,
"step": 22400
},
{
"epoch": 11.96,
"grad_norm": 2.5614118576049805,
"learning_rate": 8.039424613745339e-06,
"loss": 2.1985,
"step": 22450
},
{
"epoch": 11.99,
"grad_norm": 1.0855169296264648,
"learning_rate": 8.012786361214706e-06,
"loss": 2.2065,
"step": 22500
},
{
"epoch": 12.0,
"eval_bert-score-f1": 0.8812093867313604,
"eval_bert-score-precision": 0.8855559532955481,
"eval_bert-score-recall": 0.8771963626476008,
"eval_gen_len": 47.54465644981224,
"eval_loss": 2.0089080333709717,
"eval_rouge1": 0.3353,
"eval_rouge2": 0.1269,
"eval_rougeL": 0.2482,
"eval_rougeLsum": 0.2483,
"eval_runtime": 1322.3144,
"eval_samples_per_second": 14.903,
"eval_steps_per_second": 0.355,
"step": 22524
},
{
"epoch": 12.01,
"grad_norm": 0.9317869544029236,
"learning_rate": 7.98614810868407e-06,
"loss": 2.1648,
"step": 22550
},
{
"epoch": 12.04,
"grad_norm": 0.9064331650733948,
"learning_rate": 7.959509856153437e-06,
"loss": 2.1624,
"step": 22600
},
{
"epoch": 12.07,
"grad_norm": 0.867645263671875,
"learning_rate": 7.932871603622802e-06,
"loss": 2.135,
"step": 22650
},
{
"epoch": 12.09,
"grad_norm": 0.9419721364974976,
"learning_rate": 7.90623335109217e-06,
"loss": 2.1511,
"step": 22700
},
{
"epoch": 12.12,
"grad_norm": 1.0487383604049683,
"learning_rate": 7.879595098561536e-06,
"loss": 2.2027,
"step": 22750
},
{
"epoch": 12.15,
"grad_norm": 0.9865580797195435,
"learning_rate": 7.852956846030901e-06,
"loss": 2.1988,
"step": 22800
},
{
"epoch": 12.17,
"grad_norm": 0.8484292030334473,
"learning_rate": 7.826318593500268e-06,
"loss": 2.1662,
"step": 22850
},
{
"epoch": 12.2,
"grad_norm": 1.0033077001571655,
"learning_rate": 7.799680340969633e-06,
"loss": 2.1494,
"step": 22900
},
{
"epoch": 12.23,
"grad_norm": 0.967682957649231,
"learning_rate": 7.773042088439e-06,
"loss": 2.149,
"step": 22950
},
{
"epoch": 12.25,
"grad_norm": 1.0493502616882324,
"learning_rate": 7.746403835908365e-06,
"loss": 2.1783,
"step": 23000
},
{
"epoch": 12.28,
"grad_norm": 1.7216830253601074,
"learning_rate": 7.719765583377731e-06,
"loss": 2.2004,
"step": 23050
},
{
"epoch": 12.31,
"grad_norm": 0.8698610067367554,
"learning_rate": 7.693127330847096e-06,
"loss": 2.1873,
"step": 23100
},
{
"epoch": 12.33,
"grad_norm": 0.9329569935798645,
"learning_rate": 7.666489078316463e-06,
"loss": 2.1386,
"step": 23150
},
{
"epoch": 12.36,
"grad_norm": 1.0915803909301758,
"learning_rate": 7.63985082578583e-06,
"loss": 2.1847,
"step": 23200
},
{
"epoch": 12.39,
"grad_norm": 0.945863664150238,
"learning_rate": 7.613212573255196e-06,
"loss": 2.1515,
"step": 23250
},
{
"epoch": 12.41,
"grad_norm": 1.3315609693527222,
"learning_rate": 7.586574320724561e-06,
"loss": 2.1758,
"step": 23300
},
{
"epoch": 12.44,
"grad_norm": 0.9587283134460449,
"learning_rate": 7.559936068193927e-06,
"loss": 2.1817,
"step": 23350
},
{
"epoch": 12.47,
"grad_norm": 1.0035264492034912,
"learning_rate": 7.533297815663293e-06,
"loss": 2.2093,
"step": 23400
},
{
"epoch": 12.49,
"grad_norm": 0.9835115671157837,
"learning_rate": 7.5066595631326585e-06,
"loss": 2.1545,
"step": 23450
},
{
"epoch": 12.52,
"grad_norm": 1.103174090385437,
"learning_rate": 7.480021310602024e-06,
"loss": 2.1901,
"step": 23500
},
{
"epoch": 12.55,
"grad_norm": 0.9141381978988647,
"learning_rate": 7.45338305807139e-06,
"loss": 2.1721,
"step": 23550
},
{
"epoch": 12.57,
"grad_norm": 0.9373638033866882,
"learning_rate": 7.426744805540758e-06,
"loss": 2.1484,
"step": 23600
},
{
"epoch": 12.6,
"grad_norm": 0.8388597369194031,
"learning_rate": 7.400106553010124e-06,
"loss": 2.1476,
"step": 23650
},
{
"epoch": 12.63,
"grad_norm": 1.1504141092300415,
"learning_rate": 7.37346830047949e-06,
"loss": 2.1471,
"step": 23700
},
{
"epoch": 12.65,
"grad_norm": 1.0790530443191528,
"learning_rate": 7.3468300479488556e-06,
"loss": 2.1905,
"step": 23750
},
{
"epoch": 12.68,
"grad_norm": 1.2390844821929932,
"learning_rate": 7.3201917954182215e-06,
"loss": 2.1447,
"step": 23800
},
{
"epoch": 12.71,
"grad_norm": 1.1462445259094238,
"learning_rate": 7.293553542887587e-06,
"loss": 2.176,
"step": 23850
},
{
"epoch": 12.73,
"grad_norm": 1.1283916234970093,
"learning_rate": 7.266915290356953e-06,
"loss": 2.2012,
"step": 23900
},
{
"epoch": 12.76,
"grad_norm": 0.9534170031547546,
"learning_rate": 7.240277037826319e-06,
"loss": 2.1723,
"step": 23950
},
{
"epoch": 12.79,
"grad_norm": 1.0652652978897095,
"learning_rate": 7.213638785295686e-06,
"loss": 2.1637,
"step": 24000
},
{
"epoch": 12.81,
"grad_norm": 0.9238381385803223,
"learning_rate": 7.187000532765052e-06,
"loss": 2.1684,
"step": 24050
},
{
"epoch": 12.84,
"grad_norm": 1.0384712219238281,
"learning_rate": 7.160362280234418e-06,
"loss": 2.1827,
"step": 24100
},
{
"epoch": 12.87,
"grad_norm": 1.0068541765213013,
"learning_rate": 7.133724027703784e-06,
"loss": 2.1631,
"step": 24150
},
{
"epoch": 12.89,
"grad_norm": 1.0497870445251465,
"learning_rate": 7.1070857751731495e-06,
"loss": 2.1397,
"step": 24200
},
{
"epoch": 12.92,
"grad_norm": 0.9750410318374634,
"learning_rate": 7.080447522642515e-06,
"loss": 2.1806,
"step": 24250
},
{
"epoch": 12.95,
"grad_norm": 1.383784532546997,
"learning_rate": 7.053809270111881e-06,
"loss": 2.1835,
"step": 24300
},
{
"epoch": 12.97,
"grad_norm": 1.006396770477295,
"learning_rate": 7.027171017581247e-06,
"loss": 2.1553,
"step": 24350
},
{
"epoch": 13.0,
"grad_norm": 1.2312555313110352,
"learning_rate": 7.000532765050613e-06,
"loss": 2.1748,
"step": 24400
},
{
"epoch": 13.0,
"eval_bert-score-f1": 0.8812570747936678,
"eval_bert-score-precision": 0.8856106671139539,
"eval_bert-score-recall": 0.8772335333635659,
"eval_gen_len": 47.40327818938395,
"eval_loss": 2.0081796646118164,
"eval_rouge1": 0.3355,
"eval_rouge2": 0.1268,
"eval_rougeL": 0.2484,
"eval_rougeLsum": 0.2485,
"eval_runtime": 1330.0155,
"eval_samples_per_second": 14.816,
"eval_steps_per_second": 0.353,
"step": 24401
},
{
"epoch": 13.03,
"grad_norm": 0.9376835227012634,
"learning_rate": 6.97389451251998e-06,
"loss": 2.177,
"step": 24450
},
{
"epoch": 13.05,
"grad_norm": 0.8431729078292847,
"learning_rate": 6.947256259989346e-06,
"loss": 2.151,
"step": 24500
},
{
"epoch": 13.08,
"grad_norm": 0.9604516625404358,
"learning_rate": 6.920618007458712e-06,
"loss": 2.1812,
"step": 24550
},
{
"epoch": 13.11,
"grad_norm": 1.005323886871338,
"learning_rate": 6.8939797549280775e-06,
"loss": 2.1375,
"step": 24600
},
{
"epoch": 13.13,
"grad_norm": 0.8559876680374146,
"learning_rate": 6.867341502397443e-06,
"loss": 2.1757,
"step": 24650
},
{
"epoch": 13.16,
"grad_norm": 0.9345346689224243,
"learning_rate": 6.840703249866809e-06,
"loss": 2.1612,
"step": 24700
},
{
"epoch": 13.19,
"grad_norm": 0.8913391828536987,
"learning_rate": 6.814064997336175e-06,
"loss": 2.1644,
"step": 24750
},
{
"epoch": 13.21,
"grad_norm": 1.023695468902588,
"learning_rate": 6.787426744805541e-06,
"loss": 2.1737,
"step": 24800
},
{
"epoch": 13.24,
"grad_norm": 0.9663516879081726,
"learning_rate": 6.760788492274908e-06,
"loss": 2.194,
"step": 24850
},
{
"epoch": 13.27,
"grad_norm": 2.1459882259368896,
"learning_rate": 6.734150239744274e-06,
"loss": 2.1383,
"step": 24900
},
{
"epoch": 13.29,
"grad_norm": 1.0092273950576782,
"learning_rate": 6.70751198721364e-06,
"loss": 2.1482,
"step": 24950
},
{
"epoch": 13.32,
"grad_norm": 0.9323935508728027,
"learning_rate": 6.6808737346830055e-06,
"loss": 2.1585,
"step": 25000
},
{
"epoch": 13.35,
"grad_norm": 0.9155877828598022,
"learning_rate": 6.654235482152371e-06,
"loss": 2.1752,
"step": 25050
},
{
"epoch": 13.37,
"grad_norm": 1.0836352109909058,
"learning_rate": 6.627597229621737e-06,
"loss": 2.1702,
"step": 25100
},
{
"epoch": 13.4,
"grad_norm": 1.0014092922210693,
"learning_rate": 6.600958977091103e-06,
"loss": 2.1667,
"step": 25150
},
{
"epoch": 13.43,
"grad_norm": 0.917218804359436,
"learning_rate": 6.574320724560469e-06,
"loss": 2.174,
"step": 25200
},
{
"epoch": 13.45,
"grad_norm": 0.9018741846084595,
"learning_rate": 6.547682472029835e-06,
"loss": 2.1612,
"step": 25250
},
{
"epoch": 13.48,
"grad_norm": 1.0210672616958618,
"learning_rate": 6.521044219499202e-06,
"loss": 2.1516,
"step": 25300
},
{
"epoch": 13.51,
"grad_norm": 0.9759948253631592,
"learning_rate": 6.494405966968568e-06,
"loss": 2.1683,
"step": 25350
},
{
"epoch": 13.53,
"grad_norm": 0.8901129961013794,
"learning_rate": 6.4677677144379335e-06,
"loss": 2.1777,
"step": 25400
},
{
"epoch": 13.56,
"grad_norm": 0.9080635905265808,
"learning_rate": 6.4411294619072994e-06,
"loss": 2.1821,
"step": 25450
},
{
"epoch": 13.59,
"grad_norm": 0.9286373257637024,
"learning_rate": 6.414491209376665e-06,
"loss": 2.1779,
"step": 25500
},
{
"epoch": 13.61,
"grad_norm": 1.1481138467788696,
"learning_rate": 6.387852956846031e-06,
"loss": 2.1692,
"step": 25550
},
{
"epoch": 13.64,
"grad_norm": 0.9707843661308289,
"learning_rate": 6.361214704315397e-06,
"loss": 2.1682,
"step": 25600
},
{
"epoch": 13.67,
"grad_norm": 1.042262077331543,
"learning_rate": 6.334576451784763e-06,
"loss": 2.1584,
"step": 25650
},
{
"epoch": 13.69,
"grad_norm": 0.9933112859725952,
"learning_rate": 6.30793819925413e-06,
"loss": 2.1558,
"step": 25700
},
{
"epoch": 13.72,
"grad_norm": 0.8889453411102295,
"learning_rate": 6.281299946723496e-06,
"loss": 2.1592,
"step": 25750
},
{
"epoch": 13.75,
"grad_norm": 0.8605052828788757,
"learning_rate": 6.2546616941928615e-06,
"loss": 2.1993,
"step": 25800
},
{
"epoch": 13.77,
"grad_norm": 0.946685791015625,
"learning_rate": 6.2280234416622274e-06,
"loss": 2.1609,
"step": 25850
},
{
"epoch": 13.8,
"grad_norm": 0.9031264185905457,
"learning_rate": 6.201385189131593e-06,
"loss": 2.1578,
"step": 25900
},
{
"epoch": 13.83,
"grad_norm": 1.1098228693008423,
"learning_rate": 6.174746936600959e-06,
"loss": 2.1487,
"step": 25950
},
{
"epoch": 13.85,
"grad_norm": 0.9173837900161743,
"learning_rate": 6.148108684070325e-06,
"loss": 2.1639,
"step": 26000
},
{
"epoch": 13.88,
"grad_norm": 1.1397854089736938,
"learning_rate": 6.121470431539691e-06,
"loss": 2.165,
"step": 26050
},
{
"epoch": 13.91,
"grad_norm": 1.0020257234573364,
"learning_rate": 6.094832179009057e-06,
"loss": 2.1713,
"step": 26100
},
{
"epoch": 13.93,
"grad_norm": 0.9584590792655945,
"learning_rate": 6.068193926478424e-06,
"loss": 2.2049,
"step": 26150
},
{
"epoch": 13.96,
"grad_norm": 1.1951069831848145,
"learning_rate": 6.0415556739477896e-06,
"loss": 2.1632,
"step": 26200
},
{
"epoch": 13.99,
"grad_norm": 1.0568323135375977,
"learning_rate": 6.0149174214171555e-06,
"loss": 2.1792,
"step": 26250
},
{
"epoch": 14.0,
"eval_bert-score-f1": 0.8812965099779855,
"eval_bert-score-precision": 0.8854620127155008,
"eval_bert-score-recall": 0.877461512582043,
"eval_gen_len": 47.727494164213944,
"eval_loss": 2.0084121227264404,
"eval_rouge1": 0.3362,
"eval_rouge2": 0.1273,
"eval_rougeL": 0.2487,
"eval_rougeLsum": 0.2487,
"eval_runtime": 1350.2387,
"eval_samples_per_second": 14.594,
"eval_steps_per_second": 0.348,
"step": 26278
},
{
"epoch": 14.01,
"grad_norm": 0.9785760045051575,
"learning_rate": 5.988279168886521e-06,
"loss": 2.1468,
"step": 26300
},
{
"epoch": 14.04,
"grad_norm": 1.0162745714187622,
"learning_rate": 5.961640916355887e-06,
"loss": 2.1717,
"step": 26350
},
{
"epoch": 14.06,
"grad_norm": 1.0883443355560303,
"learning_rate": 5.935002663825253e-06,
"loss": 2.1565,
"step": 26400
},
{
"epoch": 14.09,
"grad_norm": 1.0060546398162842,
"learning_rate": 5.908364411294619e-06,
"loss": 2.1691,
"step": 26450
},
{
"epoch": 14.12,
"grad_norm": 0.9702796936035156,
"learning_rate": 5.881726158763985e-06,
"loss": 2.1747,
"step": 26500
},
{
"epoch": 14.14,
"grad_norm": 0.9459964632987976,
"learning_rate": 5.8550879062333525e-06,
"loss": 2.1781,
"step": 26550
},
{
"epoch": 14.17,
"grad_norm": 0.8751854300498962,
"learning_rate": 5.8284496537027176e-06,
"loss": 2.1531,
"step": 26600
},
{
"epoch": 14.2,
"grad_norm": 0.9021329879760742,
"learning_rate": 5.8018114011720835e-06,
"loss": 2.1741,
"step": 26650
},
{
"epoch": 14.22,
"grad_norm": 0.9062153100967407,
"learning_rate": 5.775173148641449e-06,
"loss": 2.1424,
"step": 26700
},
{
"epoch": 14.25,
"grad_norm": 0.8709902167320251,
"learning_rate": 5.748534896110815e-06,
"loss": 2.1458,
"step": 26750
},
{
"epoch": 14.28,
"grad_norm": 0.8705712556838989,
"learning_rate": 5.721896643580181e-06,
"loss": 2.113,
"step": 26800
},
{
"epoch": 14.3,
"grad_norm": 0.880886435508728,
"learning_rate": 5.695258391049547e-06,
"loss": 2.2009,
"step": 26850
},
{
"epoch": 14.33,
"grad_norm": 1.1115506887435913,
"learning_rate": 5.668620138518913e-06,
"loss": 2.1809,
"step": 26900
},
{
"epoch": 14.36,
"grad_norm": 0.9538648128509521,
"learning_rate": 5.641981885988279e-06,
"loss": 2.1776,
"step": 26950
},
{
"epoch": 14.38,
"grad_norm": 0.8338669538497925,
"learning_rate": 5.6153436334576464e-06,
"loss": 2.1357,
"step": 27000
},
{
"epoch": 14.41,
"grad_norm": 0.9421921372413635,
"learning_rate": 5.588705380927012e-06,
"loss": 2.1514,
"step": 27050
},
{
"epoch": 14.44,
"grad_norm": 0.8788719177246094,
"learning_rate": 5.562067128396378e-06,
"loss": 2.1643,
"step": 27100
},
{
"epoch": 14.46,
"grad_norm": 0.9146755337715149,
"learning_rate": 5.535428875865744e-06,
"loss": 2.1334,
"step": 27150
},
{
"epoch": 14.49,
"grad_norm": 1.042197346687317,
"learning_rate": 5.50879062333511e-06,
"loss": 2.1927,
"step": 27200
},
{
"epoch": 14.52,
"grad_norm": 0.9642235636711121,
"learning_rate": 5.482152370804475e-06,
"loss": 2.1628,
"step": 27250
},
{
"epoch": 14.54,
"grad_norm": 1.0911927223205566,
"learning_rate": 5.455514118273841e-06,
"loss": 2.1643,
"step": 27300
},
{
"epoch": 14.57,
"grad_norm": 1.1119507551193237,
"learning_rate": 5.428875865743207e-06,
"loss": 2.1757,
"step": 27350
},
{
"epoch": 14.6,
"grad_norm": 0.9982576370239258,
"learning_rate": 5.4022376132125744e-06,
"loss": 2.1912,
"step": 27400
},
{
"epoch": 14.62,
"grad_norm": 0.9314507842063904,
"learning_rate": 5.37559936068194e-06,
"loss": 2.171,
"step": 27450
},
{
"epoch": 14.65,
"grad_norm": 1.2115583419799805,
"learning_rate": 5.348961108151306e-06,
"loss": 2.16,
"step": 27500
},
{
"epoch": 14.68,
"grad_norm": 0.9271109700202942,
"learning_rate": 5.322322855620672e-06,
"loss": 2.1908,
"step": 27550
},
{
"epoch": 14.7,
"grad_norm": 0.8988925218582153,
"learning_rate": 5.295684603090038e-06,
"loss": 2.1852,
"step": 27600
},
{
"epoch": 14.73,
"grad_norm": 0.9780814051628113,
"learning_rate": 5.269046350559404e-06,
"loss": 2.1417,
"step": 27650
},
{
"epoch": 14.76,
"grad_norm": 1.1456106901168823,
"learning_rate": 5.24240809802877e-06,
"loss": 2.1775,
"step": 27700
},
{
"epoch": 14.78,
"grad_norm": 0.813176155090332,
"learning_rate": 5.215769845498136e-06,
"loss": 2.1916,
"step": 27750
},
{
"epoch": 14.81,
"grad_norm": 0.9489790797233582,
"learning_rate": 5.189131592967502e-06,
"loss": 2.1561,
"step": 27800
},
{
"epoch": 14.84,
"grad_norm": 0.9345203042030334,
"learning_rate": 5.162493340436868e-06,
"loss": 2.1511,
"step": 27850
},
{
"epoch": 14.86,
"grad_norm": 0.7995460033416748,
"learning_rate": 5.135855087906234e-06,
"loss": 2.1918,
"step": 27900
},
{
"epoch": 14.89,
"grad_norm": 0.8729720711708069,
"learning_rate": 5.1092168353756e-06,
"loss": 2.1598,
"step": 27950
},
{
"epoch": 14.92,
"grad_norm": 0.928892970085144,
"learning_rate": 5.082578582844966e-06,
"loss": 2.1602,
"step": 28000
},
{
"epoch": 14.94,
"grad_norm": 0.9653081297874451,
"learning_rate": 5.055940330314332e-06,
"loss": 2.1591,
"step": 28050
},
{
"epoch": 14.97,
"grad_norm": 1.006082534790039,
"learning_rate": 5.029302077783698e-06,
"loss": 2.1554,
"step": 28100
},
{
"epoch": 15.0,
"grad_norm": 0.9210333228111267,
"learning_rate": 5.002663825253064e-06,
"loss": 2.1609,
"step": 28150
},
{
"epoch": 15.0,
"eval_bert-score-f1": 0.8813901242448342,
"eval_bert-score-precision": 0.8855356388553972,
"eval_bert-score-recall": 0.8775755109870953,
"eval_gen_len": 47.89211407693088,
"eval_loss": 2.008052349090576,
"eval_rouge1": 0.3364,
"eval_rouge2": 0.1275,
"eval_rougeL": 0.249,
"eval_rougeLsum": 0.2491,
"eval_runtime": 1342.1803,
"eval_samples_per_second": 14.682,
"eval_steps_per_second": 0.35,
"step": 28155
},
{
"epoch": 15.02,
"grad_norm": 0.9067476987838745,
"learning_rate": 4.97602557272243e-06,
"loss": 2.1855,
"step": 28200
},
{
"epoch": 15.05,
"grad_norm": 0.9924964904785156,
"learning_rate": 4.9493873201917955e-06,
"loss": 2.1434,
"step": 28250
},
{
"epoch": 15.08,
"grad_norm": 0.9526183009147644,
"learning_rate": 4.922749067661161e-06,
"loss": 2.1306,
"step": 28300
},
{
"epoch": 15.1,
"grad_norm": 0.8999783396720886,
"learning_rate": 4.896110815130528e-06,
"loss": 2.1683,
"step": 28350
},
{
"epoch": 15.13,
"grad_norm": 1.3067386150360107,
"learning_rate": 4.869472562599894e-06,
"loss": 2.175,
"step": 28400
},
{
"epoch": 15.16,
"grad_norm": 0.930998682975769,
"learning_rate": 4.84283431006926e-06,
"loss": 2.1571,
"step": 28450
},
{
"epoch": 15.18,
"grad_norm": 1.0874801874160767,
"learning_rate": 4.816196057538626e-06,
"loss": 2.1648,
"step": 28500
},
{
"epoch": 15.21,
"grad_norm": 0.8937533497810364,
"learning_rate": 4.789557805007992e-06,
"loss": 2.1593,
"step": 28550
},
{
"epoch": 15.24,
"grad_norm": 1.1147658824920654,
"learning_rate": 4.762919552477358e-06,
"loss": 2.1554,
"step": 28600
},
{
"epoch": 15.26,
"grad_norm": 1.2026888132095337,
"learning_rate": 4.7362812999467235e-06,
"loss": 2.1702,
"step": 28650
},
{
"epoch": 15.29,
"grad_norm": 1.02727210521698,
"learning_rate": 4.7096430474160894e-06,
"loss": 2.1316,
"step": 28700
},
{
"epoch": 15.32,
"grad_norm": 0.9779881834983826,
"learning_rate": 4.683004794885455e-06,
"loss": 2.1673,
"step": 28750
},
{
"epoch": 15.34,
"grad_norm": 1.0025492906570435,
"learning_rate": 4.656366542354822e-06,
"loss": 2.1949,
"step": 28800
},
{
"epoch": 15.37,
"grad_norm": 1.1579878330230713,
"learning_rate": 4.629728289824188e-06,
"loss": 2.161,
"step": 28850
},
{
"epoch": 15.4,
"grad_norm": 0.9376475811004639,
"learning_rate": 4.603090037293554e-06,
"loss": 2.1849,
"step": 28900
},
{
"epoch": 15.42,
"grad_norm": 1.239538550376892,
"learning_rate": 4.57645178476292e-06,
"loss": 2.148,
"step": 28950
},
{
"epoch": 15.45,
"grad_norm": 0.9662672281265259,
"learning_rate": 4.5498135322322865e-06,
"loss": 2.1917,
"step": 29000
},
{
"epoch": 15.48,
"grad_norm": 0.999487578868866,
"learning_rate": 4.523175279701652e-06,
"loss": 2.1746,
"step": 29050
},
{
"epoch": 15.5,
"grad_norm": 1.1087113618850708,
"learning_rate": 4.496537027171018e-06,
"loss": 2.1756,
"step": 29100
},
{
"epoch": 15.53,
"grad_norm": 0.9873372316360474,
"learning_rate": 4.469898774640384e-06,
"loss": 2.1333,
"step": 29150
},
{
"epoch": 15.56,
"grad_norm": 0.9368143677711487,
"learning_rate": 4.44326052210975e-06,
"loss": 2.1784,
"step": 29200
},
{
"epoch": 15.58,
"grad_norm": 1.0396299362182617,
"learning_rate": 4.416622269579116e-06,
"loss": 2.1786,
"step": 29250
},
{
"epoch": 15.61,
"grad_norm": 0.9846010804176331,
"learning_rate": 4.389984017048482e-06,
"loss": 2.1886,
"step": 29300
},
{
"epoch": 15.64,
"grad_norm": 0.7157499194145203,
"learning_rate": 4.363345764517848e-06,
"loss": 2.1454,
"step": 29350
},
{
"epoch": 15.66,
"grad_norm": 0.8496772050857544,
"learning_rate": 4.336707511987214e-06,
"loss": 2.1456,
"step": 29400
},
{
"epoch": 15.69,
"grad_norm": 1.0150104761123657,
"learning_rate": 4.31006925945658e-06,
"loss": 2.1725,
"step": 29450
},
{
"epoch": 15.72,
"grad_norm": 0.9437685012817383,
"learning_rate": 4.283431006925946e-06,
"loss": 2.1655,
"step": 29500
},
{
"epoch": 15.74,
"grad_norm": 0.978090226650238,
"learning_rate": 4.256792754395312e-06,
"loss": 2.1476,
"step": 29550
},
{
"epoch": 15.77,
"grad_norm": 0.8782944679260254,
"learning_rate": 4.230154501864678e-06,
"loss": 2.1482,
"step": 29600
},
{
"epoch": 15.8,
"grad_norm": 0.830117404460907,
"learning_rate": 4.203516249334044e-06,
"loss": 2.118,
"step": 29650
},
{
"epoch": 15.82,
"grad_norm": 0.9760297536849976,
"learning_rate": 4.17687799680341e-06,
"loss": 2.2007,
"step": 29700
},
{
"epoch": 15.85,
"grad_norm": 0.8977949619293213,
"learning_rate": 4.150239744272776e-06,
"loss": 2.1008,
"step": 29750
},
{
"epoch": 15.88,
"grad_norm": 1.0869261026382446,
"learning_rate": 4.123601491742142e-06,
"loss": 2.1915,
"step": 29800
},
{
"epoch": 15.9,
"grad_norm": 1.1186174154281616,
"learning_rate": 4.0969632392115084e-06,
"loss": 2.1653,
"step": 29850
},
{
"epoch": 15.93,
"grad_norm": 0.9164936542510986,
"learning_rate": 4.070324986680874e-06,
"loss": 2.1479,
"step": 29900
},
{
"epoch": 15.96,
"grad_norm": 0.9677796959877014,
"learning_rate": 4.04368673415024e-06,
"loss": 2.1207,
"step": 29950
},
{
"epoch": 15.98,
"grad_norm": 0.8449875116348267,
"learning_rate": 4.017048481619606e-06,
"loss": 2.189,
"step": 30000
},
{
"epoch": 16.0,
"eval_bert-score-f1": 0.8815181989403869,
"eval_bert-score-precision": 0.8858505654647171,
"eval_bert-score-recall": 0.8775155795209725,
"eval_gen_len": 47.55181163097534,
"eval_loss": 2.0050902366638184,
"eval_rouge1": 0.3365,
"eval_rouge2": 0.1273,
"eval_rougeL": 0.249,
"eval_rougeLsum": 0.2491,
"eval_runtime": 1330.728,
"eval_samples_per_second": 14.808,
"eval_steps_per_second": 0.353,
"step": 30032
},
{
"epoch": 16.01,
"grad_norm": 0.9980852603912354,
"learning_rate": 3.990410229088972e-06,
"loss": 2.1959,
"step": 30050
},
{
"epoch": 16.04,
"grad_norm": 1.3163436651229858,
"learning_rate": 3.963771976558338e-06,
"loss": 2.173,
"step": 30100
},
{
"epoch": 16.06,
"grad_norm": 0.9099076986312866,
"learning_rate": 3.937133724027704e-06,
"loss": 2.1724,
"step": 30150
},
{
"epoch": 16.09,
"grad_norm": 1.0009883642196655,
"learning_rate": 3.91049547149707e-06,
"loss": 2.1544,
"step": 30200
},
{
"epoch": 16.12,
"grad_norm": 0.9975098967552185,
"learning_rate": 3.883857218966436e-06,
"loss": 2.1458,
"step": 30250
},
{
"epoch": 16.14,
"grad_norm": 0.8518524169921875,
"learning_rate": 3.857218966435802e-06,
"loss": 2.1569,
"step": 30300
},
{
"epoch": 16.17,
"grad_norm": 0.9639623761177063,
"learning_rate": 3.830580713905168e-06,
"loss": 2.15,
"step": 30350
},
{
"epoch": 16.2,
"grad_norm": 0.8993538022041321,
"learning_rate": 3.803942461374534e-06,
"loss": 2.1448,
"step": 30400
},
{
"epoch": 16.22,
"grad_norm": 1.0397539138793945,
"learning_rate": 3.7773042088439e-06,
"loss": 2.1305,
"step": 30450
},
{
"epoch": 16.25,
"grad_norm": 1.0676199197769165,
"learning_rate": 3.7506659563132663e-06,
"loss": 2.141,
"step": 30500
},
{
"epoch": 16.28,
"grad_norm": 1.0054043531417847,
"learning_rate": 3.7240277037826322e-06,
"loss": 2.1595,
"step": 30550
},
{
"epoch": 16.3,
"grad_norm": 0.949520468711853,
"learning_rate": 3.697389451251998e-06,
"loss": 2.2005,
"step": 30600
},
{
"epoch": 16.33,
"grad_norm": 0.9753077030181885,
"learning_rate": 3.670751198721364e-06,
"loss": 2.1924,
"step": 30650
},
{
"epoch": 16.36,
"grad_norm": 0.8877549767494202,
"learning_rate": 3.6441129461907304e-06,
"loss": 2.1553,
"step": 30700
},
{
"epoch": 16.38,
"grad_norm": 1.0130952596664429,
"learning_rate": 3.6174746936600963e-06,
"loss": 2.1451,
"step": 30750
},
{
"epoch": 16.41,
"grad_norm": 1.2479366064071655,
"learning_rate": 3.590836441129462e-06,
"loss": 2.168,
"step": 30800
},
{
"epoch": 16.44,
"grad_norm": 1.0229461193084717,
"learning_rate": 3.564198188598828e-06,
"loss": 2.1642,
"step": 30850
},
{
"epoch": 16.46,
"grad_norm": 0.9332823157310486,
"learning_rate": 3.5375599360681944e-06,
"loss": 2.1587,
"step": 30900
},
{
"epoch": 16.49,
"grad_norm": 1.008484125137329,
"learning_rate": 3.5109216835375603e-06,
"loss": 2.1725,
"step": 30950
},
{
"epoch": 16.52,
"grad_norm": 0.9926919341087341,
"learning_rate": 3.484283431006926e-06,
"loss": 2.1694,
"step": 31000
},
{
"epoch": 16.54,
"grad_norm": 0.9659560322761536,
"learning_rate": 3.457645178476292e-06,
"loss": 2.1787,
"step": 31050
},
{
"epoch": 16.57,
"grad_norm": 0.9388399720191956,
"learning_rate": 3.431006925945658e-06,
"loss": 2.1726,
"step": 31100
},
{
"epoch": 16.6,
"grad_norm": 0.8616447448730469,
"learning_rate": 3.4043686734150243e-06,
"loss": 2.1566,
"step": 31150
},
{
"epoch": 16.62,
"grad_norm": 0.8849464058876038,
"learning_rate": 3.37773042088439e-06,
"loss": 2.1769,
"step": 31200
},
{
"epoch": 16.65,
"grad_norm": 0.9730740785598755,
"learning_rate": 3.351092168353756e-06,
"loss": 2.1858,
"step": 31250
},
{
"epoch": 16.68,
"grad_norm": 1.1059538125991821,
"learning_rate": 3.324453915823122e-06,
"loss": 2.1722,
"step": 31300
},
{
"epoch": 16.7,
"grad_norm": 1.2941957712173462,
"learning_rate": 3.2978156632924883e-06,
"loss": 2.1652,
"step": 31350
},
{
"epoch": 16.73,
"grad_norm": 0.9166463017463684,
"learning_rate": 3.271177410761854e-06,
"loss": 2.158,
"step": 31400
},
{
"epoch": 16.76,
"grad_norm": 1.1810513734817505,
"learning_rate": 3.24453915823122e-06,
"loss": 2.1334,
"step": 31450
},
{
"epoch": 16.78,
"grad_norm": 0.9561477899551392,
"learning_rate": 3.217900905700586e-06,
"loss": 2.1354,
"step": 31500
},
{
"epoch": 16.81,
"grad_norm": 0.8539523482322693,
"learning_rate": 3.1912626531699527e-06,
"loss": 2.1458,
"step": 31550
},
{
"epoch": 16.84,
"grad_norm": 1.099672794342041,
"learning_rate": 3.1646244006393186e-06,
"loss": 2.1784,
"step": 31600
},
{
"epoch": 16.86,
"grad_norm": 0.8352581858634949,
"learning_rate": 3.1379861481086845e-06,
"loss": 2.1348,
"step": 31650
},
{
"epoch": 16.89,
"grad_norm": 1.0823798179626465,
"learning_rate": 3.11134789557805e-06,
"loss": 2.1657,
"step": 31700
},
{
"epoch": 16.92,
"grad_norm": 0.9096495509147644,
"learning_rate": 3.0847096430474167e-06,
"loss": 2.1526,
"step": 31750
},
{
"epoch": 16.94,
"grad_norm": 0.9881473779678345,
"learning_rate": 3.0580713905167826e-06,
"loss": 2.1759,
"step": 31800
},
{
"epoch": 16.97,
"grad_norm": 1.2694969177246094,
"learning_rate": 3.0314331379861485e-06,
"loss": 2.1896,
"step": 31850
},
{
"epoch": 17.0,
"grad_norm": 1.1195552349090576,
"learning_rate": 3.0047948854555144e-06,
"loss": 2.1417,
"step": 31900
},
{
"epoch": 17.0,
"eval_bert-score-f1": 0.8814209539291642,
"eval_bert-score-precision": 0.8856624760483045,
"eval_bert-score-recall": 0.8775075128371735,
"eval_gen_len": 47.645996143306604,
"eval_loss": 2.005204439163208,
"eval_rouge1": 0.3363,
"eval_rouge2": 0.1273,
"eval_rougeL": 0.2488,
"eval_rougeLsum": 0.2489,
"eval_runtime": 1335.7013,
"eval_samples_per_second": 14.753,
"eval_steps_per_second": 0.352,
"step": 31909
},
{
"epoch": 17.02,
"grad_norm": 0.8519843816757202,
"learning_rate": 2.9781566329248803e-06,
"loss": 2.139,
"step": 31950
},
{
"epoch": 17.05,
"grad_norm": 0.9282755255699158,
"learning_rate": 2.9515183803942466e-06,
"loss": 2.1555,
"step": 32000
},
{
"epoch": 17.08,
"grad_norm": 1.0514159202575684,
"learning_rate": 2.9248801278636125e-06,
"loss": 2.1316,
"step": 32050
},
{
"epoch": 17.1,
"grad_norm": 1.034957766532898,
"learning_rate": 2.8982418753329784e-06,
"loss": 2.2094,
"step": 32100
},
{
"epoch": 17.13,
"grad_norm": 1.0574824810028076,
"learning_rate": 2.8716036228023443e-06,
"loss": 2.1641,
"step": 32150
},
{
"epoch": 17.16,
"grad_norm": 0.9753189086914062,
"learning_rate": 2.8449653702717106e-06,
"loss": 2.1442,
"step": 32200
},
{
"epoch": 17.18,
"grad_norm": 1.2037853002548218,
"learning_rate": 2.8183271177410765e-06,
"loss": 2.147,
"step": 32250
},
{
"epoch": 17.21,
"grad_norm": 0.8287584185600281,
"learning_rate": 2.7916888652104424e-06,
"loss": 2.1765,
"step": 32300
},
{
"epoch": 17.23,
"grad_norm": 1.0360517501831055,
"learning_rate": 2.7650506126798083e-06,
"loss": 2.1864,
"step": 32350
},
{
"epoch": 17.26,
"grad_norm": 1.1471740007400513,
"learning_rate": 2.7384123601491746e-06,
"loss": 2.1629,
"step": 32400
},
{
"epoch": 17.29,
"grad_norm": 0.9185000658035278,
"learning_rate": 2.7117741076185405e-06,
"loss": 2.1639,
"step": 32450
},
{
"epoch": 17.31,
"grad_norm": 1.1663320064544678,
"learning_rate": 2.6851358550879064e-06,
"loss": 2.1174,
"step": 32500
},
{
"epoch": 17.34,
"grad_norm": 0.980441689491272,
"learning_rate": 2.6584976025572723e-06,
"loss": 2.1397,
"step": 32550
},
{
"epoch": 17.37,
"grad_norm": 1.1273858547210693,
"learning_rate": 2.6318593500266386e-06,
"loss": 2.1462,
"step": 32600
},
{
"epoch": 17.39,
"grad_norm": 1.146296501159668,
"learning_rate": 2.6052210974960045e-06,
"loss": 2.1797,
"step": 32650
},
{
"epoch": 17.42,
"grad_norm": 0.9205301403999329,
"learning_rate": 2.5785828449653704e-06,
"loss": 2.1909,
"step": 32700
},
{
"epoch": 17.45,
"grad_norm": 1.103619933128357,
"learning_rate": 2.5519445924347363e-06,
"loss": 2.1637,
"step": 32750
},
{
"epoch": 17.47,
"grad_norm": 0.8971651196479797,
"learning_rate": 2.5253063399041022e-06,
"loss": 2.1693,
"step": 32800
},
{
"epoch": 17.5,
"grad_norm": 1.088956356048584,
"learning_rate": 2.4986680873734685e-06,
"loss": 2.151,
"step": 32850
},
{
"epoch": 17.53,
"grad_norm": 0.9202156066894531,
"learning_rate": 2.4720298348428344e-06,
"loss": 2.1679,
"step": 32900
},
{
"epoch": 17.55,
"grad_norm": 0.9409503936767578,
"learning_rate": 2.4453915823122003e-06,
"loss": 2.157,
"step": 32950
},
{
"epoch": 17.58,
"grad_norm": 1.0779467821121216,
"learning_rate": 2.4187533297815667e-06,
"loss": 2.1291,
"step": 33000
},
{
"epoch": 17.61,
"grad_norm": 0.918696403503418,
"learning_rate": 2.3921150772509325e-06,
"loss": 2.1844,
"step": 33050
},
{
"epoch": 17.63,
"grad_norm": 1.5606318712234497,
"learning_rate": 2.3654768247202984e-06,
"loss": 2.1489,
"step": 33100
},
{
"epoch": 17.66,
"grad_norm": 1.205295443534851,
"learning_rate": 2.3388385721896643e-06,
"loss": 2.1667,
"step": 33150
},
{
"epoch": 17.69,
"grad_norm": 1.052687644958496,
"learning_rate": 2.3122003196590307e-06,
"loss": 2.1258,
"step": 33200
},
{
"epoch": 17.71,
"grad_norm": 0.9863780736923218,
"learning_rate": 2.2855620671283966e-06,
"loss": 2.1722,
"step": 33250
},
{
"epoch": 17.74,
"grad_norm": 0.9677025079727173,
"learning_rate": 2.258923814597763e-06,
"loss": 2.1813,
"step": 33300
},
{
"epoch": 17.77,
"grad_norm": 1.0325006246566772,
"learning_rate": 2.2322855620671283e-06,
"loss": 2.1258,
"step": 33350
},
{
"epoch": 17.79,
"grad_norm": 1.2607378959655762,
"learning_rate": 2.2056473095364947e-06,
"loss": 2.1531,
"step": 33400
},
{
"epoch": 17.82,
"grad_norm": 0.8155959844589233,
"learning_rate": 2.1790090570058606e-06,
"loss": 2.1356,
"step": 33450
},
{
"epoch": 17.85,
"grad_norm": 1.000097632408142,
"learning_rate": 2.152370804475227e-06,
"loss": 2.1564,
"step": 33500
},
{
"epoch": 17.87,
"grad_norm": 1.0435174703598022,
"learning_rate": 2.1257325519445928e-06,
"loss": 2.1442,
"step": 33550
},
{
"epoch": 17.9,
"grad_norm": 0.8841784000396729,
"learning_rate": 2.0990942994139587e-06,
"loss": 2.1775,
"step": 33600
},
{
"epoch": 17.93,
"grad_norm": 0.971834123134613,
"learning_rate": 2.0724560468833246e-06,
"loss": 2.1737,
"step": 33650
},
{
"epoch": 17.95,
"grad_norm": 1.0215661525726318,
"learning_rate": 2.0458177943526905e-06,
"loss": 2.1105,
"step": 33700
},
{
"epoch": 17.98,
"grad_norm": 0.964297890663147,
"learning_rate": 2.0191795418220568e-06,
"loss": 2.1731,
"step": 33750
},
{
"epoch": 18.0,
"eval_bert-score-f1": 0.8815276650811141,
"eval_bert-score-precision": 0.8858032303631735,
"eval_bert-score-recall": 0.8775806915883447,
"eval_gen_len": 47.6104739673196,
"eval_loss": 2.004894495010376,
"eval_rouge1": 0.3366,
"eval_rouge2": 0.1273,
"eval_rougeL": 0.249,
"eval_rougeLsum": 0.2491,
"eval_runtime": 1338.1647,
"eval_samples_per_second": 14.726,
"eval_steps_per_second": 0.351,
"step": 33786
}
],
"logging_steps": 50,
"max_steps": 37540,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.840531477455831e+17,
"train_batch_size": 42,
"trial_name": null,
"trial_params": null
}