8000

Browse files

Files changed (8) hide show

{checkpoint-7000 → checkpoint-8000}/README.md +0 -0
{checkpoint-7000 → checkpoint-8000}/adapter_config.json +0 -0
{checkpoint-7000 → checkpoint-8000}/adapter_model.safetensors +1 -1
{checkpoint-7000 → checkpoint-8000}/optimizer.pt +1 -1
{checkpoint-7000 → checkpoint-8000}/rng_state.pth +1 -1
{checkpoint-7000 → checkpoint-8000}/scheduler.pt +1 -1
{checkpoint-7000 → checkpoint-8000}/trainer_state.json +703 -3
{checkpoint-7000 → checkpoint-8000}/training_args.bin +0 -0

{checkpoint-7000 → checkpoint-8000}/README.md RENAMED Viewed

File without changes

{checkpoint-7000 → checkpoint-8000}/adapter_config.json RENAMED Viewed

File without changes

{checkpoint-7000 → checkpoint-8000}/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:904a4ef92c61ccc6c50c07a0afb16c79eca8345d63d05b6fba20d672883caf9c
 size 8535970848

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad06aee126b0d1bc6ad5f7300e9f866b52d5ddfa03c94459458c802bca279d27
 size 8535970848

{checkpoint-7000 → checkpoint-8000}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2496581a1d322ee8cdb74b4d9913aa030f52ad250c7a36328009cb9a925d20b3
 size 6576969753

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d92dea01c8c29de2a4005cc9690e2f9d72caf688c782c20775dc59d995cfa7b
 size 6576969753

{checkpoint-7000 → checkpoint-8000}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b98ab2f9046186de382cffe0ac794b64835264151053a24e157ecc237266a430
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:51c2939c18e59171b164ef03eea9767004735b3684fc33ed54d0af9e9aa5a8ec
 size 14645

{checkpoint-7000 → checkpoint-8000}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb8edb6711b4ae2f588fdfdebd7cb5e4b8458aa95884c8a213e06a55271fdc66
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ef9fdb1f0fce4718b7fd8d7a72f390bbaa45bb8e37f4d0f2d8b474443eb5ef2
 size 1465

{checkpoint-7000 → checkpoint-8000}/trainer_state.json RENAMED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.951406649616368,
   "eval_steps": 500,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4908,6 +4908,706 @@
       "learning_rate": 0.00017794528798804519,
       "loss": 5.3009,
       "step": 7000
     }
   ],
   "logging_steps": 10,
@@ -4927,7 +5627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.267749345144734e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.230179028132993,
   "eval_steps": 500,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00017794528798804519,
       "loss": 5.3009,
       "step": 7000
+    },
+    {
+      "epoch": 8.964194373401535,
+      "grad_norm": 1.296583652496338,
+      "learning_rate": 0.00017764536719815918,
+      "loss": 5.3158,
+      "step": 7010
+    },
+    {
+      "epoch": 8.976982097186701,
+      "grad_norm": 1.0986963510513306,
+      "learning_rate": 0.00017734533195522424,
+      "loss": 5.3068,
+      "step": 7020
+    },
+    {
+      "epoch": 8.989769820971867,
+      "grad_norm": 1.0342364311218262,
+      "learning_rate": 0.00017704518350139965,
+      "loss": 5.2997,
+      "step": 7030
+    },
+    {
+      "epoch": 9.002557544757034,
+      "grad_norm": 1.445008635520935,
+      "learning_rate": 0.0001767449230793133,
+      "loss": 5.3086,
+      "step": 7040
+    },
+    {
+      "epoch": 9.0153452685422,
+      "grad_norm": 1.7098116874694824,
+      "learning_rate": 0.00017644455193205666,
+      "loss": 5.3005,
+      "step": 7050
+    },
+    {
+      "epoch": 9.028132992327366,
+      "grad_norm": 0.9708366990089417,
+      "learning_rate": 0.00017614407130317968,
+      "loss": 5.2737,
+      "step": 7060
+    },
+    {
+      "epoch": 9.040920716112533,
+      "grad_norm": 1.089858889579773,
+      "learning_rate": 0.00017584348243668556,
+      "loss": 5.262,
+      "step": 7070
+    },
+    {
+      "epoch": 9.053708439897699,
+      "grad_norm": 2.534782886505127,
+      "learning_rate": 0.00017554278657702549,
+      "loss": 5.2854,
+      "step": 7080
+    },
+    {
+      "epoch": 9.066496163682864,
+      "grad_norm": 1.3707194328308105,
+      "learning_rate": 0.00017524198496909373,
+      "loss": 5.2936,
+      "step": 7090
+    },
+    {
+      "epoch": 9.07928388746803,
+      "grad_norm": 1.4009782075881958,
+      "learning_rate": 0.0001749410788582223,
+      "loss": 5.2795,
+      "step": 7100
+    },
+    {
+      "epoch": 9.092071611253196,
+      "grad_norm": 1.1263126134872437,
+      "learning_rate": 0.00017464006949017584,
+      "loss": 5.2806,
+      "step": 7110
+    },
+    {
+      "epoch": 9.104859335038363,
+      "grad_norm": 0.9099717140197754,
+      "learning_rate": 0.00017433895811114658,
+      "loss": 5.3049,
+      "step": 7120
+    },
+    {
+      "epoch": 9.117647058823529,
+      "grad_norm": 1.3195571899414062,
+      "learning_rate": 0.00017403774596774893,
+      "loss": 5.2803,
+      "step": 7130
+    },
+    {
+      "epoch": 9.130434782608695,
+      "grad_norm": 1.1069055795669556,
+      "learning_rate": 0.00017373643430701463,
+      "loss": 5.2579,
+      "step": 7140
+    },
+    {
+      "epoch": 9.143222506393862,
+      "grad_norm": 1.1975563764572144,
+      "learning_rate": 0.00017343502437638727,
+      "loss": 5.2795,
+      "step": 7150
+    },
+    {
+      "epoch": 9.156010230179028,
+      "grad_norm": 1.6220418214797974,
+      "learning_rate": 0.00017313351742371746,
+      "loss": 5.2797,
+      "step": 7160
+    },
+    {
+      "epoch": 9.168797953964194,
+      "grad_norm": 1.490394115447998,
+      "learning_rate": 0.00017283191469725728,
+      "loss": 5.2768,
+      "step": 7170
+    },
+    {
+      "epoch": 9.18158567774936,
+      "grad_norm": 1.4838827848434448,
+      "learning_rate": 0.00017253021744565548,
+      "loss": 5.2871,
+      "step": 7180
+    },
+    {
+      "epoch": 9.194373401534527,
+      "grad_norm": 5.531613826751709,
+      "learning_rate": 0.0001722284269179521,
+      "loss": 5.2537,
+      "step": 7190
+    },
+    {
+      "epoch": 9.207161125319693,
+      "grad_norm": 1.5700544118881226,
+      "learning_rate": 0.0001719265443635733,
+      "loss": 5.2875,
+      "step": 7200
+    },
+    {
+      "epoch": 9.21994884910486,
+      "grad_norm": 1.2028346061706543,
+      "learning_rate": 0.00017162457103232632,
+      "loss": 5.2707,
+      "step": 7210
+    },
+    {
+      "epoch": 9.232736572890026,
+      "grad_norm": 1.9414821863174438,
+      "learning_rate": 0.00017132250817439412,
+      "loss": 5.2918,
+      "step": 7220
+    },
+    {
+      "epoch": 9.245524296675192,
+      "grad_norm": 3.8366823196411133,
+      "learning_rate": 0.00017102035704033038,
+      "loss": 5.277,
+      "step": 7230
+    },
+    {
+      "epoch": 9.258312020460359,
+      "grad_norm": 41.69551086425781,
+      "learning_rate": 0.0001707181188810542,
+      "loss": 5.2691,
+      "step": 7240
+    },
+    {
+      "epoch": 9.271099744245525,
+      "grad_norm": 2.0435402393341064,
+      "learning_rate": 0.00017041579494784506,
+      "loss": 5.3075,
+      "step": 7250
+    },
+    {
+      "epoch": 9.28388746803069,
+      "grad_norm": 2.1493489742279053,
+      "learning_rate": 0.00017011338649233743,
+      "loss": 5.3234,
+      "step": 7260
+    },
+    {
+      "epoch": 9.296675191815856,
+      "grad_norm": 3.627615213394165,
+      "learning_rate": 0.0001698108947665158,
+      "loss": 5.3018,
+      "step": 7270
+    },
+    {
+      "epoch": 9.309462915601022,
+      "grad_norm": 24.722545623779297,
+      "learning_rate": 0.00016950832102270927,
+      "loss": 5.3123,
+      "step": 7280
+    },
+    {
+      "epoch": 9.322250639386189,
+      "grad_norm": 19.32564353942871,
+      "learning_rate": 0.00016920566651358666,
+      "loss": 5.346,
+      "step": 7290
+    },
+    {
+      "epoch": 9.335038363171355,
+      "grad_norm": 173.1358184814453,
+      "learning_rate": 0.00016890293249215109,
+      "loss": 5.3385,
+      "step": 7300
+    },
+    {
+      "epoch": 9.347826086956522,
+      "grad_norm": 139.24111938476562,
+      "learning_rate": 0.0001686001202117348,
+      "loss": 5.3411,
+      "step": 7310
+    },
+    {
+      "epoch": 9.360613810741688,
+      "grad_norm": 12987.6923828125,
+      "learning_rate": 0.00016829723092599418,
+      "loss": 5.3288,
+      "step": 7320
+    },
+    {
+      "epoch": 9.373401534526854,
+      "grad_norm": 53.43489074707031,
+      "learning_rate": 0.00016799426588890427,
+      "loss": 5.3403,
+      "step": 7330
+    },
+    {
+      "epoch": 9.38618925831202,
+      "grad_norm": 29.375526428222656,
+      "learning_rate": 0.00016769122635475385,
+      "loss": 5.3186,
+      "step": 7340
+    },
+    {
+      "epoch": 9.398976982097187,
+      "grad_norm": 47.31606674194336,
+      "learning_rate": 0.00016738811357813998,
+      "loss": 5.3178,
+      "step": 7350
+    },
+    {
+      "epoch": 9.411764705882353,
+      "grad_norm": 12.416561126708984,
+      "learning_rate": 0.00016708492881396307,
+      "loss": 5.3385,
+      "step": 7360
+    },
+    {
+      "epoch": 9.42455242966752,
+      "grad_norm": 9.562813758850098,
+      "learning_rate": 0.0001667816733174215,
+      "loss": 5.3481,
+      "step": 7370
+    },
+    {
+      "epoch": 9.437340153452686,
+      "grad_norm": 3.935084819793701,
+      "learning_rate": 0.00016647834834400654,
+      "loss": 5.3439,
+      "step": 7380
+    },
+    {
+      "epoch": 9.450127877237852,
+      "grad_norm": 2.569079875946045,
+      "learning_rate": 0.00016617495514949704,
+      "loss": 5.3225,
+      "step": 7390
+    },
+    {
+      "epoch": 9.462915601023019,
+      "grad_norm": 1.732500672340393,
+      "learning_rate": 0.0001658714949899543,
+      "loss": 5.3235,
+      "step": 7400
+    },
+    {
+      "epoch": 9.475703324808185,
+      "grad_norm": 1.2219961881637573,
+      "learning_rate": 0.00016556796912171689,
+      "loss": 5.3413,
+      "step": 7410
+    },
+    {
+      "epoch": 9.48849104859335,
+      "grad_norm": 0.9043082594871521,
+      "learning_rate": 0.00016526437880139537,
+      "loss": 5.288,
+      "step": 7420
+    },
+    {
+      "epoch": 9.501278772378516,
+      "grad_norm": 1.0241756439208984,
+      "learning_rate": 0.0001649607252858672,
+      "loss": 5.302,
+      "step": 7430
+    },
+    {
+      "epoch": 9.514066496163682,
+      "grad_norm": 1.5669431686401367,
+      "learning_rate": 0.00016465700983227138,
+      "loss": 5.2899,
+      "step": 7440
+    },
+    {
+      "epoch": 9.526854219948849,
+      "grad_norm": 1.5617988109588623,
+      "learning_rate": 0.00016435323369800344,
+      "loss": 5.2868,
+      "step": 7450
+    },
+    {
+      "epoch": 9.539641943734015,
+      "grad_norm": 1.1847914457321167,
+      "learning_rate": 0.00016404939814071003,
+      "loss": 5.2617,
+      "step": 7460
+    },
+    {
+      "epoch": 9.552429667519181,
+      "grad_norm": 0.8560781478881836,
+      "learning_rate": 0.0001637455044182839,
+      "loss": 5.2855,
+      "step": 7470
+    },
+    {
+      "epoch": 9.565217391304348,
+      "grad_norm": 0.9926068782806396,
+      "learning_rate": 0.0001634415537888585,
+      "loss": 5.265,
+      "step": 7480
+    },
+    {
+      "epoch": 9.578005115089514,
+      "grad_norm": 2.2098798751831055,
+      "learning_rate": 0.00016313754751080302,
+      "loss": 5.2773,
+      "step": 7490
+    },
+    {
+      "epoch": 9.59079283887468,
+      "grad_norm": 1.3162308931350708,
+      "learning_rate": 0.00016283348684271694,
+      "loss": 5.276,
+      "step": 7500
+    },
+    {
+      "epoch": 9.603580562659847,
+      "grad_norm": 1.2072679996490479,
+      "learning_rate": 0.00016252937304342494,
+      "loss": 5.2825,
+      "step": 7510
+    },
+    {
+      "epoch": 9.616368286445013,
+      "grad_norm": 1.212632656097412,
+      "learning_rate": 0.0001622252073719717,
+      "loss": 5.2609,
+      "step": 7520
+    },
+    {
+      "epoch": 9.62915601023018,
+      "grad_norm": 1.244361400604248,
+      "learning_rate": 0.0001619209910876165,
+      "loss": 5.247,
+      "step": 7530
+    },
+    {
+      "epoch": 9.641943734015346,
+      "grad_norm": 1.2935197353363037,
+      "learning_rate": 0.00016161672544982842,
+      "loss": 5.2666,
+      "step": 7540
+    },
+    {
+      "epoch": 9.654731457800512,
+      "grad_norm": 1.270849347114563,
+      "learning_rate": 0.00016131241171828063,
+      "loss": 5.2556,
+      "step": 7550
+    },
+    {
+      "epoch": 9.667519181585678,
+      "grad_norm": 1.5244646072387695,
+      "learning_rate": 0.00016100805115284555,
+      "loss": 5.2594,
+      "step": 7560
+    },
+    {
+      "epoch": 9.680306905370845,
+      "grad_norm": 1.245428204536438,
+      "learning_rate": 0.00016070364501358944,
+      "loss": 5.2452,
+      "step": 7570
+    },
+    {
+      "epoch": 9.693094629156011,
+      "grad_norm": 2.1988797187805176,
+      "learning_rate": 0.00016039919456076727,
+      "loss": 5.289,
+      "step": 7580
+    },
+    {
+      "epoch": 9.705882352941176,
+      "grad_norm": 4.7878031730651855,
+      "learning_rate": 0.00016009470105481736,
+      "loss": 5.2933,
+      "step": 7590
+    },
+    {
+      "epoch": 9.718670076726342,
+      "grad_norm": 1.0919615030288696,
+      "learning_rate": 0.00015979016575635644,
+      "loss": 5.2634,
+      "step": 7600
+    },
+    {
+      "epoch": 9.731457800511508,
+      "grad_norm": 1.3478410243988037,
+      "learning_rate": 0.00015948558992617416,
+      "loss": 5.2808,
+      "step": 7610
+    },
+    {
+      "epoch": 9.744245524296675,
+      "grad_norm": 1.088616132736206,
+      "learning_rate": 0.00015918097482522798,
+      "loss": 5.2656,
+      "step": 7620
+    },
+    {
+      "epoch": 9.757033248081841,
+      "grad_norm": 1.4594988822937012,
+      "learning_rate": 0.00015887632171463794,
+      "loss": 5.2422,
+      "step": 7630
+    },
+    {
+      "epoch": 9.769820971867007,
+      "grad_norm": 1.346463918685913,
+      "learning_rate": 0.00015857163185568153,
+      "loss": 5.2668,
+      "step": 7640
+    },
+    {
+      "epoch": 9.782608695652174,
+      "grad_norm": 2.3005871772766113,
+      "learning_rate": 0.00015826690650978825,
+      "loss": 5.2295,
+      "step": 7650
+    },
+    {
+      "epoch": 9.79539641943734,
+      "grad_norm": 1.372439980506897,
+      "learning_rate": 0.0001579621469385346,
+      "loss": 5.2419,
+      "step": 7660
+    },
+    {
+      "epoch": 9.808184143222507,
+      "grad_norm": 1.0069390535354614,
+      "learning_rate": 0.00015765735440363872,
+      "loss": 5.2692,
+      "step": 7670
+    },
+    {
+      "epoch": 9.820971867007673,
+      "grad_norm": 1.3591490983963013,
+      "learning_rate": 0.00015735253016695527,
+      "loss": 5.269,
+      "step": 7680
+    },
+    {
+      "epoch": 9.83375959079284,
+      "grad_norm": 1.47946298122406,
+      "learning_rate": 0.00015704767549047015,
+      "loss": 5.2615,
+      "step": 7690
+    },
+    {
+      "epoch": 9.846547314578006,
+      "grad_norm": 2.2882235050201416,
+      "learning_rate": 0.00015674279163629528,
+      "loss": 5.2452,
+      "step": 7700
+    },
+    {
+      "epoch": 9.859335038363172,
+      "grad_norm": 1.406764268875122,
+      "learning_rate": 0.00015643787986666333,
+      "loss": 5.2515,
+      "step": 7710
+    },
+    {
+      "epoch": 9.872122762148338,
+      "grad_norm": 0.9985896348953247,
+      "learning_rate": 0.00015613294144392256,
+      "loss": 5.2536,
+      "step": 7720
+    },
+    {
+      "epoch": 9.884910485933505,
+      "grad_norm": 1.1201996803283691,
+      "learning_rate": 0.00015582797763053166,
+      "loss": 5.2459,
+      "step": 7730
+    },
+    {
+      "epoch": 9.89769820971867,
+      "grad_norm": 1.2289462089538574,
+      "learning_rate": 0.00015552298968905432,
+      "loss": 5.2162,
+      "step": 7740
+    },
+    {
+      "epoch": 9.910485933503836,
+      "grad_norm": 1.3729525804519653,
+      "learning_rate": 0.00015521797888215424,
+      "loss": 5.2488,
+      "step": 7750
+    },
+    {
+      "epoch": 9.923273657289002,
+      "grad_norm": 1.3408769369125366,
+      "learning_rate": 0.00015491294647258967,
+      "loss": 5.2608,
+      "step": 7760
+    },
+    {
+      "epoch": 9.936061381074168,
+      "grad_norm": 1.3026928901672363,
+      "learning_rate": 0.0001546078937232083,
+      "loss": 5.227,
+      "step": 7770
+    },
+    {
+      "epoch": 9.948849104859335,
+      "grad_norm": 1.3715628385543823,
+      "learning_rate": 0.00015430282189694212,
+      "loss": 5.2677,
+      "step": 7780
+    },
+    {
+      "epoch": 9.961636828644501,
+      "grad_norm": 1.0734012126922607,
+      "learning_rate": 0.00015399773225680208,
+      "loss": 5.2575,
+      "step": 7790
+    },
+    {
+      "epoch": 9.974424552429667,
+      "grad_norm": 1.0179634094238281,
+      "learning_rate": 0.00015369262606587281,
+      "loss": 5.2117,
+      "step": 7800
+    },
+    {
+      "epoch": 9.987212276214834,
+      "grad_norm": 1.2135021686553955,
+      "learning_rate": 0.00015338750458730746,
+      "loss": 5.2387,
+      "step": 7810
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 1.7299256324768066,
+      "learning_rate": 0.00015308236908432264,
+      "loss": 5.2517,
+      "step": 7820
+    },
+    {
+      "epoch": 10.012787723785166,
+      "grad_norm": 1.3696908950805664,
+      "learning_rate": 0.00015277722082019272,
+      "loss": 5.2373,
+      "step": 7830
+    },
+    {
+      "epoch": 10.025575447570333,
+      "grad_norm": 1.4967882633209229,
+      "learning_rate": 0.00015247206105824522,
+      "loss": 5.2045,
+      "step": 7840
+    },
+    {
+      "epoch": 10.038363171355499,
+      "grad_norm": 1.4495280981063843,
+      "learning_rate": 0.00015216689106185505,
+      "loss": 5.2286,
+      "step": 7850
+    },
+    {
+      "epoch": 10.051150895140665,
+      "grad_norm": 2.255189895629883,
+      "learning_rate": 0.00015186171209443958,
+      "loss": 5.1753,
+      "step": 7860
+    },
+    {
+      "epoch": 10.063938618925832,
+      "grad_norm": 1.22813081741333,
+      "learning_rate": 0.00015155652541945326,
+      "loss": 5.2259,
+      "step": 7870
+    },
+    {
+      "epoch": 10.076726342710998,
+      "grad_norm": 1.1559652090072632,
+      "learning_rate": 0.00015125133230038256,
+      "loss": 5.1997,
+      "step": 7880
+    },
+    {
+      "epoch": 10.089514066496164,
+      "grad_norm": 1.5692169666290283,
+      "learning_rate": 0.00015094613400074052,
+      "loss": 5.2184,
+      "step": 7890
+    },
+    {
+      "epoch": 10.10230179028133,
+      "grad_norm": 1.143264651298523,
+      "learning_rate": 0.00015064093178406165,
+      "loss": 5.2217,
+      "step": 7900
+    },
+    {
+      "epoch": 10.115089514066495,
+      "grad_norm": 1.3026010990142822,
+      "learning_rate": 0.00015033572691389673,
+      "loss": 5.2219,
+      "step": 7910
+    },
+    {
+      "epoch": 10.127877237851662,
+      "grad_norm": 1.255898356437683,
+      "learning_rate": 0.00015003052065380742,
+      "loss": 5.1964,
+      "step": 7920
+    },
+    {
+      "epoch": 10.140664961636828,
+      "grad_norm": 1.5015469789505005,
+      "learning_rate": 0.00014972531426736131,
+      "loss": 5.241,
+      "step": 7930
+    },
+    {
+      "epoch": 10.153452685421994,
+      "grad_norm": 1.5134257078170776,
+      "learning_rate": 0.0001494201090181263,
+      "loss": 5.2255,
+      "step": 7940
+    },
+    {
+      "epoch": 10.16624040920716,
+      "grad_norm": 1.295350432395935,
+      "learning_rate": 0.00014911490616966575,
+      "loss": 5.2201,
+      "step": 7950
+    },
+    {
+      "epoch": 10.179028132992327,
+      "grad_norm": 1.8612785339355469,
+      "learning_rate": 0.000148809706985533,
+      "loss": 5.2109,
+      "step": 7960
+    },
+    {
+      "epoch": 10.191815856777493,
+      "grad_norm": 1.5443718433380127,
+      "learning_rate": 0.0001485045127292662,
+      "loss": 5.2247,
+      "step": 7970
+    },
+    {
+      "epoch": 10.20460358056266,
+      "grad_norm": 1.7104902267456055,
+      "learning_rate": 0.00014819932466438317,
+      "loss": 5.2141,
+      "step": 7980
+    },
+    {
+      "epoch": 10.217391304347826,
+      "grad_norm": 1.7787065505981445,
+      "learning_rate": 0.00014789414405437607,
+      "loss": 5.1911,
+      "step": 7990
+    },
+    {
+      "epoch": 10.230179028132993,
+      "grad_norm": 1.0850389003753662,
+      "learning_rate": 0.0001475889721627062,
+      "loss": 5.2156,
+      "step": 8000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.0590849988794814e+19,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

{checkpoint-7000 → checkpoint-8000}/training_args.bin RENAMED Viewed

File without changes