Training in progress, step 18800, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +256 -4
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "gate_proj",
     "o_proj",
     "k_proj",
     "up_proj",
-    "v_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
     "o_proj",
     "k_proj",
+    "gate_proj",
+    "down_proj",
     "up_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b74082b50533e4333b88e29acd5ec8eecf39804ba0295840da3c866df5050cc
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a9c62f875c287b7c4d9167adebf029a6ce30f517d94dbbe27ecde3a226f0357
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4f867fe76c879c3168a89d511c76e1fa1b26ec6a18b25ec83cf06f006a135e0
-size 84581014

 version https://git-lfs.github.com/spec/v1
+oid sha256:06b6897debb3d46ba4387737336e6f97292f866a07ef61439e53f84fd4505d62
+size 85736914

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68341ef163cc0109a8713173add8dbcfd98bf67468e1a64939a8b03523666bf9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9989851a017fcf0ea72ad3948880d1a8db6c3206bc2c0667a86129b6301b196
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6fa196d6a1215f8cb5411f481e0c89a67b0e9876e4efe3f1ada6c1ba44ba0a3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:821633e9583ccb6e3bb6cf440d7524519270ce3ff11ad9b4c39204df191e39fc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.008626491509226999,
   "eval_steps": 2000,
-  "global_step": 11600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -413,14 +413,266 @@
       "learning_rate": 1.9999853411898932e-05,
       "loss": 1.5097,
       "step": 11600
     }
   ],
   "logging_steps": 200,
-  "max_steps": 6723475,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
-  "total_flos": 1.5179994612574618e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.027961731098873722,
   "eval_steps": 2000,
+  "global_step": 18800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9999853411898932e-05,
       "loss": 1.5097,
       "step": 11600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.229198455810547,
+      "learning_rate": 1.999939333873553e-05,
+      "loss": 1.5721,
+      "step": 11800
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.2855397462844849,
+      "learning_rate": 1.9999372576820398e-05,
+      "loss": 1.5382,
+      "step": 12000
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.536872148513794,
+      "learning_rate": 1.9999351465598642e-05,
+      "loss": 1.5964,
+      "step": 12200
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.0981087684631348,
+      "learning_rate": 1.9999330005070992e-05,
+      "loss": 1.5269,
+      "step": 12400
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.213561773300171,
+      "learning_rate": 1.99993081952382e-05,
+      "loss": 1.488,
+      "step": 12600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.3960020542144775,
+      "learning_rate": 1.999928603610103e-05,
+      "loss": 1.5714,
+      "step": 12800
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.198500394821167,
+      "learning_rate": 1.9999263641071352e-05,
+      "loss": 1.5587,
+      "step": 13000
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.4841859340667725,
+      "learning_rate": 1.9999240785074275e-05,
+      "loss": 1.5417,
+      "step": 13200
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.9682819843292236,
+      "learning_rate": 1.999921757977517e-05,
+      "loss": 1.578,
+      "step": 13400
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.8368330001831055,
+      "learning_rate": 1.999919402517485e-05,
+      "loss": 1.5703,
+      "step": 13600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.0925166606903076,
+      "learning_rate": 1.9999170121274143e-05,
+      "loss": 1.5163,
+      "step": 13800
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.2362563610076904,
+      "learning_rate": 1.999914586807388e-05,
+      "loss": 1.6078,
+      "step": 14000
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.019454002380371,
+      "learning_rate": 1.9999121265574902e-05,
+      "loss": 1.5317,
+      "step": 14200
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.67069411277771,
+      "learning_rate": 1.9999096313778082e-05,
+      "loss": 1.529,
+      "step": 14400
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.8095571994781494,
+      "learning_rate": 1.9999071012684285e-05,
+      "loss": 1.557,
+      "step": 14600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.3300442695617676,
+      "learning_rate": 1.9999045362294388e-05,
+      "loss": 1.5554,
+      "step": 14800
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.160933256149292,
+      "learning_rate": 1.9999019362609297e-05,
+      "loss": 1.528,
+      "step": 15000
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.6309542655944824,
+      "learning_rate": 1.999899301362992e-05,
+      "loss": 1.5344,
+      "step": 15200
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.1774258613586426,
+      "learning_rate": 1.9998966315357173e-05,
+      "loss": 1.5661,
+      "step": 15400
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.8362374305725098,
+      "learning_rate": 1.9998939267791986e-05,
+      "loss": 1.5404,
+      "step": 15600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.6643764972686768,
+      "learning_rate": 1.999891187093531e-05,
+      "loss": 1.562,
+      "step": 15800
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 2.519455671310425,
+      "learning_rate": 1.99988841247881e-05,
+      "loss": 1.5468,
+      "step": 16000
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.8681560754776,
+      "learning_rate": 1.9998856029351327e-05,
+      "loss": 1.501,
+      "step": 16200
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.5082764625549316,
+      "learning_rate": 1.999882758462597e-05,
+      "loss": 1.5632,
+      "step": 16400
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.8632557392120361,
+      "learning_rate": 1.9998798790613018e-05,
+      "loss": 1.5509,
+      "step": 16600
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.0881147384643555,
+      "learning_rate": 1.999876964731349e-05,
+      "loss": 1.5277,
+      "step": 16800
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.9005630016326904,
+      "learning_rate": 1.9998740303060157e-05,
+      "loss": 1.5542,
+      "step": 17000
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.4960647821426392,
+      "learning_rate": 1.9998710462936946e-05,
+      "loss": 1.5781,
+      "step": 17200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.5842814445495605,
+      "learning_rate": 1.9998680273530233e-05,
+      "loss": 1.5535,
+      "step": 17400
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.9667937755584717,
+      "learning_rate": 1.9998649734841075e-05,
+      "loss": 1.5764,
+      "step": 17600
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.2704834938049316,
+      "learning_rate": 1.9998618846870542e-05,
+      "loss": 1.55,
+      "step": 17800
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.67142391204834,
+      "learning_rate": 1.9998587609619712e-05,
+      "loss": 1.5648,
+      "step": 18000
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.281129837036133,
+      "learning_rate": 1.9998556023089672e-05,
+      "loss": 1.5405,
+      "step": 18200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.508354425430298,
+      "learning_rate": 1.999852408728153e-05,
+      "loss": 1.5574,
+      "step": 18400
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.8000833988189697,
+      "learning_rate": 1.99984918021964e-05,
+      "loss": 1.5638,
+      "step": 18600
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 3.3880839347839355,
+      "learning_rate": 1.999845916783541e-05,
+      "loss": 1.553,
+      "step": 18800
     }
   ],
   "logging_steps": 200,
+  "max_steps": 3361735,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
+  "total_flos": 3.385280035214623e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45d4c3e11daa354fcf86b7301a1fc0e8bc31167e5dec2140d52b76922a0af4ca
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc9e75826f834526adf57daa4ee7a58f88bf2ec9679f7599af2037d01589eb4f
 size 4920