Training in progress, step 600, checkpoint

Files changed (9) hide show

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "up_proj",
     "gate_proj",
-    "k_proj",
-    "o_proj",
     "q_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "gate_proj",
     "q_proj",
+    "k_proj",
+    "up_proj",
+    "v_proj",
+    "down_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30602946db0511e89472d2985d62506942982f91c28840fc64989587d9d5d5f3
 size 360740440

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a1dd12a210017a07f561dace36dc00f59b55ff12d579593e8e5f59db1ca495d
 size 360740440

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae593a71e87e4da53cca674152b73afa1b696d643d8531e897898e897d841578
-size 183445626

 version https://git-lfs.github.com/spec/v1
+oid sha256:09dbea6aaf941a049c9aa3656b362f19b6c30102fcdcd2d81680e63cd278a9c4
+size 184018770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf56788b1af92ed8ac278c2b5a5a7e56b531312246249a57078017be8884a01f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe9c2e6cee455bb212d5ae0dd7c343acca65b7f37490224e2c429dd428b2c9ad
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4460b404e87366806d8b48914980f9d75ea1e74835c790fae129ccdf808017b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:06eec3ac373cc6b81236635fbbf70132276a159a47d5685288ad3485f23d8131
 size 1064

last-checkpoint/special_tokens_map.json CHANGED Viewed

@@ -13,5 +13,11 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|eot_id|>"
 }

     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -2055,6 +2055,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
   "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
@@ -2062,5 +2063,8 @@
   "model_max_length": 4096,
   "pad_token": "<|eot_id|>",
   "padding_side": "left",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
   "extra_special_tokens": {},
+  "max_length": 4096,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   "model_max_length": 4096,
   "pad_token": "<|eot_id|>",
   "padding_side": "left",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0002781929421059623,
   "eval_steps": 500,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -21,6 +21,13 @@
       "learning_rate": 1.9999999331656075e-05,
       "loss": 2.076,
       "step": 400
     }
   ],
   "logging_steps": 200,
@@ -40,7 +47,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 731147720306688.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00041728941315894347,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9999999331656075e-05,
       "loss": 2.076,
       "step": 400
+    },
+    {
+      "epoch": 0.00041728941315894347,
+      "grad_norm": 7.084391117095947,
+      "learning_rate": 1.9999998496226195e-05,
+      "loss": 2.202,
+      "step": 600
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 1138117833400320.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dafb3d7d0b401b02200850bddb3d6e4859621630290323fd4a0dc0ddd9a3d4fc
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:f73bef2970b56da564b1d8c87d27fe806335e746653d451535ecd6b817d641ba
 size 6840