Training in progress, step 3200, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/README.md +0 -12
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/special_tokens_map.json +7 -1
last-checkpoint/tokenizer_config.json +5 -1
last-checkpoint/trainer_state.json +10 -66
last-checkpoint/training_args.bin +1 -1

last-checkpoint/README.md CHANGED Viewed

@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ## Training procedure
-The following `bitsandbytes` quantization config was used during training:
-- quant_method: bitsandbytes
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: float16
 ### Framework versions


201	## Training procedure
202
203












204	### Framework versions
205
206

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e17afdd5ee88313e3bb7f01e153b4d1ebe56021d4123305e609e23d3cd06fd74
 size 75507072

 version https://git-lfs.github.com/spec/v1
+oid sha256:51285f336292e7c2fdd6412ce56c0212934410ad3dc0c472b93adc76bc0b3f77
 size 75507072

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76585761215f7c28b01c08237b1a8e04a0a675a97605ae7ab28746453d9762fe
-size 151034501

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4fddfb02ad1801dab56ea0155f7e7788d159e94e876de4ddb2af82d3fbc7917
+size 151032837

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63998d98da08278c470436d7f3090e0552bc25f2e2fd93ff495fe8ccda5df6f6
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:eefe083b6454775aee01bb69e64bad53187f7b97719dea614c013fe397ac511b
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43e3cf8d56a3f083d00cc85544d76ada2f884a1018c8752332d96f2799911117
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:50b57a34df83b700e2c13775ff734b4569b74ce7e20da3479db76577bb4e906e
 size 627

last-checkpoint/special_tokens_map.json CHANGED Viewed

@@ -12,6 +12,12 @@
     ">>SUFFIX<<",
     ">>MIDDLE<<"
   ],
-  "eos_token": "<|endoftext|>",
   "pad_token": "<|endoftext|>"
 }

     ">>SUFFIX<<",
     ">>MIDDLE<<"
   ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "pad_token": "<|endoftext|>"
 }

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -113,11 +113,15 @@
   ],
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   ],
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
+  "max_length": 512,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1208428144454956,
-  "best_model_checkpoint": "./outputs/checkpoint-3600",
-  "epoch": 2.6229508196721314,
   "eval_steps": 100,
-  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -445,79 +445,23 @@
     {
       "epoch": 2.33,
       "learning_rate": 0.0002,
-      "loss": 1.1424,
       "step": 3200
     },
     {
       "epoch": 2.33,
-      "eval_loss": 1.1579593420028687,
-      "eval_runtime": 417.5279,
-      "eval_samples_per_second": 15.027,
-      "eval_steps_per_second": 1.88,
       "step": 3200
-    },
-    {
-      "epoch": 2.4,
-      "learning_rate": 0.0002,
-      "loss": 1.1263,
-      "step": 3300
-    },
-    {
-      "epoch": 2.4,
-      "eval_loss": 1.147441029548645,
-      "eval_runtime": 424.7891,
-      "eval_samples_per_second": 14.77,
-      "eval_steps_per_second": 1.848,
-      "step": 3300
-    },
-    {
-      "epoch": 2.48,
-      "learning_rate": 0.0002,
-      "loss": 1.1114,
-      "step": 3400
-    },
-    {
-      "epoch": 2.48,
-      "eval_loss": 1.1393115520477295,
-      "eval_runtime": 423.9015,
-      "eval_samples_per_second": 14.801,
-      "eval_steps_per_second": 1.852,
-      "step": 3400
-    },
-    {
-      "epoch": 2.55,
-      "learning_rate": 0.0002,
-      "loss": 1.1116,
-      "step": 3500
-    },
-    {
-      "epoch": 2.55,
-      "eval_loss": 1.1295558214187622,
-      "eval_runtime": 417.5664,
-      "eval_samples_per_second": 15.025,
-      "eval_steps_per_second": 1.88,
-      "step": 3500
-    },
-    {
-      "epoch": 2.62,
-      "learning_rate": 0.0002,
-      "loss": 1.1054,
-      "step": 3600
-    },
-    {
-      "epoch": 2.62,
-      "eval_loss": 1.1208428144454956,
-      "eval_runtime": 437.4502,
-      "eval_samples_per_second": 14.342,
-      "eval_steps_per_second": 1.794,
-      "step": 3600
     }
   ],
   "logging_steps": 100,
   "max_steps": 4116,
   "num_train_epochs": 3,
   "save_steps": 100,
-  "total_flos": 1.1670242076660173e+18,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.1677733659744263,
+  "best_model_checkpoint": "./outputs/checkpoint-3100",
+  "epoch": 2.33224043715847,
   "eval_steps": 100,
+  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 2.33,
       "learning_rate": 0.0002,
+      "loss": 1.1528,
       "step": 3200
     },
     {
       "epoch": 2.33,
+      "eval_loss": 1.1819865703582764,
+      "eval_runtime": 339.2738,
+      "eval_samples_per_second": 18.492,
+      "eval_steps_per_second": 2.314,
       "step": 3200
     }
   ],
   "logging_steps": 100,
   "max_steps": 4116,
   "num_train_epochs": 3,
   "save_steps": 100,
+  "total_flos": 1.0375834790343045e+18,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9be4a5c3125bd86c3b49d502b0c0839bdc6f3434e2dafd94eac7cbe0088004e2
 size 4219

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a34ff3b82cda74a4055322bc8d99fe3f390b24562c7e145c3f2497c2ff62607
 size 4219