Training in progress, step 200, checkpoint

Browse files

Files changed (7) hide show

checkpoint-200/README.md +0 -19
checkpoint-200/adapter_model.bin +1 -1
checkpoint-200/optimizer.pt +1 -1
checkpoint-200/rng_state.pth +1 -1
checkpoint-200/scheduler.pt +1 -1
checkpoint-200/trainer_state.json +45 -45
checkpoint-200/training_args.bin +1 -1

checkpoint-200/README.md CHANGED Viewed

@@ -216,23 +216,4 @@ The following `bitsandbytes` quantization config was used during training:
 ### Framework versions
-- PEFT 0.6.0.dev0
-## Training procedure
-The following `bitsandbytes` quantization config was used during training:
-- quant_method: bitsandbytes
-- load_in_8bit: True
-- load_in_4bit: False
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: fp4
-- bnb_4bit_use_double_quant: False
-- bnb_4bit_compute_dtype: float32
-### Framework versions
 - PEFT 0.6.0.dev0


216	### Framework versions
217
218



















219	- PEFT 0.6.0.dev0

checkpoint-200/adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e4650c6715b7603246bfd038984d45d9685d9f7a07ba6a9f234044e77c1c76e
 size 9873829

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c915b534dd5c63af4a4da8b51c402aa67519f4f509b5abe822440b79d0dada7
 size 9873829

checkpoint-200/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72618a45d8a50c4f5e08ee8a9c5515001f65ebc6b695979e3054665f22a31d46
 size 42724

 version https://git-lfs.github.com/spec/v1
+oid sha256:e98623969976c0a14b109a3c67209e22c6ee22bd13f81248dbbc28c82287acfc
 size 42724

checkpoint-200/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b7d888b148eb21b791090fe08933aa4e2a93c346d8904e0237d31802a4c179b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:24ca455dde448b70aa653e0498d3346b98980a6d89972d5d56f2b2ec8d9c7be4
 size 14244

checkpoint-200/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f2c7ad61cb507f1edc7b5922a62a9509826b96a88d5fa2c74a8bf3946bfd30f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d191efbbc9b0763e805de64fb2ed1cd1a38712d135eda6d145caf5703e52c386
 size 1064

checkpoint-200/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.665068507194519,
-  "best_model_checkpoint": "output/checkpoint-50",
-  "epoch": 0.8184143222506394,
   "eval_steps": 50,
   "global_step": 200,
   "is_hyper_param_search": false,
@@ -9,77 +9,77 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0,
-      "learning_rate": 7.575757575757576e-07,
-      "loss": 0.4968,
       "step": 1
     },
     {
-      "epoch": 0.2,
-      "learning_rate": 3.484848484848485e-05,
-      "loss": 0.5254,
       "step": 50
     },
     {
-      "epoch": 0.2,
-      "eval_accuracy": 0.6507462686567164,
-      "eval_loss": 0.665068507194519,
-      "eval_runtime": 61.5592,
-      "eval_samples_per_second": 5.442,
-      "eval_steps_per_second": 1.365,
       "step": 50
     },
     {
-      "epoch": 0.41,
-      "learning_rate": 7.272727272727273e-05,
-      "loss": 0.4971,
       "step": 100
     },
     {
-      "epoch": 0.41,
-      "eval_accuracy": 0.6656716417910448,
-      "eval_loss": 0.8002150058746338,
-      "eval_runtime": 61.211,
-      "eval_samples_per_second": 5.473,
-      "eval_steps_per_second": 1.372,
       "step": 100
     },
     {
-      "epoch": 0.61,
-      "learning_rate": 9.783333333333334e-05,
-      "loss": 0.5039,
       "step": 150
     },
     {
-      "epoch": 0.61,
-      "eval_accuracy": 0.6865671641791045,
-      "eval_loss": 0.7404947280883789,
-      "eval_runtime": 61.4487,
-      "eval_samples_per_second": 5.452,
-      "eval_steps_per_second": 1.367,
       "step": 150
     },
     {
-      "epoch": 0.82,
-      "learning_rate": 8.950000000000001e-05,
-      "loss": 0.4944,
       "step": 200
     },
     {
-      "epoch": 0.82,
-      "eval_accuracy": 0.6238805970149254,
-      "eval_loss": 0.9986834526062012,
-      "eval_runtime": 60.5135,
-      "eval_samples_per_second": 5.536,
-      "eval_steps_per_second": 1.388,
       "step": 200
     }
   ],
   "logging_steps": 50,
-  "max_steps": 732,
-  "num_train_epochs": 3,
   "save_steps": 50,
-  "total_flos": 8.514407519772672e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.5725014805793762,
+  "best_model_checkpoint": "output/checkpoint-100",
+  "epoch": 3.7735849056603774,
   "eval_steps": 50,
   "global_step": 200,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.02,
+      "learning_rate": 1.234567901234568e-06,
+      "loss": 0.4437,
       "step": 1
     },
     {
+      "epoch": 0.94,
+      "learning_rate": 5.679012345679012e-05,
+      "loss": 0.455,
       "step": 50
     },
     {
+      "epoch": 0.94,
+      "eval_accuracy": 0.68,
+      "eval_loss": 0.6171127557754517,
+      "eval_runtime": 29.0494,
+      "eval_samples_per_second": 6.024,
+      "eval_steps_per_second": 1.515,
       "step": 50
     },
     {
+      "epoch": 1.89,
+      "learning_rate": 9.59349593495935e-05,
+      "loss": 0.4555,
       "step": 100
     },
     {
+      "epoch": 1.89,
+      "eval_accuracy": 0.7257142857142858,
+      "eval_loss": 0.5725014805793762,
+      "eval_runtime": 28.7986,
+      "eval_samples_per_second": 6.077,
+      "eval_steps_per_second": 1.528,
       "step": 100
     },
     {
+      "epoch": 2.83,
+      "learning_rate": 8.265582655826559e-05,
+      "loss": 0.455,
       "step": 150
     },
     {
+      "epoch": 2.83,
+      "eval_accuracy": 0.7371428571428571,
+      "eval_loss": 0.5729417204856873,
+      "eval_runtime": 28.9054,
+      "eval_samples_per_second": 6.054,
+      "eval_steps_per_second": 1.522,
       "step": 150
     },
     {
+      "epoch": 3.77,
+      "learning_rate": 6.910569105691057e-05,
+      "loss": 0.4718,
       "step": 200
     },
     {
+      "epoch": 3.77,
+      "eval_accuracy": 0.6685714285714286,
+      "eval_loss": 0.6322054862976074,
+      "eval_runtime": 28.8878,
+      "eval_samples_per_second": 6.058,
+      "eval_steps_per_second": 1.523,
       "step": 200
     }
   ],
   "logging_steps": 50,
+  "max_steps": 450,
+  "num_train_epochs": 9,
   "save_steps": 50,
+  "total_flos": 7.652955795308544e+16,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-200/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9feea516a8a208c393c324d84d047c0ab5036bbdf3fb1f3eea6a2563e9c79a6b
 size 4472

 version https://git-lfs.github.com/spec/v1
+oid sha256:d91c1e3ec7fc28d5ed2a9d783b7ded0a7affbbd029319e299d9244ac0f3fbc29
 size 4472