End of training
Browse files
- README.md +2 -4
- adapter_model.safetensors +1 -1
- all_results.json +5 -5
- runs/Apr01_01-06-45_83eaa54fbb08/events.out.tfevents.1711933606.83eaa54fbb08.5375.0 +2 -2
- train_results.json +5 -5
- trainer_state.json +62 -167
README.md
CHANGED
@@ -14,8 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
 # fine_tuned_llama2_7b

 This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 2.3496

 ## Model description

@@ -38,8 +36,8 @@ The following hyperparameters were used during training:
 - train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps:
-- total_train_batch_size:
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
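For reference, the hyperparameters listed in the README map onto the standard transformers TrainingArguments roughly as sketched below. This is not the training script from this commit; the learning_rate value is an assumption inferred from the logged cosine schedule (which peaks near 2e-05), and the output_dir name is a placeholder.

```python
from transformers import TrainingArguments

# Minimal sketch of the configuration implied by the README hyperparameters.
# Not the actual training script; learning_rate and output_dir are assumptions.
training_args = TrainingArguments(
    output_dir="fine_tuned_llama2_7b",   # placeholder name
    num_train_epochs=1,
    per_device_train_batch_size=1,       # train_batch_size: 1
    per_device_eval_batch_size=8,        # eval_batch_size: 8
    gradient_accumulation_steps=8,       # total_train_batch_size: 1 * 8 = 8
    seed=42,
    learning_rate=2e-5,                  # assumption: peak LR read off the logged schedule
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=100,
    save_steps=500,
)
```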
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:234f19e4eaf40da80182f856e7da52635d1e006d160f1eb4d153da9ba6891f03
 size 639691872
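Since this commit updates the adapter weights (adapter_model.safetensors), here is a hedged sketch of how such a PEFT adapter is typically attached to the base model; the adapter repo id below is a placeholder, not part of this commit.

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_id = "<user>/fine_tuned_llama2_7b"  # placeholder Hub repo id

# Load the base model, then attach the adapter stored in adapter_model.safetensors.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base_model, adapter_id)
```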
all_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "total_flos":
-    "train_loss":
-    "train_runtime":
-    "train_samples_per_second": 1.
-    "train_steps_per_second": 0.
+    "total_flos": 2.5585123528237056e+16,
+    "train_loss": 0.9235503957773034,
+    "train_runtime": 6177.2242,
+    "train_samples_per_second": 1.707,
+    "train_steps_per_second": 0.213
 }
runs/Apr01_01-06-45_83eaa54fbb08/events.out.tfevents.1711933606.83eaa54fbb08.5375.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1a24416d7c619131464dad8de9f25e2e293f17a4406e36b610de7766635f3e34
+size 8152
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "total_flos":
-    "train_loss":
-    "train_runtime":
-    "train_samples_per_second": 1.
-    "train_steps_per_second": 0.
+    "total_flos": 2.5585123528237056e+16,
+    "train_loss": 0.9235503957773034,
+    "train_runtime": 6177.2242,
+    "train_samples_per_second": 1.707,
+    "train_steps_per_second": 0.213
 }
trainer_state.json
CHANGED
@@ -1,225 +1,120 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9993360523570142,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1317,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "epoch": 0.08,
+      "grad_norm": 2.6666550636291504,
+      "learning_rate": 1.5151515151515153e-05,
+      "loss": 1.75,
       "step": 100
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.15,
+      "grad_norm": 1.9126778841018677,
+      "learning_rate": 1.983794055463009e-05,
+      "loss": 0.9131,
       "step": 200
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.23,
+      "grad_norm": 2.010279655456543,
+      "learning_rate": 1.90244256701717e-05,
+      "loss": 0.921,
       "step": 300
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.3,
+      "grad_norm": 1.661656141281128,
+      "learning_rate": 1.7580334804873595e-05,
+      "loss": 0.8664,
       "step": 400
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.38,
+      "grad_norm": 1.8409279584884644,
+      "learning_rate": 1.5606572885773613e-05,
+      "loss": 0.8562,
       "step": 500
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.46,
+      "grad_norm": 1.8635607957839966,
+      "learning_rate": 1.324105526655396e-05,
+      "loss": 0.8561,
       "step": 600
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.53,
+      "grad_norm": 1.4191073179244995,
+      "learning_rate": 1.0649070980273363e-05,
+      "loss": 0.8621,
       "step": 700
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "epoch": 0.61,
+      "grad_norm": 1.4062063694000244,
+      "learning_rate": 8.011733273733208e-06,
+      "loss": 0.8523,
       "step": 800
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "epoch": 0.68,
+      "grad_norm": 1.7386995553970337,
+      "learning_rate": 5.51332443501349e-06,
+      "loss": 0.8277,
       "step": 900
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "epoch": 0.76,
+      "grad_norm": 1.6414889097213745,
+      "learning_rate": 3.3284191862731585e-06,
+      "loss": 0.8406,
       "step": 1000
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.83,
+      "grad_norm": 1.6184569597244263,
+      "learning_rate": 1.6096863865200606e-06,
+      "loss": 0.8189,
       "step": 1100
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate":
-      "loss":
+      "epoch": 0.91,
+      "grad_norm": 1.6727625131607056,
+      "learning_rate": 4.772213925798331e-07,
+      "loss": 0.837,
       "step": 1200
     },
     {
-      "epoch": 0.
-      "grad_norm":
-      "learning_rate": 1.
-      "loss":
+      "epoch": 0.99,
+      "grad_norm": 1.7798255681991577,
+      "learning_rate": 1.0154472728808318e-08,
+      "loss": 0.8195,
       "step": 1300
     },
-    {
-      "epoch": 0.49,
-      "grad_norm": 10.425432205200195,
-      "learning_rate": 1.2314444308256605e-05,
-      "loss": 2.2865,
-      "step": 1400
-    },
-    {
-      "epoch": 0.52,
-      "grad_norm": 16.403301239013672,
-      "learning_rate": 1.1123080572287608e-05,
-      "loss": 2.2595,
-      "step": 1500
-    },
-    {
-      "epoch": 0.55,
-      "grad_norm": 11.935959815979004,
-      "learning_rate": 9.915289346843219e-06,
-      "loss": 2.3662,
-      "step": 1600
-    },
-    {
-      "epoch": 0.59,
-      "grad_norm": 18.410987854003906,
-      "learning_rate": 8.708737198449509e-06,
-      "loss": 2.2021,
-      "step": 1700
-    },
-    {
-      "epoch": 0.62,
-      "grad_norm": 15.293601036071777,
-      "learning_rate": 7.521072569442963e-06,
-      "loss": 2.2545,
-      "step": 1800
-    },
-    {
-      "epoch": 0.66,
-      "grad_norm": 16.34610939025879,
-      "learning_rate": 6.369667631219584e-06,
-      "loss": 2.3199,
-      "step": 1900
-    },
-    {
-      "epoch": 0.69,
-      "grad_norm": 15.948208808898926,
-      "learning_rate": 5.2713641785457504e-06,
-      "loss": 2.2029,
-      "step": 2000
-    },
-    {
-      "epoch": 0.73,
-      "grad_norm": 27.17706298828125,
-      "learning_rate": 4.242227281777747e-06,
-      "loss": 2.2861,
-      "step": 2100
-    },
-    {
-      "epoch": 0.76,
-      "grad_norm": 19.407489776611328,
-      "learning_rate": 3.297310300360622e-06,
-      "loss": 2.2157,
-      "step": 2200
-    },
-    {
-      "epoch": 0.8,
-      "grad_norm": 11.622710227966309,
-      "learning_rate": 2.450434694793621e-06,
-      "loss": 2.2724,
-      "step": 2300
-    },
-    {
-      "epoch": 0.83,
-      "grad_norm": 16.701732635498047,
-      "learning_rate": 1.7139878577898772e-06,
-      "loss": 2.1622,
-      "step": 2400
-    },
-    {
-      "epoch": 0.87,
-      "grad_norm": 10.720149040222168,
-      "learning_rate": 1.0987419217881333e-06,
-      "loss": 2.2026,
-      "step": 2500
-    },
-    {
-      "epoch": 0.9,
-      "grad_norm": 14.398381233215332,
-      "learning_rate": 6.136961931496943e-07,
-      "loss": 2.2619,
-      "step": 2600
-    },
-    {
-      "epoch": 0.94,
-      "grad_norm": 16.95086669921875,
-      "learning_rate": 2.6594551778223896e-07,
-      "loss": 2.2626,
-      "step": 2700
-    },
-    {
-      "epoch": 0.97,
-      "grad_norm": 12.132495880126953,
-      "learning_rate": 6.057650362879753e-08,
-      "loss": 2.1139,
-      "step": 2800
-    },
     {
       "epoch": 1.0,
-      "step":
-      "total_flos":
-      "train_loss":
-      "train_runtime":
-      "train_samples_per_second": 1.
-      "train_steps_per_second": 0.
+      "step": 1317,
+      "total_flos": 2.5585123528237056e+16,
+      "train_loss": 0.9235503957773034,
+      "train_runtime": 6177.2242,
+      "train_samples_per_second": 1.707,
+      "train_steps_per_second": 0.213
     }
   ],
   "logging_steps": 100,
-  "max_steps":
+  "max_steps": 1317,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos":
+  "total_flos": 2.5585123528237056e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
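The updated trainer_state.json keeps the per-100-step log shown above (epoch, grad_norm, learning_rate and loss per entry). A minimal sketch of pulling the loss curve back out of the file, assuming a local checkout of this repo:

```python
import json

# Read the trainer state written at the end of training.
with open("trainer_state.json") as f:
    state = json.load(f)

# Every logging entry carries epoch, grad_norm, learning_rate, loss and step;
# the final summary entry uses train_loss instead, so skip it here.
for entry in state["log_history"]:
    if "loss" in entry:
        print(entry["step"], entry["loss"])
```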