Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -2,15 +2,12 @@
 license: apache-2.0
 base_model: mistralai/Mistral-7B-v0.1
 tags:
-- alignment-handbook
-- trl
-- sft
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- HuggingFaceH4/deita-10k-v0-sft
 model-index:
 - name: mistral-7b-wo-healthsearch_qa-sft
   results: []
@@ -21,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral-7b-wo-healthsearch_qa-sft
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/deita-10k-v0-sft dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.3012
 ## Model description
@@ -60,9 +57,9 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.2606        | 0.89  | 2    | 2.2660          |
-| 1.2606        | 1.78  | 4    | 1.4258          |
-| 1.8401        | 2.67  | 6    | 1.3012          |
 ### Framework versions

 license: apache-2.0
 base_model: mistralai/Mistral-7B-v0.1
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 datasets:
+- generator
 model-index:
 - name: mistral-7b-wo-healthsearch_qa-sft
   results: []
 # mistral-7b-wo-healthsearch_qa-sft
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.3010
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.2606        | 0.89  | 2    | 2.2667          |
+| 1.2606        | 1.78  | 4    | 1.4257          |
+| 1.8405        | 2.67  | 6    | 1.3010          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -5,9 +5,9 @@
     "eval_samples": 3843,
     "eval_samples_per_second": 11.699,
     "eval_steps_per_second": 0.749,
-    "train_loss": 1.647927165031433,
-    "train_runtime": 194.6137,
     "train_samples": 1871,
-    "train_samples_per_second": 2.019,
     "train_steps_per_second": 0.031
 }

     "eval_samples": 3843,
     "eval_samples_per_second": 11.699,
     "eval_steps_per_second": 0.749,
+    "train_loss": 1.648087501525879,
+    "train_runtime": 194.9727,
     "train_samples": 1871,
+    "train_samples_per_second": 2.016,
     "train_steps_per_second": 0.031
 }

config.json CHANGED Viewed

@@ -21,6 +21,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.39.0.dev0",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.39.0.dev0",
+  "use_cache": false,
   "vocab_size": 32000
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55d3f2b16c0b2f47525d8089bda867460f17f3b61bf26bffb7105f2aa625a193
 size 4943162336

 version https://git-lfs.github.com/spec/v1
+oid sha256:5de56f5a3cbbb14b19f65b482c98c2faa7647c4ff2418720623df3a3c7cf0936
 size 4943162336

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea6e87a656694880df0c266280bbe16c3141db4c5283e84ec54c988ed3d3758e
 size 4999819336

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f62610c5e9421d06c5c45cf5f893f182dc5d2e22e82ad39039cfbd4ac861915
 size 4999819336

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a5917fa918112d10357a976934192d0deda633692974190b07b464e5e556868
 size 4540516344

 version https://git-lfs.github.com/spec/v1
+oid sha256:35a68939dc7a88cc5f01c470f6eddf0bbd643ffd63f999ac2f8c7c1774d27abb
 size 4540516344

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 2.67,
-    "train_loss": 1.647927165031433,
-    "train_runtime": 194.6137,
     "train_samples": 1871,
-    "train_samples_per_second": 2.019,
     "train_steps_per_second": 0.031
 }

 {
     "epoch": 2.67,
+    "train_loss": 1.648087501525879,
+    "train_runtime": 194.9727,
     "train_samples": 1871,
+    "train_samples_per_second": 2.016,
     "train_steps_per_second": 0.031
 }

trainer_state.json CHANGED Viewed

@@ -10,49 +10,49 @@
   "log_history": [
     {
       "epoch": 0.44,
-      "grad_norm": 6.007920941654517,
       "learning_rate": 2e-05,
       "loss": 1.2606,
       "step": 1
     },
     {
       "epoch": 0.89,
-      "eval_loss": 2.2660489082336426,
-      "eval_runtime": 28.506,
-      "eval_samples_per_second": 11.506,
-      "eval_steps_per_second": 0.737,
       "step": 2
     },
     {
       "epoch": 1.78,
-      "eval_loss": 1.4257768392562866,
-      "eval_runtime": 27.8152,
-      "eval_samples_per_second": 11.792,
-      "eval_steps_per_second": 0.755,
       "step": 4
     },
     {
       "epoch": 2.22,
-      "grad_norm": 21.287450950061043,
       "learning_rate": 1.9098300562505266e-06,
-      "loss": 1.8401,
       "step": 5
     },
     {
       "epoch": 2.67,
-      "eval_loss": 1.301224708557129,
-      "eval_runtime": 27.8884,
-      "eval_samples_per_second": 11.761,
-      "eval_steps_per_second": 0.753,
       "step": 6
     },
     {
       "epoch": 2.67,
       "step": 6,
       "total_flos": 1203933020160.0,
-      "train_loss": 1.647927165031433,
-      "train_runtime": 194.6137,
-      "train_samples_per_second": 2.019,
       "train_steps_per_second": 0.031
     }
   ],

   "log_history": [
     {
       "epoch": 0.44,
+      "grad_norm": 6.008237230982008,
       "learning_rate": 2e-05,
       "loss": 1.2606,
       "step": 1
     },
     {
       "epoch": 0.89,
+      "eval_loss": 2.266657829284668,
+      "eval_runtime": 28.2854,
+      "eval_samples_per_second": 11.596,
+      "eval_steps_per_second": 0.742,
       "step": 2
     },
     {
       "epoch": 1.78,
+      "eval_loss": 1.4257351160049438,
+      "eval_runtime": 27.8495,
+      "eval_samples_per_second": 11.778,
+      "eval_steps_per_second": 0.754,
       "step": 4
     },
     {
       "epoch": 2.22,
+      "grad_norm": 21.372843769871217,
       "learning_rate": 1.9098300562505266e-06,
+      "loss": 1.8405,
       "step": 5
     },
     {
       "epoch": 2.67,
+      "eval_loss": 1.301017165184021,
+      "eval_runtime": 27.912,
+      "eval_samples_per_second": 11.751,
+      "eval_steps_per_second": 0.752,
       "step": 6
     },
     {
       "epoch": 2.67,
       "step": 6,
       "total_flos": 1203933020160.0,
+      "train_loss": 1.648087501525879,
+      "train_runtime": 194.9727,
+      "train_samples_per_second": 2.016,
       "train_steps_per_second": 0.031
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6c922e11ae665ba946a90f179236aa9828c018035a6735e33ec0ebc1d525da6
 size 6200

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f75bc46e318838d6e61335461b936f19886708c7967a07d6b3a309215a06fa6
 size 6200