vwxyzjn committed
Commit 7c96ec4
1 Parent(s): cd7f38f

Add HuggingFaceH4/mistral-7b-cai-v20.0.grokai.3.2 checkpoint

README.md CHANGED
@@ -1,33 +1,25 @@
  ---
  license: apache-2.0
- base_model: HuggingFaceH4/mistral-7b-cai
+ base_model: mistralai/Mistral-7B-v0.1
  tags:
  - alignment-handbook
  - generated_from_trainer
  datasets:
- - HuggingFaceH4/ultrafeedback_binarized_fixed
  - HuggingFaceH4/grok-conversation-harmless
+ - HuggingFaceH4/ultrachat_200k
  model-index:
- - name: mistral-7b-dpo-v21.0grokai.0.3
+ - name: mistral-7b-ift-v20.0.grokai.3.2
    results: []
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->

- # mistral-7b-dpo-v21.0grokai.0.3
+ # mistral-7b-ift-v20.0.grokai.3.2

- This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-cai](https://huggingface.co/HuggingFaceH4/mistral-7b-cai) on the HuggingFaceH4/ultrafeedback_binarized_fixed and the HuggingFaceH4/grok-conversation-harmless datasets.
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/grok-conversation-harmless and the HuggingFaceH4/ultrachat_200k datasets.
  It achieves the following results on the evaluation set:
- - Loss: 0.6270
- - Rewards/chosen: -7.6611
- - Rewards/rejected: -12.0970
- - Rewards/accuracies: 0.6925
- - Rewards/margins: 4.4359
- - Logps/rejected: -310.5013
- - Logps/chosen: -278.5390
- - Logits/rejected: -2.1614
- - Logits/chosen: -2.1988
+ - Loss: 0.9348

  ## Model description

@@ -46,54 +38,25 @@ More information needed
  ### Training hyperparameters

  The following hyperparameters were used during training:
- - learning_rate: 5e-07
- - train_batch_size: 2
+ - learning_rate: 2e-05
+ - train_batch_size: 8
  - eval_batch_size: 8
  - seed: 42
  - distributed_type: multi-GPU
  - num_devices: 8
- - total_train_batch_size: 16
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 256
  - total_eval_batch_size: 64
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- - lr_scheduler_type: linear
+ - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.1
- - num_epochs: 3
+ - num_epochs: 1

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
- |:-------------:|:-----:|:-----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.5994 | 0.1 | 400 | 0.5895 | 0.3053 | -0.0377 | 0.5950 | 0.3430 | -189.9080 | -198.8744 | -2.6272 | -2.6485 |
- | 0.5024 | 0.19 | 800 | 0.5112 | -0.1278 | -1.0425 | 0.6675 | 0.9147 | -199.9562 | -203.2059 | -2.5093 | -2.5329 |
- | 0.5728 | 0.29 | 1200 | 0.5324 | -0.7435 | -1.7880 | 0.6425 | 1.0445 | -207.4112 | -209.3627 | -2.4771 | -2.5058 |
- | 0.7378 | 0.39 | 1600 | 0.5213 | -1.6390 | -2.9308 | 0.6650 | 1.2918 | -218.8383 | -218.3174 | -2.4237 | -2.4525 |
- | 0.7467 | 0.48 | 2000 | 0.5788 | -2.2099 | -3.4247 | 0.6575 | 1.2148 | -223.7781 | -224.0264 | -2.4106 | -2.4441 |
- | 0.4646 | 0.58 | 2400 | 0.5309 | -1.1360 | -2.6997 | 0.6500 | 1.5638 | -216.5279 | -213.2871 | -2.3683 | -2.3994 |
- | 0.7454 | 0.67 | 2800 | 0.5290 | -1.9997 | -3.5594 | 0.6700 | 1.5597 | -225.1247 | -221.9242 | -2.4289 | -2.4575 |
- | 0.6092 | 0.77 | 3200 | 0.5124 | -1.6120 | -3.1452 | 0.6850 | 1.5332 | -220.9823 | -218.0472 | -2.4755 | -2.5024 |
- | 0.674 | 0.87 | 3600 | 0.5134 | -2.9907 | -4.6954 | 0.6750 | 1.7046 | -236.4846 | -231.8350 | -2.2564 | -2.2885 |
- | 0.5585 | 0.96 | 4000 | 0.5065 | -2.5232 | -4.1851 | 0.6725 | 1.6619 | -231.3815 | -227.1594 | -2.3968 | -2.4273 |
- | 0.0829 | 1.06 | 4400 | 0.5306 | -3.8333 | -6.1055 | 0.6950 | 2.2723 | -250.5862 | -240.2602 | -2.2149 | -2.2565 |
- | 0.1383 | 1.16 | 4800 | 0.5432 | -3.8147 | -5.7333 | 0.6675 | 1.9186 | -246.8635 | -240.0743 | -2.3301 | -2.3643 |
- | 0.1425 | 1.25 | 5200 | 0.5238 | -4.7732 | -7.0560 | 0.6650 | 2.2827 | -260.0906 | -249.6600 | -2.1705 | -2.2021 |
- | 0.1053 | 1.35 | 5600 | 0.5298 | -4.8922 | -7.5361 | 0.6900 | 2.6439 | -264.8917 | -250.8497 | -2.2597 | -2.2978 |
- | 0.1301 | 1.44 | 6000 | 0.5190 | -4.0353 | -6.5781 | 0.6850 | 2.5428 | -255.3118 | -242.2802 | -2.1606 | -2.1992 |
- | 0.0789 | 1.54 | 6400 | 0.5184 | -4.6125 | -7.3571 | 0.6775 | 2.7446 | -263.1015 | -248.0527 | -2.2220 | -2.2593 |
- | 0.1274 | 1.64 | 6800 | 0.5138 | -3.9081 | -6.5224 | 0.6650 | 2.6143 | -254.7549 | -241.0087 | -2.3238 | -2.3653 |
- | 0.1095 | 1.73 | 7200 | 0.5153 | -4.1355 | -6.9746 | 0.6750 | 2.8392 | -259.2772 | -243.2823 | -2.2983 | -2.3396 |
- | 0.1515 | 1.83 | 7600 | 0.5242 | -4.5052 | -7.4464 | 0.6625 | 2.9412 | -263.9946 | -246.9796 | -2.2513 | -2.2896 |
- | 0.1152 | 1.93 | 8000 | 0.5280 | -4.5281 | -7.5632 | 0.6825 | 3.0351 | -265.1628 | -247.2084 | -2.2822 | -2.3185 |
- | 0.0385 | 2.02 | 8400 | 0.5478 | -4.9592 | -8.1827 | 0.6800 | 3.2235 | -271.3580 | -251.5196 | -2.2850 | -2.3214 |
- | 0.0401 | 2.12 | 8800 | 0.5999 | -6.1863 | -10.0632 | 0.6800 | 3.8769 | -290.1624 | -263.7904 | -2.1925 | -2.2326 |
- | 0.0327 | 2.21 | 9200 | 0.6190 | -5.6591 | -9.4406 | 0.6925 | 3.7815 | -283.9365 | -258.5182 | -2.1369 | -2.1748 |
- | 0.0425 | 2.31 | 9600 | 0.6298 | -7.3701 | -11.3769 | 0.6925 | 4.0068 | -303.3002 | -275.6286 | -2.1410 | -2.1775 |
- | 0.0387 | 2.41 | 10000 | 0.6269 | -7.3259 | -11.5280 | 0.6975 | 4.2020 | -304.8104 | -275.1870 | -2.1791 | -2.2169 |
- | 0.043 | 2.5 | 10400 | 0.6376 | -7.2239 | -11.5783 | 0.6925 | 4.3544 | -305.3137 | -274.1667 | -2.2301 | -2.2663 |
- | 0.0577 | 2.6 | 10800 | 0.6290 | -7.6726 | -11.9683 | 0.6925 | 4.2956 | -309.2136 | -278.6540 | -2.1968 | -2.2342 |
- | 0.019 | 2.7 | 11200 | 0.6260 | -7.2301 | -11.5298 | 0.6825 | 4.2997 | -304.8287 | -274.2284 | -2.1623 | -2.2006 |
- | 0.0328 | 2.79 | 11600 | 0.6325 | -7.6096 | -12.0115 | 0.6950 | 4.4019 | -309.6460 | -278.0234 | -2.1388 | -2.1767 |
- | 0.036 | 2.89 | 12000 | 0.6312 | -7.8237 | -12.2628 | 0.6900 | 4.4391 | -312.1590 | -280.1643 | -2.1641 | -2.2011 |
- | 0.0216 | 2.98 | 12400 | 0.6283 | -7.6679 | -12.0919 | 0.6900 | 4.4240 | -310.4496 | -278.6061 | -2.1613 | -2.1986 |
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 0.9326 | 1.0 | 545 | 0.9348 |


  ### Framework versions
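Editor's note: a quick consistency check on the new hyperparameters in the README diff above. The total train batch size is the product of the per-device batch size, the number of GPUs, and the gradient accumulation steps; a minimal sketch of that arithmetic, using only the values from the card:

```python
# Effective (total) train batch size implied by the updated model card:
# per-device batch size x number of devices x gradient accumulation steps.
train_batch_size = 8              # per device, from the card
num_devices = 8                   # multi-GPU run, from the card
gradient_accumulation_steps = 4   # from the card

total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
assert total_train_batch_size == 256  # matches "total_train_batch_size: 256" above
```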
all_results.json CHANGED
@@ -1,21 +1,14 @@
  {
- "epoch": 3.0,
- "eval_logits/chosen": -2.198760986328125,
- "eval_logits/rejected": -2.1614327430725098,
- "eval_logps/chosen": -278.5389709472656,
- "eval_logps/rejected": -310.5012512207031,
- "eval_loss": 0.6269853711128235,
- "eval_rewards/accuracies": 0.6924999952316284,
- "eval_rewards/chosen": -7.6611433029174805,
- "eval_rewards/margins": 4.435902118682861,
- "eval_rewards/rejected": -12.0970458984375,
- "eval_runtime": 132.6261,
- "eval_samples": 3156,
- "eval_samples_per_second": 23.796,
- "eval_steps_per_second": 0.377,
- "train_loss": 0.24049862180692672,
- "train_runtime": 20948.1804,
- "train_samples": 82424,
- "train_samples_per_second": 9.519,
- "train_steps_per_second": 0.595
+ "epoch": 1.0,
+ "eval_loss": 0.934799313545227,
+ "eval_runtime": 142.2532,
+ "eval_samples": 24266,
+ "eval_samples_per_second": 109.024,
+ "eval_steps_per_second": 1.708,
+ "perplexity": 2.5467023177917714,
+ "train_loss": 0.9725383741046311,
+ "train_runtime": 5277.8235,
+ "train_samples": 211055,
+ "train_samples_per_second": 26.46,
+ "train_steps_per_second": 0.103
  }
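Editor's note: the perplexity value added to all_results.json is consistent with the exponential of the reported evaluation loss. A minimal check, copying the number from the JSON above:

```python
import math

eval_loss = 0.934799313545227     # "eval_loss" from all_results.json above
perplexity = math.exp(eval_loss)  # perplexity of a causal LM = exp(mean cross-entropy)
print(perplexity)                 # ~2.5467, matching the reported 2.5467023177917714
```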
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "HuggingFaceH4/mistral-7b-cai",
+ "_name_or_path": "mistralai/Mistral-7B-v0.1",
  "architectures": [
  "MistralForCausalLM"
  ],
eval_results.json CHANGED
@@ -1,16 +1,9 @@
  {
- "epoch": 3.0,
- "eval_logits/chosen": -2.198760986328125,
- "eval_logits/rejected": -2.1614327430725098,
- "eval_logps/chosen": -278.5389709472656,
- "eval_logps/rejected": -310.5012512207031,
- "eval_loss": 0.6269853711128235,
- "eval_rewards/accuracies": 0.6924999952316284,
- "eval_rewards/chosen": -7.6611433029174805,
- "eval_rewards/margins": 4.435902118682861,
- "eval_rewards/rejected": -12.0970458984375,
- "eval_runtime": 132.6261,
- "eval_samples": 3156,
- "eval_samples_per_second": 23.796,
- "eval_steps_per_second": 0.377
+ "epoch": 1.0,
+ "eval_loss": 0.934799313545227,
+ "eval_runtime": 142.2532,
+ "eval_samples": 24266,
+ "eval_samples_per_second": 109.024,
+ "eval_steps_per_second": 1.708,
+ "perplexity": 2.5467023177917714
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:82d5f2b63643649c5c57b449db25080bc6c81c2dea922fcb8aab5190ec15f0fd
+ oid sha256:1c8e5d6996e054b7c10c1f891947d59e5e462db78fc33252fa58424451d706ea
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f75e305a1c770759a46794fcf331b8a1e2c64700eba20832e1c462cebef8af57
+ oid sha256:ed331b509cdee31d8323097ac2b434359b7d8fae383c530753695b221139108c
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b60e0d9e9e37e59c473aa943a82737a311d71b07ef0409a96de40dfa97bdccc1
+ oid sha256:95a38af2f763dfd084506059d5663c66a2410432723f91502dc3d828cad3d822
  size 4540516344
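Editor's note: the three updated .safetensors shards above are loaded together through the usual sharded-checkpoint index. A minimal loading sketch with transformers; the repository ID is taken from the commit message and the bfloat16 dtype is an assumption, so adjust both as needed:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Repository ID assumed from the commit message above; change it if the repo differs.
model_id = "HuggingFaceH4/mistral-7b-cai-v20.0.grokai.3.2"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# from_pretrained reads the shard index and pulls in all three safetensors files.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
```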
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
  "rstrip": false,
  "single_word": false
  },
- "pad_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
+ "pad_token": "</s>",
  "unk_token": {
  "content": "<unk>",
  "lstrip": false,
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 3.0,
- "train_loss": 0.24049862180692672,
- "train_runtime": 20948.1804,
- "train_samples": 82424,
- "train_samples_per_second": 9.519,
- "train_steps_per_second": 0.595
+ "epoch": 1.0,
+ "train_loss": 0.9725383741046311,
+ "train_runtime": 5277.8235,
+ "train_samples": 211055,
+ "train_samples_per_second": 26.46,
+ "train_steps_per_second": 0.103
  }
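Editor's note: the new train_results.json numbers line up with the single-epoch run in the README above: steps per second times runtime gives roughly the 545 optimization steps logged at epoch 1.0. A quick check using the values above:

```python
train_runtime = 5277.8235        # seconds, from train_results.json above
train_steps_per_second = 0.103   # from train_results.json above

approx_steps = train_runtime * train_steps_per_second
print(round(approx_steps))       # ~544; consistent with the 545 steps in the README table
```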
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8e25a30fcdad636c8cc467560e7ea03470e7cb814424c89d88018e2649b3fe57
+ oid sha256:97393c2a78f330ed3bfc7e5d7c10eb73f641d9d68463a042f14a4296fc7f29d8
  size 6264