model_hh_usp4_400

Browse files

Files changed (6) hide show

README.md +19 -19
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
final_checkpoint/adapter_config.json +3 -3
final_checkpoint/adapter_model.safetensors +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 4.8447
-- Rewards/chosen: -12.8728
-- Rewards/rejected: -15.9738
-- Rewards/accuracies: 0.6200
-- Rewards/margins: 3.1010
-- Logps/rejected: -130.8977
-- Logps/chosen: -125.8617
-- Logits/rejected: -0.9554
-- Logits/chosen: -0.9016
 ## Model description
@@ -60,16 +60,16 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.0115        | 4.0   | 100  | 2.1095          | -12.3873       | -13.8364         | 0.6200             | 1.4491          | -128.5228      | -125.3223    | -0.2640         | -0.2414       |
-| 0.0271        | 8.0   | 200  | 3.0011          | -0.7751        | -1.2662          | 0.5200             | 0.4911          | -114.5559      | -112.4198    | -0.1794         | -0.1752       |
-| 0.0029        | 12.0  | 300  | 5.8848          | -30.6815       | -32.8502         | 0.5300             | 2.1687          | -149.6492      | -145.6491    | -0.9305         | -0.8856       |
-| 0.0           | 16.0  | 400  | 4.8706          | -12.8599       | -15.9629         | 0.6200             | 3.1030          | -130.8855      | -125.8473    | -0.9564         | -0.9029       |
-| 0.0           | 20.0  | 500  | 4.8654          | -12.8852       | -15.9625         | 0.6200             | 3.0773          | -130.8851      | -125.8754    | -0.9556         | -0.9016       |
-| 0.0           | 24.0  | 600  | 4.8663          | -12.8617       | -15.9688         | 0.6200             | 3.1071          | -130.8921      | -125.8493    | -0.9551         | -0.9013       |
-| 0.0           | 28.0  | 700  | 4.8599          | -12.8977       | -15.9664         | 0.6200             | 3.0686          | -130.8894      | -125.8893    | -0.9553         | -0.9015       |
-| 0.0           | 32.0  | 800  | 4.8410          | -12.8636       | -15.9914         | 0.6200             | 3.1277          | -130.9172      | -125.8515    | -0.9553         | -0.9014       |
-| 0.0           | 36.0  | 900  | 4.8425          | -12.8856       | -15.9582         | 0.6200             | 3.0726          | -130.8803      | -125.8759    | -0.9551         | -0.9014       |
-| 0.0           | 40.0  | 1000 | 4.8447          | -12.8728       | -15.9738         | 0.6200             | 3.1010          | -130.8977      | -125.8617    | -0.9554         | -0.9016       |
 ### Framework versions

 This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 4.4266
+- Rewards/chosen: -7.2918
+- Rewards/rejected: -9.3870
+- Rewards/accuracies: 0.5500
+- Rewards/margins: 2.0952
+- Logps/rejected: -122.5611
+- Logps/chosen: -121.2051
+- Logits/rejected: -0.2787
+- Logits/chosen: -0.2572
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.098         | 4.0   | 100  | 2.3307          | -3.2289        | -4.9359          | 0.5700             | 1.7070          | -117.6155      | -116.6908    | -0.5136         | -0.5011       |
+| 0.2615        | 8.0   | 200  | 3.5637          | -3.5399        | -4.5546          | 0.5700             | 1.0147          | -117.1918      | -117.0363    | -0.4837         | -0.4844       |
+| 0.0137        | 12.0  | 300  | 4.2146          | -3.4955        | -5.8321          | 0.5600             | 2.3366          | -118.6113      | -116.9870    | -0.3503         | -0.3327       |
+| 0.0           | 16.0  | 400  | 4.4247          | -7.2840        | -9.3968          | 0.5500             | 2.1128          | -122.5721      | -121.1964    | -0.2788         | -0.2574       |
+| 0.0           | 20.0  | 500  | 4.4045          | -7.2800        | -9.4193          | 0.5600             | 2.1393          | -122.5971      | -121.1920    | -0.2793         | -0.2578       |
+| 0.0           | 24.0  | 600  | 4.4242          | -7.2774        | -9.3711          | 0.5600             | 2.0936          | -122.5435      | -121.1891    | -0.2789         | -0.2573       |
+| 0.0           | 28.0  | 700  | 4.4048          | -7.2951        | -9.4062          | 0.5600             | 2.1110          | -122.5825      | -121.2088    | -0.2785         | -0.2570       |
+| 0.0           | 32.0  | 800  | 4.4098          | -7.2804        | -9.3847          | 0.5500             | 2.1043          | -122.5586      | -121.1924    | -0.2783         | -0.2569       |
+| 0.0           | 36.0  | 900  | 4.4251          | -7.2849        | -9.3768          | 0.5500             | 2.0918          | -122.5498      | -121.1974    | -0.2792         | -0.2575       |
+| 0.0           | 40.0  | 1000 | 4.4266          | -7.2918        | -9.3870          | 0.5500             | 2.0952          | -122.5611      | -121.2051    | -0.2787         | -0.2572       |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "fc_out",
-    "k_proj",
-    "wte",
     "v_proj",
-    "fc_in",
     "q_proj",
     "out_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "fc_in",
     "fc_out",
     "v_proj",
+    "wte",
     "q_proj",
+    "k_proj",
     "out_proj"
   ],
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae941c4d05b8de3d6f0cf83630ef65a1fb53d1382a900d5b317ea3a6936bdae
 size 12608472

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c541264c6df00ab1c1c7f2b98855c93d0ad81ac4752045da7df9d9d4253e9b1
 size 12608472

final_checkpoint/adapter_config.json CHANGED Viewed

@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "fc_out",
-    "k_proj",
-    "wte",
     "v_proj",
-    "fc_in",
     "q_proj",
     "out_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "fc_in",
     "fc_out",
     "v_proj",
+    "wte",
     "q_proj",
+    "k_proj",
     "out_proj"
   ],
   "task_type": "CAUSAL_LM",

final_checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae941c4d05b8de3d6f0cf83630ef65a1fb53d1382a900d5b317ea3a6936bdae
 size 12608472

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c541264c6df00ab1c1c7f2b98855c93d0ad81ac4752045da7df9d9d4253e9b1
 size 12608472

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c4bf29ebe38e562cba8c2a8bd6b3f6016dc211695290125d3803e8c63282560
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6bb695e37bfcc88c4754bdc34684ec1747667d55be1250a5b962a9f6ce62fdc
 size 4984