Model save
Browse files- README.md +23 -22
- all_results.json +5 -18
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +1 -1
- runs/May17_14-55-11_n136-082-130/events.out.tfevents.1715929074.n136-082-130.1748187.0 +2 -2
- train_results.json +5 -5
- trainer_state.json +0 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,16 +1,8 @@
|
|
1 |
---
|
2 |
-
license: apache-2.0
|
3 |
-
base_model: alignment-handbook/zephyr-7b-sft-full
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
- trl
|
7 |
- dpo
|
8 |
- generated_from_trainer
|
9 |
-
- trl
|
10 |
-
- dpo
|
11 |
-
- generated_from_trainer
|
12 |
-
datasets:
|
13 |
-
- HuggingFaceH4/ultrafeedback_binarized
|
14 |
model-index:
|
15 |
- name: zephyr-7b-dpo-full
|
16 |
results: []
|
@@ -21,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
21 |
|
22 |
# zephyr-7b-dpo-full
|
23 |
|
24 |
-
This model
|
25 |
It achieves the following results on the evaluation set:
|
26 |
-
- Loss: 0.
|
27 |
-
- Rewards/chosen:
|
28 |
-
- Rewards/rejected: -
|
29 |
-
- Rewards/accuracies: 0.
|
30 |
-
- Rewards/margins:
|
31 |
-
- Logps/rejected: -
|
32 |
-
- Logps/chosen: -
|
33 |
-
- Logits/rejected:
|
34 |
-
- Logits/chosen: -
|
35 |
|
36 |
## Model description
|
37 |
|
@@ -68,10 +60,19 @@ The following hyperparameters were used during training:
|
|
68 |
|
69 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
70 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
### Framework versions
|
|
|
1 |
---
|
|
|
|
|
2 |
tags:
|
|
|
3 |
- trl
|
4 |
- dpo
|
5 |
- generated_from_trainer
|
|
|
|
|
|
|
|
|
|
|
6 |
model-index:
|
7 |
- name: zephyr-7b-dpo-full
|
8 |
results: []
|
|
|
13 |
|
14 |
# zephyr-7b-dpo-full
|
15 |
|
16 |
+
This model was trained from scratch on the None dataset.
|
17 |
It achieves the following results on the evaluation set:
|
18 |
+
- Loss: 0.5418
|
19 |
+
- Rewards/chosen: -3.1726
|
20 |
+
- Rewards/rejected: -4.7390
|
21 |
+
- Rewards/accuracies: 0.7539
|
22 |
+
- Rewards/margins: 1.5664
|
23 |
+
- Logps/rejected: -761.6608
|
24 |
+
- Logps/chosen: -598.8974
|
25 |
+
- Logits/rejected: 0.2389
|
26 |
+
- Logits/chosen: -0.0634
|
27 |
|
28 |
## Model description
|
29 |
|
|
|
60 |
|
61 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
62 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
63 |
+
| 0.6142 | 0.07 | 100 | 0.6372 | -0.2112 | -0.4255 | 0.6992 | 0.2143 | -330.3116 | -302.7545 | -1.7521 | -1.7871 |
|
64 |
+
| 0.4726 | 0.15 | 200 | 0.5516 | -1.3441 | -2.1046 | 0.75 | 0.7605 | -498.2208 | -416.0410 | -2.0018 | -2.0471 |
|
65 |
+
| 0.4421 | 0.22 | 300 | 0.5335 | -1.1470 | -2.0463 | 0.7539 | 0.8992 | -492.3901 | -396.3379 | -1.7522 | -1.8325 |
|
66 |
+
| 0.3828 | 0.3 | 400 | 0.5238 | -1.6652 | -2.7617 | 0.7695 | 1.0965 | -563.9280 | -448.1488 | -0.9530 | -1.1204 |
|
67 |
+
| 0.3576 | 0.37 | 500 | 0.5184 | -1.6238 | -2.7277 | 0.7695 | 1.1039 | -560.5328 | -444.0173 | -0.8922 | -1.1202 |
|
68 |
+
| 0.3328 | 0.45 | 600 | 0.5151 | -2.1202 | -3.4092 | 0.7656 | 1.2890 | -628.6859 | -493.6552 | 0.2423 | -0.0694 |
|
69 |
+
| 0.3131 | 0.52 | 700 | 0.5153 | -1.7034 | -2.9038 | 0.7656 | 1.2004 | -578.1398 | -451.9696 | 0.1729 | -0.1656 |
|
70 |
+
| 0.2547 | 0.59 | 800 | 0.5256 | -2.5366 | -3.8570 | 0.7617 | 1.3204 | -673.4565 | -535.2915 | 0.4476 | 0.1270 |
|
71 |
+
| 0.2764 | 0.67 | 900 | 0.5221 | -2.5675 | -3.9457 | 0.7773 | 1.3782 | -682.3342 | -538.3813 | 0.0520 | -0.2431 |
|
72 |
+
| 0.2261 | 0.74 | 1000 | 0.5298 | -2.7657 | -4.2499 | 0.7695 | 1.4842 | -712.7483 | -558.2006 | 0.2023 | -0.1104 |
|
73 |
+
| 0.2219 | 0.82 | 1100 | 0.5380 | -3.0986 | -4.6646 | 0.7695 | 1.5660 | -754.2211 | -591.4904 | 0.3078 | -0.0067 |
|
74 |
+
| 0.2165 | 0.89 | 1200 | 0.5336 | -2.9855 | -4.5026 | 0.7617 | 1.5170 | -738.0179 | -580.1855 | 0.2015 | -0.0980 |
|
75 |
+
| 0.1728 | 0.97 | 1300 | 0.5418 | -3.1726 | -4.7390 | 0.7539 | 1.5664 | -761.6608 | -598.8974 | 0.2389 | -0.0634 |
|
76 |
|
77 |
|
78 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,21 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"eval_rewards/accuracies": 0.9140625,
|
9 |
-
"eval_rewards/chosen": 4.242725372314453,
|
10 |
-
"eval_rewards/margins": 9.778908729553223,
|
11 |
-
"eval_rewards/rejected": -5.536184310913086,
|
12 |
-
"eval_runtime": 97.1606,
|
13 |
-
"eval_samples": 2000,
|
14 |
-
"eval_samples_per_second": 20.584,
|
15 |
-
"eval_steps_per_second": 0.329,
|
16 |
-
"train_loss": 0.4018711235732713,
|
17 |
-
"train_runtime": 7633.33,
|
18 |
-
"train_samples": 61135,
|
19 |
-
"train_samples_per_second": 8.009,
|
20 |
-
"train_steps_per_second": 0.063
|
21 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.3438705863959722,
|
4 |
+
"train_runtime": 21850.8794,
|
5 |
+
"train_samples": 172268,
|
6 |
+
"train_samples_per_second": 7.884,
|
7 |
+
"train_steps_per_second": 0.062
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
}
|
generation_config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
-
"eos_token_id":
|
5 |
"transformers_version": "4.38.2"
|
6 |
}
|
|
|
1 |
{
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 32000,
|
5 |
"transformers_version": "4.38.2"
|
6 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd6ded02da6ed2a67b1ccb05b3e25c1a08de70168c93e24452883b16a331d860
|
3 |
+
size 4943178720
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89ce8ea846a3d7c9e49af7bf0cabce9bf61d2357db44c6a45bf678e5f26f2442
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fd7ad7a8a30cf5d12237b54b2b2736d530ebb352c2b25a8a564e6dd23102581
|
3 |
+
size 4540532728
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00003-of-00003.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 14483496960
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00003-of-00003.safetensors",
|
runs/May17_14-55-11_n136-082-130/events.out.tfevents.1715929074.n136-082-130.1748187.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14c040c01cefed894d5e32c42475eb45f945221e65a089933344299efdbd8272
|
3 |
+
size 107531
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.3438705863959722,
|
4 |
+
"train_runtime": 21850.8794,
|
5 |
+
"train_samples": 172268,
|
6 |
+
"train_samples_per_second": 7.884,
|
7 |
+
"train_steps_per_second": 0.062
|
8 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0caf9fbe23969e1c8a215e53c1fd063c17f339e7fb0eda9b65ed88e2360b089
|
3 |
+
size 6200
|