Model save
- README.md +13 -13
- all_results.json +16 -16
- eval_results.json +12 -12
- train_results.json +4 -4
- trainer_state.json +0 -0
README.md
CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss:
-- Rewards/chosen: -
-- Rewards/rejected: -
-- Rewards/accuracies: 0.
-- Rewards/margins:
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -2.
-- Logits/chosen: -2.
+- Loss: 1.0563
+- Rewards/chosen: -6.7505
+- Rewards/rejected: -10.0735
+- Rewards/accuracies: 0.7227
+- Rewards/margins: 3.3230
+- Logps/rejected: -273.7712
+- Logps/chosen: -341.3420
+- Logits/rejected: -2.2189
+- Logits/chosen: -2.3037
 
 ## Model description
 
@@ -59,14 +59,14 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
-| 0.
+| 0.0636 | 1.0 | 485 | 0.6042 | -4.0022 | -7.1190 | 0.8086 | 3.1168 | -244.2258 | -313.8589 | -2.3960 | -2.4665 |
+| 0.0443 | 2.0 | 970 | 0.7951 | -5.5853 | -8.9194 | 0.7383 | 3.3341 | -262.2304 | -329.6904 | -2.3026 | -2.3851 |
+| 0.0238 | 3.0 | 1455 | 1.0563 | -6.7505 | -10.0735 | 0.7227 | 3.3230 | -273.7712 | -341.3420 | -2.2189 | -2.3037 |
 
 
 ### Framework versions
 
 - Transformers 4.35.0
-- Pytorch 2.1.
+- Pytorch 2.1.1+cu121
 - Datasets 2.14.6
 - Tokenizers 0.14.1
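The reward-based metrics above (Rewards/chosen, Rewards/rejected, Rewards/margins) are the kind of values a DPO-style preference trainer logs, so this README appears to describe a preference-tuning run on top of the SFT base model. For reference, a minimal loading sketch under the framework versions listed in the README; the repository id is a placeholder (this commit does not name the final repo), and the dtype/device settings are assumptions, with `accelerate` needed for `device_map="auto"`.

```python
# Minimal loading sketch -- assumes Transformers 4.35.0 / PyTorch 2.1.1+cu121 as listed
# above, plus `accelerate` installed for device_map. The repo id below is a placeholder:
# this commit does not say where the fine-tuned weights are published.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-org/your-dpo-finetune"  # hypothetical id, replace with the real one

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # assumption: half precision to fit a 7B model on one GPU
    device_map="auto",
)

prompt = "Summarize what preference tuning changes compared to plain SFT."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```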
all_results.json
CHANGED
@@ -1,21 +1,21 @@
 {
     "epoch": 3.0,
-    "eval_logits/chosen": -2.
-    "eval_logits/rejected": -2.
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss":
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins":
-    "eval_rewards/rejected": -
-    "eval_runtime":
+    "eval_logits/chosen": -2.3036882877349854,
+    "eval_logits/rejected": -2.218935012817383,
+    "eval_logps/chosen": -341.342041015625,
+    "eval_logps/rejected": -273.77117919921875,
+    "eval_loss": 1.0562912225723267,
+    "eval_rewards/accuracies": 0.72265625,
+    "eval_rewards/chosen": -6.750503063201904,
+    "eval_rewards/margins": 3.3230087757110596,
+    "eval_rewards/rejected": -10.07351303100586,
+    "eval_runtime": 258.1215,
     "eval_samples": 2000,
-    "eval_samples_per_second": 7.
-    "eval_steps_per_second": 0.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
+    "eval_samples_per_second": 7.748,
+    "eval_steps_per_second": 0.062,
+    "train_loss": 0.07034083745375122,
+    "train_runtime": 46831.0549,
+    "train_samples": 62064,
+    "train_samples_per_second": 3.976,
     "train_steps_per_second": 0.031
 }
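The filled-in values are internally consistent, which is a quick way to spot copy-paste errors in result files: the reward margin is approximately the chosen reward minus the rejected reward, and the eval throughput is eval_samples divided by eval_runtime. A small sketch of that check, assuming `all_results.json` sits in the working directory with exactly the keys shown in this diff:

```python
# Sanity-check sketch for all_results.json; assumes the file is in the current
# directory and contains the keys shown in this diff.
import json
import math

with open("all_results.json") as f:
    results = json.load(f)

# rewards/margins is the mean of (chosen - rejected), so it should match the
# difference of the two reported means up to batch-averaging noise.
margin = results["eval_rewards/chosen"] - results["eval_rewards/rejected"]
assert math.isclose(margin, results["eval_rewards/margins"], rel_tol=1e-3)

# Throughput is derived from the sample count and wall-clock runtime.
throughput = results["eval_samples"] / results["eval_runtime"]
assert math.isclose(throughput, results["eval_samples_per_second"], rel_tol=1e-2)

print(f"margin ~= {margin:.4f}, eval throughput ~= {throughput:.3f} samples/s")
```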
eval_results.json
CHANGED
@@ -1,16 +1,16 @@
 {
     "epoch": 3.0,
-    "eval_logits/chosen": -2.
-    "eval_logits/rejected": -2.
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss":
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins":
-    "eval_rewards/rejected": -
-    "eval_runtime":
+    "eval_logits/chosen": -2.3036882877349854,
+    "eval_logits/rejected": -2.218935012817383,
+    "eval_logps/chosen": -341.342041015625,
+    "eval_logps/rejected": -273.77117919921875,
+    "eval_loss": 1.0562912225723267,
+    "eval_rewards/accuracies": 0.72265625,
+    "eval_rewards/chosen": -6.750503063201904,
+    "eval_rewards/margins": 3.3230087757110596,
+    "eval_rewards/rejected": -10.07351303100586,
+    "eval_runtime": 258.1215,
     "eval_samples": 2000,
-    "eval_samples_per_second": 7.
-    "eval_steps_per_second": 0.
+    "eval_samples_per_second": 7.748,
+    "eval_steps_per_second": 0.062
 }
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
+    "train_loss": 0.07034083745375122,
+    "train_runtime": 46831.0549,
+    "train_samples": 62064,
+    "train_samples_per_second": 3.976,
     "train_steps_per_second": 0.031
 }
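The training throughput here also follows from the other fields, under the assumption (matching the Trainer's usual bookkeeping) that `train_samples` counts one epoch of examples and the run lasted three epochs. A quick arithmetic check with values copied from the diff:

```python
# Consistency check for train_results.json, using values copied from the diff above.
train_samples = 62064         # examples per epoch (assumption: one-epoch count)
epochs = 3.0                  # "epoch": 3.0
train_runtime_s = 46831.0549  # seconds

samples_per_second = epochs * train_samples / train_runtime_s
print(round(samples_per_second, 3))  # -> 3.976, matching train_samples_per_second
```

With 485 optimizer steps per epoch (from the README table) over 62,064 examples, this also implies an effective batch size of roughly 128, though that is an inference from the numbers rather than something stated in this commit.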
trainer_state.json
CHANGED
The diff for this file is too large to render. See the raw diff.