jiuhai committed on
Commit
709dd80
1 Parent(s): b2778d9

Model save

Browse files
Files changed (5) hide show
  1. README.md +13 -13
  2. all_results.json +16 -16
  3. eval_results.json +12 -12
  4. train_results.json +4 -4
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.4553
19
- - Rewards/chosen: -0.5876
20
- - Rewards/rejected: -2.1911
21
- - Rewards/accuracies: 0.8359
22
- - Rewards/margins: 1.6035
23
- - Logps/rejected: -246.6992
24
- - Logps/chosen: -279.5245
25
- - Logits/rejected: -2.8331
26
- - Logits/chosen: -2.8422
27
 
28
  ## Model description
29
 
@@ -59,14 +59,14 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
- | 0.4771 | 1.0 | 485 | 0.4617 | -0.0843 | -1.3520 | 0.7891 | 1.2677 | -238.3082 | -274.4911 | -2.8501 | -2.8796 |
63
- | 0.4124 | 2.0 | 970 | 0.4545 | -0.3551 | -1.7590 | 0.8164 | 1.4038 | -242.3781 | -277.1996 | -2.8563 | -2.8659 |
64
- | 0.3549 | 3.0 | 1455 | 0.4553 | -0.5876 | -2.1911 | 0.8359 | 1.6035 | -246.6992 | -279.5245 | -2.8331 | -2.8422 |
65
 
66
 
67
  ### Framework versions
68
 
69
  - Transformers 4.35.0
70
- - Pytorch 2.1.0+cu121
71
  - Datasets 2.14.6
72
  - Tokenizers 0.14.1
 
15
 
16
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.0563
19
+ - Rewards/chosen: -6.7505
20
+ - Rewards/rejected: -10.0735
21
+ - Rewards/accuracies: 0.7227
22
+ - Rewards/margins: 3.3230
23
+ - Logps/rejected: -273.7712
24
+ - Logps/chosen: -341.3420
25
+ - Logits/rejected: -2.2189
26
+ - Logits/chosen: -2.3037
27
 
28
  ## Model description
29
 
 
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
+ | 0.0636 | 1.0 | 485 | 0.6042 | -4.0022 | -7.1190 | 0.8086 | 3.1168 | -244.2258 | -313.8589 | -2.3960 | -2.4665 |
63
+ | 0.0443 | 2.0 | 970 | 0.7951 | -5.5853 | -8.9194 | 0.7383 | 3.3341 | -262.2304 | -329.6904 | -2.3026 | -2.3851 |
64
+ | 0.0238 | 3.0 | 1455 | 1.0563 | -6.7505 | -10.0735 | 0.7227 | 3.3230 | -273.7712 | -341.3420 | -2.2189 | -2.3037 |
65
 
66
 
67
  ### Framework versions
68
 
69
  - Transformers 4.35.0
70
+ - Pytorch 2.1.1+cu121
71
  - Datasets 2.14.6
72
  - Tokenizers 0.14.1
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_logits/chosen": -2.842160701751709,
4
- "eval_logits/rejected": -2.833141326904297,
5
- "eval_logps/chosen": -279.5245056152344,
6
- "eval_logps/rejected": -246.69915771484375,
7
- "eval_loss": 0.45531293749809265,
8
- "eval_rewards/accuracies": 0.8359375,
9
- "eval_rewards/chosen": -0.5876308083534241,
10
- "eval_rewards/margins": 1.6034575700759888,
11
- "eval_rewards/rejected": -2.1910881996154785,
12
- "eval_runtime": 252.3832,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.924,
15
- "eval_steps_per_second": 0.063,
16
- "train_loss": 0.43281792414557074,
17
- "train_runtime": 46468.4841,
18
- "train_samples": 61966,
19
- "train_samples_per_second": 4.001,
20
  "train_steps_per_second": 0.031
21
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_logits/chosen": -2.3036882877349854,
4
+ "eval_logits/rejected": -2.218935012817383,
5
+ "eval_logps/chosen": -341.342041015625,
6
+ "eval_logps/rejected": -273.77117919921875,
7
+ "eval_loss": 1.0562912225723267,
8
+ "eval_rewards/accuracies": 0.72265625,
9
+ "eval_rewards/chosen": -6.750503063201904,
10
+ "eval_rewards/margins": 3.3230087757110596,
11
+ "eval_rewards/rejected": -10.07351303100586,
12
+ "eval_runtime": 258.1215,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.748,
15
+ "eval_steps_per_second": 0.062,
16
+ "train_loss": 0.07034083745375122,
17
+ "train_runtime": 46831.0549,
18
+ "train_samples": 62064,
19
+ "train_samples_per_second": 3.976,
20
  "train_steps_per_second": 0.031
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_logits/chosen": -2.842160701751709,
4
- "eval_logits/rejected": -2.833141326904297,
5
- "eval_logps/chosen": -279.5245056152344,
6
- "eval_logps/rejected": -246.69915771484375,
7
- "eval_loss": 0.45531293749809265,
8
- "eval_rewards/accuracies": 0.8359375,
9
- "eval_rewards/chosen": -0.5876308083534241,
10
- "eval_rewards/margins": 1.6034575700759888,
11
- "eval_rewards/rejected": -2.1910881996154785,
12
- "eval_runtime": 252.3832,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.924,
15
- "eval_steps_per_second": 0.063
16
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_logits/chosen": -2.3036882877349854,
4
+ "eval_logits/rejected": -2.218935012817383,
5
+ "eval_logps/chosen": -341.342041015625,
6
+ "eval_logps/rejected": -273.77117919921875,
7
+ "eval_loss": 1.0562912225723267,
8
+ "eval_rewards/accuracies": 0.72265625,
9
+ "eval_rewards/chosen": -6.750503063201904,
10
+ "eval_rewards/margins": 3.3230087757110596,
11
+ "eval_rewards/rejected": -10.07351303100586,
12
+ "eval_runtime": 258.1215,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.748,
15
+ "eval_steps_per_second": 0.062
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.43281792414557074,
4
- "train_runtime": 46468.4841,
5
- "train_samples": 61966,
6
- "train_samples_per_second": 4.001,
7
  "train_steps_per_second": 0.031
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.07034083745375122,
4
+ "train_runtime": 46831.0549,
5
+ "train_samples": 62064,
6
+ "train_samples_per_second": 3.976,
7
  "train_steps_per_second": 0.031
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff