Wenboz committed on
Commit
7182753
·
verified ·
1 Parent(s): 27a6e3f

Model save

Browse files
Files changed (4) hide show
  1. README.md +26 -27
  2. all_results.json +6 -18
  3. train_results.json +6 -5
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -1,9 +1,8 @@
1
  ---
2
- base_model: mistralai/Mistral-7B-v0.1
3
  library_name: peft
4
- license: apache-2.0
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - dpo
9
  - generated_from_trainer
@@ -17,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # zephyr-7b-dpo-lora
19
 
20
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.4930
23
- - Rewards/chosen: -1.7956
24
- - Rewards/rejected: -2.7390
25
- - Rewards/accuracies: 0.7242
26
- - Rewards/margins: 0.9434
27
- - Logps/rejected: -536.2667
28
- - Logps/chosen: -447.0530
29
- - Logits/rejected: 0.9396
30
- - Logits/chosen: 0.5316
31
 
32
  ## Model description
33
 
@@ -62,23 +61,23 @@ The following hyperparameters were used during training:
62
 
63
  ### Training results
64
 
65
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
66
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
67
- | 0.6087 | 0.1 | 100 | 0.6158 | -0.3136 | -0.5466 | 0.6726 | 0.2330 | -317.0252 | -298.8513 | -2.0360 | -2.1198 |
68
- | 0.5463 | 0.21 | 200 | 0.5504 | -1.1262 | -1.6978 | 0.6925 | 0.5716 | -432.1413 | -380.1157 | -0.0431 | -0.2986 |
69
- | 0.4949 | 0.31 | 300 | 0.5161 | -1.6535 | -2.4330 | 0.7183 | 0.7794 | -505.6621 | -432.8479 | 0.4034 | 0.1418 |
70
- | 0.5239 | 0.42 | 400 | 0.5101 | -1.3693 | -2.0810 | 0.7302 | 0.7116 | -470.4624 | -404.4282 | 0.8585 | 0.5591 |
71
- | 0.5272 | 0.52 | 500 | 0.5003 | -2.0358 | -2.9629 | 0.7381 | 0.9271 | -558.6534 | -471.0703 | 1.0404 | 0.7150 |
72
- | 0.4886 | 0.63 | 600 | 0.4982 | -1.7739 | -2.6428 | 0.7262 | 0.8689 | -526.6414 | -444.8822 | 0.3752 | 0.0594 |
73
- | 0.516 | 0.73 | 700 | 0.4933 | -2.0243 | -2.9388 | 0.7302 | 0.9144 | -556.2413 | -469.9273 | 0.8898 | 0.5312 |
74
- | 0.495 | 0.84 | 800 | 0.4949 | -1.7382 | -2.6840 | 0.7262 | 0.9458 | -530.7620 | -441.3121 | 0.8308 | 0.4157 |
75
- | 0.4866 | 0.94 | 900 | 0.4932 | -1.7916 | -2.7322 | 0.7262 | 0.9407 | -535.5854 | -446.6503 | 0.9353 | 0.5257 |
76
 
77
 
78
  ### Framework versions
79
 
80
  - PEFT 0.7.1
81
- - Transformers 4.38.2
82
- - Pytorch 2.1.2+cu121
83
  - Datasets 2.14.6
84
- - Tokenizers 0.15.2
 
1
  ---
2
+ base_model: HuggingFaceH4/mistral-7b-sft-beta
3
  library_name: peft
4
+ license: mit
5
  tags:
 
6
  - trl
7
  - dpo
8
  - generated_from_trainer
 
16
 
17
  # zephyr-7b-dpo-lora
18
 
19
+ This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5084
22
+ - Rewards/chosen: -0.0008
23
+ - Rewards/rejected: -0.9080
24
+ - Rewards/accuracies: 0.7222
25
+ - Rewards/margins: 0.9072
26
+ - Logps/rejected: -276.7181
27
+ - Logps/chosen: -271.8789
28
+ - Logits/rejected: -2.7174
29
+ - Logits/chosen: -2.7372
30
 
31
  ## Model description
32
 
 
61
 
62
  ### Training results
63
 
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.5795 | 0.1047 | 100 | 0.5875 | 0.0265 | -0.3721 | 0.6825 | 0.3986 | -271.3593 | -271.6063 | -2.7688 | -2.7900 |
67
+ | 0.5449 | 0.2094 | 200 | 0.5520 | 0.0601 | -0.5726 | 0.7103 | 0.6327 | -273.3645 | -271.2704 | -2.7792 | -2.7981 |
68
+ | 0.545 | 0.3141 | 300 | 0.5320 | -0.0197 | -0.7637 | 0.7044 | 0.7439 | -275.2751 | -272.0686 | -2.7616 | -2.7803 |
69
+ | 0.4747 | 0.4187 | 400 | 0.5228 | -0.1728 | -0.9527 | 0.7004 | 0.7798 | -277.1651 | -273.5996 | -2.7532 | -2.7732 |
70
+ | 0.5367 | 0.5234 | 500 | 0.5175 | -0.2142 | -1.0435 | 0.7143 | 0.8293 | -278.0737 | -274.0135 | -2.7339 | -2.7540 |
71
+ | 0.5031 | 0.6281 | 600 | 0.5139 | -0.2939 | -1.1329 | 0.7024 | 0.8389 | -278.9670 | -274.8105 | -2.7071 | -2.7268 |
72
+ | 0.5057 | 0.7328 | 700 | 0.5084 | -0.0108 | -0.9049 | 0.7202 | 0.8941 | -276.6876 | -271.9794 | -2.7207 | -2.7404 |
73
+ | 0.5172 | 0.8375 | 800 | 0.5090 | -0.0300 | -0.9231 | 0.7183 | 0.8931 | -276.8697 | -272.1711 | -2.7173 | -2.7371 |
74
+ | 0.5173 | 0.9422 | 900 | 0.5084 | -0.0008 | -0.9080 | 0.7222 | 0.9072 | -276.7181 | -271.8789 | -2.7174 | -2.7372 |
75
 
76
 
77
  ### Framework versions
78
 
79
  - PEFT 0.7.1
80
+ - Transformers 4.45.2
81
+ - Pytorch 2.4.0+cu121
82
  - Datasets 2.14.6
83
+ - Tokenizers 0.20.1
all_results.json CHANGED
@@ -1,21 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_logits/chosen": 0.5316108465194702,
4
- "eval_logits/rejected": 0.9396071434020996,
5
- "eval_logps/chosen": -447.05303955078125,
6
- "eval_logps/rejected": -536.2667236328125,
7
- "eval_loss": 0.49303239583969116,
8
- "eval_rewards/accuracies": 0.7242063283920288,
9
- "eval_rewards/chosen": -1.7955820560455322,
10
- "eval_rewards/margins": 0.9434418678283691,
11
- "eval_rewards/rejected": -2.7390236854553223,
12
- "eval_runtime": 363.1163,
13
- "eval_samples": 2000,
14
- "eval_samples_per_second": 5.508,
15
- "eval_steps_per_second": 0.173,
16
- "train_loss": 0.5264237219126436,
17
- "train_runtime": 29801.8239,
18
  "train_samples": 61135,
19
- "train_samples_per_second": 2.051,
20
- "train_steps_per_second": 0.032
21
  }
 
1
  {
2
+ "epoch": 0.9997382884061764,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.5278735879828168,
5
+ "train_runtime": 28474.7268,
 
 
 
 
 
 
 
 
 
 
 
 
6
  "train_samples": 61135,
7
+ "train_samples_per_second": 2.147,
8
+ "train_steps_per_second": 0.034
9
  }
train_results.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.5264237219126436,
4
- "train_runtime": 29801.8239,
 
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.051,
7
- "train_steps_per_second": 0.032
8
  }
 
1
  {
2
+ "epoch": 0.9997382884061764,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.5278735879828168,
5
+ "train_runtime": 28474.7268,
6
  "train_samples": 61135,
7
+ "train_samples_per_second": 2.147,
8
+ "train_steps_per_second": 0.034
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff