shenxq committed on
Commit
a1c8240
1 Parent(s): 0e135e5

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset
12
  base_model: mistralai/Mistral-7B-Instruct-v0.2
13
  model-index:
14
  - name: zephyr-7b-dpo-lora-pairrm
@@ -20,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # zephyr-7b-dpo-lora-pairrm
22
 
23
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 0.6747
26
- - Rewards/chosen: -1.3181
27
- - Rewards/rejected: -1.4367
28
- - Rewards/accuracies: 0.5727
29
- - Rewards/margins: 0.1186
30
- - Logps/rejected: -357.3805
31
- - Logps/chosen: -340.2056
32
- - Logits/rejected: -4.5482
33
- - Logits/chosen: -4.5594
34
 
35
  ## Model description
36
 
@@ -65,18 +61,18 @@ The following hyperparameters were used during training:
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
68
- | 0.6917 | 0.08 | 100 | 0.6924 | -0.0160 | -0.0177 | 0.5287 | 0.0016 | -215.4761 | -210.0007 | -2.5273 | -2.5303 |
69
- | 0.6854 | 0.16 | 200 | 0.6875 | -0.0702 | -0.0835 | 0.5563 | 0.0133 | -222.0610 | -215.4225 | -2.5736 | -2.5764 |
70
- | 0.682 | 0.24 | 300 | 0.6841 | -0.2388 | -0.2651 | 0.5450 | 0.0263 | -240.2197 | -232.2801 | -2.9180 | -2.9209 |
71
- | 0.6634 | 0.32 | 400 | 0.6812 | -0.4832 | -0.5288 | 0.5487 | 0.0455 | -266.5857 | -256.7237 | -3.4549 | -3.4603 |
72
- | 0.6296 | 0.4 | 500 | 0.6782 | -0.6896 | -0.7564 | 0.5600 | 0.0668 | -289.3543 | -277.3629 | -4.1668 | -4.1749 |
73
- | 0.6503 | 0.48 | 600 | 0.6770 | -0.9588 | -1.0440 | 0.5533 | 0.0852 | -318.1134 | -304.2834 | -4.4345 | -4.4433 |
74
- | 0.5974 | 0.56 | 700 | 0.6778 | -1.1455 | -1.2432 | 0.5653 | 0.0977 | -338.0312 | -322.9485 | -4.4370 | -4.4480 |
75
- | 0.6508 | 0.64 | 800 | 0.6748 | -1.1002 | -1.2023 | 0.5650 | 0.1022 | -333.9435 | -318.4168 | -4.2618 | -4.2711 |
76
- | 0.6746 | 0.72 | 900 | 0.6757 | -1.3289 | -1.4445 | 0.5687 | 0.1155 | -358.1558 | -341.2940 | -4.5662 | -4.5772 |
77
- | 0.6151 | 0.8 | 1000 | 0.6755 | -1.3559 | -1.4746 | 0.5690 | 0.1187 | -361.1742 | -343.9893 | -4.6070 | -4.6184 |
78
- | 0.6837 | 0.88 | 1100 | 0.6748 | -1.3246 | -1.4437 | 0.5710 | 0.1192 | -358.0839 | -340.8576 | -4.5607 | -4.5717 |
79
- | 0.6539 | 0.96 | 1200 | 0.6746 | -1.3182 | -1.4369 | 0.5730 | 0.1187 | -357.4036 | -340.2231 | -4.5483 | -4.5595 |
80
 
81
 
82
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
8
  base_model: mistralai/Mistral-7B-Instruct-v0.2
9
  model-index:
10
  - name: zephyr-7b-dpo-lora-pairrm
 
16
 
17
  # zephyr-7b-dpo-lora-pairrm
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.6764
22
+ - Rewards/chosen: -0.9885
23
+ - Rewards/rejected: -1.0650
24
+ - Rewards/accuracies: 0.5657
25
+ - Rewards/margins: 0.0765
26
+ - Logps/rejected: -320.4450
27
+ - Logps/chosen: -307.4615
28
+ - Logits/rejected: -2.7535
29
+ - Logits/chosen: -2.7599
30
 
31
  ## Model description
32
 
 
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.6916 | 0.08 | 100 | 0.6925 | -0.0162 | -0.0177 | 0.5280 | 0.0015 | -215.7187 | -210.2296 | -2.5058 | -2.5086 |
65
+ | 0.6855 | 0.16 | 200 | 0.6880 | -0.0651 | -0.0772 | 0.5613 | 0.0121 | -221.6710 | -215.1240 | -2.5152 | -2.5178 |
66
+ | 0.6825 | 0.24 | 300 | 0.6854 | -0.1874 | -0.2081 | 0.5473 | 0.0207 | -234.7546 | -227.3457 | -2.5175 | -2.5192 |
67
+ | 0.6676 | 0.32 | 400 | 0.6827 | -0.2909 | -0.3222 | 0.5477 | 0.0313 | -246.1682 | -237.7042 | -2.5347 | -2.5368 |
68
+ | 0.6458 | 0.4 | 500 | 0.6805 | -0.3693 | -0.4104 | 0.5567 | 0.0410 | -254.9852 | -245.5435 | -2.6328 | -2.6364 |
69
+ | 0.6592 | 0.48 | 600 | 0.6789 | -0.6010 | -0.6528 | 0.5560 | 0.0518 | -279.2278 | -268.7087 | -2.6805 | -2.6845 |
70
+ | 0.6107 | 0.56 | 700 | 0.6785 | -0.8159 | -0.8786 | 0.5550 | 0.0627 | -301.8047 | -290.1964 | -2.6914 | -2.6969 |
71
+ | 0.6475 | 0.64 | 800 | 0.6770 | -0.8845 | -0.9544 | 0.5610 | 0.0699 | -309.3867 | -297.0627 | -2.7237 | -2.7295 |
72
+ | 0.6639 | 0.72 | 900 | 0.6766 | -0.9705 | -1.0450 | 0.5667 | 0.0746 | -318.4507 | -305.6558 | -2.7464 | -2.7525 |
73
+ | 0.6305 | 0.8 | 1000 | 0.6764 | -0.9844 | -1.0603 | 0.5680 | 0.0759 | -319.9799 | -307.0536 | -2.7543 | -2.7606 |
74
+ | 0.6754 | 0.88 | 1100 | 0.6763 | -0.9882 | -1.0648 | 0.5687 | 0.0766 | -320.4283 | -307.4264 | -2.7538 | -2.7602 |
75
+ | 0.6577 | 0.96 | 1200 | 0.6764 | -0.9885 | -1.0649 | 0.5663 | 0.0764 | -320.4412 | -307.4615 | -2.7538 | -2.7602 |
76
 
77
 
78
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2786146e84f6354e0a1812270c60e73348e604c7f8f9fb916b21958b2ba56bf9
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cab7cdc14994d01df2444438178c50ef99e56ba6a159f575b5ec4bfbd04d646
3
  size 671150064
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -4.5593791007995605,
4
- "eval_logits/rejected": -4.548151016235352,
5
- "eval_logps/chosen": -340.20562744140625,
6
- "eval_logps/rejected": -357.3804626464844,
7
- "eval_loss": 0.6746748089790344,
8
- "eval_rewards/accuracies": 0.5726666450500488,
9
- "eval_rewards/chosen": -1.3180568218231201,
10
- "eval_rewards/margins": 0.11864501982927322,
11
- "eval_rewards/rejected": -1.436701774597168,
12
- "eval_runtime": 1558.97,
13
  "eval_samples": 2994,
14
- "eval_samples_per_second": 1.92,
15
- "eval_steps_per_second": 0.241,
16
- "train_loss": 0.6508825262419409,
17
- "train_runtime": 40390.1729,
18
  "train_samples": 19996,
19
- "train_samples_per_second": 0.495,
20
- "train_steps_per_second": 0.031
21
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.75989031791687,
4
+ "eval_logits/rejected": -2.753535509109497,
5
+ "eval_logps/chosen": -307.4615173339844,
6
+ "eval_logps/rejected": -320.4450378417969,
7
+ "eval_loss": 0.6764041185379028,
8
+ "eval_rewards/accuracies": 0.565666675567627,
9
+ "eval_rewards/chosen": -0.988508403301239,
10
+ "eval_rewards/margins": 0.07645130157470703,
11
+ "eval_rewards/rejected": -1.0649596452713013,
12
+ "eval_runtime": 1679.0616,
13
  "eval_samples": 2994,
14
+ "eval_samples_per_second": 1.783,
15
+ "eval_steps_per_second": 0.223,
16
+ "train_loss": 0.6566078573155155,
17
+ "train_runtime": 38377.8314,
18
  "train_samples": 19996,
19
+ "train_samples_per_second": 0.521,
20
+ "train_steps_per_second": 0.033
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -4.5593791007995605,
4
- "eval_logits/rejected": -4.548151016235352,
5
- "eval_logps/chosen": -340.20562744140625,
6
- "eval_logps/rejected": -357.3804626464844,
7
- "eval_loss": 0.6746748089790344,
8
- "eval_rewards/accuracies": 0.5726666450500488,
9
- "eval_rewards/chosen": -1.3180568218231201,
10
- "eval_rewards/margins": 0.11864501982927322,
11
- "eval_rewards/rejected": -1.436701774597168,
12
- "eval_runtime": 1558.97,
13
  "eval_samples": 2994,
14
- "eval_samples_per_second": 1.92,
15
- "eval_steps_per_second": 0.241
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.75989031791687,
4
+ "eval_logits/rejected": -2.753535509109497,
5
+ "eval_logps/chosen": -307.4615173339844,
6
+ "eval_logps/rejected": -320.4450378417969,
7
+ "eval_loss": 0.6764041185379028,
8
+ "eval_rewards/accuracies": 0.565666675567627,
9
+ "eval_rewards/chosen": -0.988508403301239,
10
+ "eval_rewards/margins": 0.07645130157470703,
11
+ "eval_rewards/rejected": -1.0649596452713013,
12
+ "eval_runtime": 1679.0616,
13
  "eval_samples": 2994,
14
+ "eval_samples_per_second": 1.783,
15
+ "eval_steps_per_second": 0.223
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6508825262419409,
4
- "train_runtime": 40390.1729,
5
  "train_samples": 19996,
6
- "train_samples_per_second": 0.495,
7
- "train_steps_per_second": 0.031
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6566078573155155,
4
+ "train_runtime": 38377.8314,
5
  "train_samples": 19996,
6
+ "train_samples_per_second": 0.521,
7
+ "train_steps_per_second": 0.033
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff