shichengshuai98 committed
Commit 49cb0a3 • Parent(s): 623242e
Model save
README.md
ADDED
@@ -0,0 +1,150 @@
---
license: apache-2.0
library_name: peft
tags:
- trl
- dpo
- generated_from_trainer
base_model: mistralai/Mistral-7B-v0.1
model-index:
- name: zephyr-7b-dpo-qlora
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# zephyr-7b-dpo-qlora

This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 0.4840
- Rewards/chosen: -3.2859
- Rewards/rejected: -4.7173
- Rewards/accuracies: 0.7580
- Rewards/margins: 1.4314
- Logps/rejected: -712.9290
- Logps/chosen: -593.8395
- Logits/rejected: -1.2242
- Logits/chosen: -1.3436

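For readers unfamiliar with these metrics: in TRL's DPO training, the "reward" is the implicit DPO reward, the β-scaled log-probability ratio between the trained policy and the frozen reference model; Rewards/margins is the chosen-minus-rejected gap, and Rewards/accuracies is the fraction of pairs where the chosen completion earns the higher reward. The formula below states the standard DPO objective these numbers come from, under TRL's usual conventions rather than anything recorded on this card.

```latex
% Implicit reward used by the Rewards/* metrics (beta-scaled log ratio):
%   r(x, y) = \beta \log \frac{\pi_\theta(y \mid x)}{\pi_{\mathrm{ref}}(y \mid x)}
% DPO loss over a preference pair (chosen y_w, rejected y_l):
\mathcal{L}_{\mathrm{DPO}}
  = -\log \sigma\!\left(
      \beta \log \frac{\pi_\theta(y_w \mid x)}{\pi_{\mathrm{ref}}(y_w \mid x)}
      - \beta \log \frac{\pi_\theta(y_l \mid x)}{\pi_{\mathrm{ref}}(y_l \mid x)}
    \right)
```
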
## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-06
- train_batch_size: 1
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- num_devices: 2
- gradient_accumulation_steps: 4
- total_train_batch_size: 8
- total_eval_batch_size: 16
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1

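The effective batch size follows from the values above: 1 sample per device × 2 GPUs × 4 gradient-accumulation steps = 8. As a rough sketch of how these hyperparameters might be wired together, the snippet below targets the pre-`DPOConfig` TRL API matching the Transformers 4.41 / PEFT 0.11 era listed under framework versions; the dataset name, `beta`, precision, and LoRA settings are illustrative assumptions, not values recorded on this card.

```python
# Hypothetical reconstruction of the training setup (trl ~0.8.x API).
# Dataset, beta, precision, and LoRA settings are ASSUMPTIONS for illustration.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, TrainingArguments)
from trl import DPOTrainer

model_id = "mistralai/Mistral-7B-v0.1"           # base model from the card
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token        # Mistral ships no pad token

# 4-bit quantized base, as the "qlora" in the model name suggests.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
)

# Placeholder: any preference dataset with prompt/chosen/rejected columns.
train_dataset = load_dataset("your/preference-dataset", split="train")

args = TrainingArguments(
    output_dir="zephyr-7b-dpo-qlora",
    learning_rate=5e-6,                  # learning_rate: 5e-06
    per_device_train_batch_size=1,       # train_batch_size: 1
    per_device_eval_batch_size=8,        # eval_batch_size: 8
    gradient_accumulation_steps=4,       # 1 x 2 GPUs x 4 = total batch 8
    num_train_epochs=1,                  # num_epochs: 1
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    bf16=True,                           # assumption; precision not stated
)

trainer = DPOTrainer(
    model,
    ref_model=None,        # with a PEFT adapter, the frozen base is the reference
    args=args,
    beta=0.1,              # assumption; the card does not record beta
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    peft_config=LoraConfig(r=16, lora_alpha=16, lora_dropout=0.05,
                           task_type="CAUSAL_LM"),  # assumed LoRA settings
)
trainer.train()
```
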
### Training results

| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
| 0.689 | 0.0131 | 100 | 0.6888 | 0.0395 | 0.0292 | 0.6460 | 0.0104 | -238.2805 | -261.2910 | -2.0650 | -2.1624 |
| 0.6814 | 0.0262 | 200 | 0.6774 | 0.0567 | 0.0177 | 0.6730 | 0.0390 | -239.4271 | -259.5726 | -2.0645 | -2.1610 |
| 0.6494 | 0.0393 | 300 | 0.6551 | -0.1140 | -0.2169 | 0.6900 | 0.1030 | -262.8893 | -276.6416 | -2.0393 | -2.1346 |
| 0.6429 | 0.0523 | 400 | 0.6381 | -0.1019 | -0.2573 | 0.6990 | 0.1554 | -266.9226 | -275.4342 | -2.0028 | -2.0982 |
| 0.6276 | 0.0654 | 500 | 0.6316 | -0.5681 | -0.7967 | 0.6740 | 0.2287 | -320.8681 | -322.0505 | -1.9742 | -2.0660 |
| 0.6018 | 0.0785 | 600 | 0.6195 | -0.5906 | -0.8834 | 0.6790 | 0.2928 | -329.5356 | -324.3075 | -1.9555 | -2.0464 |
| 0.636 | 0.0916 | 700 | 0.5904 | -1.0299 | -1.4568 | 0.7000 | 0.4269 | -386.8725 | -368.2313 | -1.8347 | -1.9222 |
| 0.6106 | 0.1047 | 800 | 0.5813 | -1.3495 | -1.8826 | 0.7060 | 0.5331 | -429.4577 | -400.1949 | -1.7387 | -1.8269 |
| 0.5485 | 0.1178 | 900 | 0.5887 | -0.9831 | -1.4632 | 0.7080 | 0.4802 | -387.5207 | -363.5506 | -1.6837 | -1.7763 |
| 0.5219 | 0.1309 | 1000 | 0.5734 | -1.2919 | -1.9059 | 0.7000 | 0.6140 | -431.7823 | -394.4334 | -1.6714 | -1.7692 |
| 0.6123 | 0.1439 | 1100 | 0.5642 | -1.0935 | -1.7835 | 0.7060 | 0.6900 | -419.5487 | -374.5962 | -1.6159 | -1.7168 |
| 0.5182 | 0.1570 | 1200 | 0.5560 | -0.7569 | -1.3791 | 0.7130 | 0.6222 | -379.1057 | -340.9370 | -1.5582 | -1.6661 |
| 0.5708 | 0.1701 | 1300 | 0.5492 | -1.6962 | -2.4990 | 0.7090 | 0.8027 | -491.0939 | -434.8680 | -1.2951 | -1.4013 |
| 0.5412 | 0.1832 | 1400 | 0.5653 | -2.9109 | -3.8734 | 0.7120 | 0.9625 | -628.5356 | -556.3367 | -1.0746 | -1.1874 |
| 0.5234 | 0.1963 | 1500 | 0.5587 | -2.1669 | -3.2180 | 0.7130 | 1.0511 | -562.9972 | -481.9323 | -1.0396 | -1.1558 |
| 0.4682 | 0.2094 | 1600 | 0.5576 | -2.2793 | -3.1913 | 0.7250 | 0.9120 | -560.3236 | -493.1748 | -1.2721 | -1.3888 |
| 0.5418 | 0.2225 | 1700 | 0.5464 | -1.1492 | -1.8734 | 0.7180 | 0.7242 | -428.5392 | -380.1679 | -1.2443 | -1.3556 |
| 0.5283 | 0.2355 | 1800 | 0.5346 | -2.0287 | -2.9473 | 0.7240 | 0.9187 | -535.9321 | -468.1107 | -1.2145 | -1.3166 |
| 0.4953 | 0.2486 | 1900 | 0.5348 | -1.6558 | -2.4907 | 0.7210 | 0.8349 | -490.2633 | -430.8226 | -1.3297 | -1.4330 |
| 0.5594 | 0.2617 | 2000 | 0.5368 | -1.6608 | -2.6852 | 0.7320 | 1.0244 | -509.7210 | -431.3297 | -1.1973 | -1.3083 |
| 0.4645 | 0.2748 | 2100 | 0.5278 | -1.6835 | -2.7694 | 0.7420 | 1.0858 | -518.1336 | -433.5981 | -1.2531 | -1.3679 |
| 0.647 | 0.2879 | 2200 | 0.5192 | -2.5747 | -3.6325 | 0.7410 | 1.0578 | -604.4510 | -522.7143 | -1.1386 | -1.2472 |
| 0.45 | 0.3010 | 2300 | 0.5135 | -2.4369 | -3.5724 | 0.7480 | 1.1355 | -598.4383 | -508.9362 | -1.1509 | -1.2593 |
| 0.5644 | 0.3141 | 2400 | 0.5071 | -1.8863 | -2.8362 | 0.7490 | 0.9499 | -524.8181 | -453.8725 | -1.1747 | -1.2832 |
| 0.5536 | 0.3272 | 2500 | 0.5034 | -2.1790 | -3.1810 | 0.7530 | 1.0020 | -559.2987 | -483.1492 | -1.2050 | -1.3134 |
| 0.5293 | 0.3402 | 2600 | 0.5129 | -3.1825 | -4.4339 | 0.7430 | 1.2514 | -684.5871 | -583.4978 | -1.0627 | -1.1776 |
| 0.5843 | 0.3533 | 2700 | 0.5062 | -2.7265 | -3.8307 | 0.7490 | 1.1041 | -624.2646 | -537.8979 | -1.1514 | -1.2624 |
| 0.5032 | 0.3664 | 2800 | 0.5348 | -2.7048 | -3.9803 | 0.7340 | 1.2755 | -639.2272 | -535.7208 | -1.1400 | -1.2559 |
| 0.4179 | 0.3795 | 2900 | 0.5106 | -2.6726 | -3.9336 | 0.7410 | 1.2611 | -634.5601 | -532.5026 | -1.1224 | -1.2393 |
| 0.4537 | 0.3926 | 3000 | 0.5151 | -2.4863 | -3.7684 | 0.7440 | 1.2821 | -618.0381 | -513.8768 | -1.2113 | -1.3309 |
| 0.4542 | 0.4057 | 3100 | 0.5243 | -3.2414 | -4.5512 | 0.7370 | 1.3097 | -696.3145 | -589.3881 | -1.1533 | -1.2732 |
| 0.5944 | 0.4188 | 3200 | 0.5122 | -2.4267 | -3.6096 | 0.7530 | 1.1828 | -602.1553 | -507.9196 | -1.1939 | -1.3089 |
| 0.6654 | 0.4318 | 3300 | 0.5025 | -2.0404 | -3.1651 | 0.7510 | 1.1247 | -557.7056 | -469.2853 | -1.1881 | -1.3096 |
| 0.4912 | 0.4449 | 3400 | 0.5007 | -2.8810 | -4.0895 | 0.7560 | 1.2085 | -650.1498 | -553.3435 | -1.2282 | -1.3499 |
| 0.5173 | 0.4580 | 3500 | 0.4936 | -2.0455 | -3.2014 | 0.7490 | 1.1560 | -561.3413 | -469.7918 | -1.2989 | -1.4178 |
| 0.4581 | 0.4711 | 3600 | 0.5022 | -3.1210 | -4.5082 | 0.7430 | 1.3873 | -692.0210 | -577.3400 | -1.1668 | -1.2881 |
| 0.4583 | 0.4842 | 3700 | 0.5078 | -3.6972 | -5.1415 | 0.7590 | 1.4443 | -755.3519 | -634.9664 | -1.1082 | -1.2313 |
| 0.3869 | 0.4973 | 3800 | 0.4976 | -3.2051 | -4.5185 | 0.7510 | 1.3135 | -693.0497 | -585.7507 | -1.1739 | -1.2953 |
| 0.56 | 0.5104 | 3900 | 0.4936 | -2.9600 | -4.1725 | 0.7530 | 1.2125 | -658.4496 | -561.2438 | -1.2195 | -1.3363 |
| 0.4668 | 0.5234 | 4000 | 0.4933 | -2.6908 | -3.9425 | 0.7520 | 1.2517 | -635.4434 | -534.3256 | -1.2108 | -1.3303 |
| 0.5857 | 0.5365 | 4100 | 0.5042 | -3.9892 | -5.4676 | 0.7450 | 1.4784 | -787.9547 | -664.1624 | -1.0956 | -1.2197 |
| 0.5061 | 0.5496 | 4200 | 0.4935 | -3.7216 | -5.1245 | 0.7530 | 1.4028 | -753.6463 | -637.4094 | -1.1464 | -1.2683 |
| 0.5986 | 0.5627 | 4300 | 0.4914 | -3.1969 | -4.5356 | 0.7390 | 1.3387 | -694.7587 | -584.9332 | -1.1938 | -1.3167 |
| 0.4241 | 0.5758 | 4400 | 0.4893 | -2.6486 | -3.8966 | 0.7530 | 1.2479 | -630.8525 | -530.1075 | -1.2460 | -1.3678 |
| 0.3475 | 0.5889 | 4500 | 0.4877 | -2.8234 | -4.1317 | 0.7570 | 1.3083 | -654.3662 | -547.5814 | -1.2301 | -1.3518 |
| 0.4631 | 0.6020 | 4600 | 0.4989 | -3.3953 | -4.9609 | 0.7560 | 1.5656 | -737.2855 | -604.7740 | -1.1498 | -1.2733 |
| 0.3956 | 0.6150 | 4700 | 0.4925 | -3.2188 | -4.6087 | 0.7590 | 1.3899 | -702.0704 | -587.1262 | -1.1608 | -1.2820 |
| 0.4484 | 0.6281 | 4800 | 0.4914 | -3.4710 | -4.9376 | 0.7560 | 1.4666 | -734.9601 | -612.3439 | -1.1318 | -1.2532 |
| 0.5111 | 0.6412 | 4900 | 0.4833 | -3.1162 | -4.4317 | 0.7480 | 1.3155 | -684.3722 | -576.8661 | -1.2048 | -1.3263 |
| 0.5135 | 0.6543 | 5000 | 0.4862 | -3.2329 | -4.6196 | 0.7520 | 1.3867 | -703.1549 | -588.5310 | -1.1914 | -1.3136 |
| 0.576 | 0.6674 | 5100 | 0.4960 | -3.3247 | -4.8649 | 0.7590 | 1.5402 | -727.6852 | -597.7153 | -1.1834 | -1.3046 |
| 0.4551 | 0.6805 | 5200 | 0.4904 | -3.2523 | -4.7025 | 0.7580 | 1.4503 | -711.4512 | -590.4719 | -1.2041 | -1.3253 |
| 0.4653 | 0.6936 | 5300 | 0.4902 | -3.3055 | -4.7753 | 0.7600 | 1.4697 | -718.7239 | -595.7977 | -1.1999 | -1.3212 |
| 0.5424 | 0.7066 | 5400 | 0.4876 | -3.1443 | -4.5701 | 0.7590 | 1.4258 | -698.2104 | -579.6744 | -1.2230 | -1.3422 |
| 0.5207 | 0.7197 | 5500 | 0.4857 | -3.0716 | -4.4795 | 0.7590 | 1.4079 | -689.1460 | -572.4054 | -1.2322 | -1.3514 |
| 0.4543 | 0.7328 | 5600 | 0.4877 | -3.1036 | -4.5314 | 0.7560 | 1.4278 | -694.3380 | -575.6021 | -1.2395 | -1.3586 |
| 0.5223 | 0.7459 | 5700 | 0.4855 | -3.0351 | -4.4211 | 0.7580 | 1.3859 | -683.3027 | -568.7588 | -1.2496 | -1.3686 |
| 0.4744 | 0.7590 | 5800 | 0.4856 | -3.2712 | -4.7065 | 0.7590 | 1.4353 | -711.8522 | -592.3647 | -1.2287 | -1.3484 |
| 0.6225 | 0.7721 | 5900 | 0.4851 | -3.2614 | -4.6768 | 0.7580 | 1.4154 | -708.8779 | -591.3820 | -1.2366 | -1.3556 |
| 0.411 | 0.7852 | 6000 | 0.4849 | -3.2566 | -4.6727 | 0.7570 | 1.4160 | -708.4648 | -590.9095 | -1.2298 | -1.3495 |
| 0.3609 | 0.7982 | 6100 | 0.4853 | -3.2738 | -4.6973 | 0.7550 | 1.4236 | -710.9300 | -592.6219 | -1.2331 | -1.3524 |
| 0.4411 | 0.8113 | 6200 | 0.4853 | -3.3447 | -4.7904 | 0.7570 | 1.4458 | -720.2413 | -599.7125 | -1.2213 | -1.3412 |
| 0.5948 | 0.8244 | 6300 | 0.4847 | -3.3188 | -4.7534 | 0.7590 | 1.4347 | -716.5390 | -597.1201 | -1.2233 | -1.3430 |
| 0.5653 | 0.8375 | 6400 | 0.4840 | -3.2673 | -4.6932 | 0.7590 | 1.4258 | -710.5150 | -591.9785 | -1.2257 | -1.3453 |
| 0.3609 | 0.8506 | 6500 | 0.4839 | -3.2750 | -4.6975 | 0.7580 | 1.4226 | -710.9508 | -592.7427 | -1.2217 | -1.3416 |
| 0.4754 | 0.8637 | 6600 | 0.4839 | -3.2851 | -4.7106 | 0.7550 | 1.4255 | -712.2548 | -593.7542 | -1.2186 | -1.3387 |
| 0.4108 | 0.8768 | 6700 | 0.4838 | -3.2752 | -4.6988 | 0.7560 | 1.4236 | -711.0748 | -592.7615 | -1.2243 | -1.3438 |
| 0.4527 | 0.8898 | 6800 | 0.4838 | -3.2834 | -4.7125 | 0.7570 | 1.4291 | -712.4506 | -593.5831 | -1.2307 | -1.3496 |
| 0.447 | 0.9029 | 6900 | 0.4840 | -3.2884 | -4.7197 | 0.7590 | 1.4312 | -713.1663 | -594.0894 | -1.2289 | -1.3480 |
| 0.4922 | 0.9160 | 7000 | 0.4841 | -3.2871 | -4.7179 | 0.7580 | 1.4308 | -712.9905 | -593.9543 | -1.2270 | -1.3462 |
| 0.5316 | 0.9291 | 7100 | 0.4840 | -3.2857 | -4.7169 | 0.7580 | 1.4312 | -712.8836 | -593.8110 | -1.2269 | -1.3462 |
| 0.5271 | 0.9422 | 7200 | 0.4839 | -3.2855 | -4.7168 | 0.7570 | 1.4313 | -712.8786 | -593.7981 | -1.2256 | -1.3450 |
| 0.48 | 0.9553 | 7300 | 0.4840 | -3.2860 | -4.7168 | 0.7570 | 1.4308 | -712.8812 | -593.8459 | -1.2305 | -1.3494 |
| 0.4415 | 0.9684 | 7400 | 0.4840 | -3.2842 | -4.7150 | 0.7580 | 1.4309 | -712.6991 | -593.6600 | -1.2190 | -1.3390 |
| 0.4848 | 0.9815 | 7500 | 0.4840 | -3.2860 | -4.7173 | 0.7600 | 1.4313 | -712.9272 | -593.8441 | -1.2235 | -1.3430 |
| 0.5862 | 0.9945 | 7600 | 0.4840 | -3.2859 | -4.7173 | 0.7580 | 1.4314 | -712.9290 | -593.8395 | -1.2242 | -1.3436 |

### Framework versions

- PEFT 0.11.1
- Transformers 4.41.2
- Pytorch 2.3.0+cu121
- Datasets 2.19.1
- Tokenizers 0.19.1
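
Because this repository contains a PEFT (LoRA) adapter rather than merged weights, as the `adapter_model.safetensors` diff below shows, inference requires attaching the adapter to the base model. Here is a minimal loading sketch using the PEFT 0.11-era API; the repository id passed to `PeftModel.from_pretrained` is an assumption based on the committer and model name:

```python
# Minimal inference sketch; the adapter repo id below is an ASSUMPTION.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "shichengshuai98/zephyr-7b-dpo-qlora")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

prompt = "Explain direct preference optimization in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```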
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8951361d69f532f7cc6788194d096fe46afa234e4f9c60670a1e28c13fdabbe5
 size 671150064
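Both this file and the TensorBoard event file below are Git LFS pointers: the repository itself tracks only the spec version, the `oid sha256:` content digest, and the byte `size`, while the roughly 671 MB adapter payload lives in LFS storage, so the commit simply swaps one pointer for another.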
all_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 0.9999018549416037,
    "total_flos": 0.0,
    "train_loss": 0.5203102414925643,
    "train_runtime": 90452.7583,
    "train_samples": 61134,
    "train_samples_per_second": 0.676,
    "train_steps_per_second": 0.084
}
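As a quick consistency check on these numbers: 61134 train_samples over a train_runtime of 90452.76 s gives roughly 0.676 samples per second, matching the reported train_samples_per_second, and 0.676 / 8 (the total train batch size) ≈ 0.084 steps per second.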
runs/Jun01_22-36-28_lican-Lambda-Vector2/events.out.tfevents.1717295813.lican-Lambda-Vector2.2296060.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:670ae33c0078806bae5a56a58bc4805ff4b4102a4657cb502351d5f9c8a1db36
+size 588229
train_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 0.9999018549416037,
    "total_flos": 0.0,
    "train_loss": 0.5203102414925643,
    "train_runtime": 90452.7583,
    "train_samples": 61134,
    "train_samples_per_second": 0.676,
    "train_steps_per_second": 0.084
}
trainer_state.json
ADDED
The diff for this file is too large to render.