martimfasantos committed
Commit 20bd495
1 Parent(s): 7d570d7

Model save

README.md CHANGED
@@ -2,13 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
-datasets:
-- openai/summarize_from_feedback
 model-index:
 - name: tinyllama-1.1b-sum-dpo-qlora
   results: []
@@ -19,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # tinyllama-1.1b-sum-dpo-qlora
 
-This model is a fine-tuned version of [martimfasantos/tinyllama-1.1b-sum-sft-qlora](https://huggingface.co/martimfasantos/tinyllama-1.1b-sum-sft-qlora) on the openai/summarize_from_feedback dataset.
+This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6482
-- Rewards/chosen: -0.9538
-- Rewards/rejected: -1.1194
-- Rewards/accuracies: 0.6171
-- Rewards/margins: 0.1656
-- Logps/rejected: -187.0472
-- Logps/chosen: -166.7881
-- Logits/rejected: -3.0176
-- Logits/chosen: -3.0239
+- Loss: 0.6463
+- Rewards/chosen: -0.9560
+- Rewards/rejected: -1.1279
+- Rewards/accuracies: 0.6204
+- Rewards/margins: 0.1719
+- Logps/rejected: -187.9012
+- Logps/chosen: -167.0102
+- Logits/rejected: -3.0162
+- Logits/chosen: -3.0224
 
 ## Model description
 
@@ -62,66 +59,66 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
-|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
-| 0.6926 | 0.02 | 100 | -3.4980 | -3.4962 | -70.9186 | -74.6392 | 0.6930 | 0.5193 | 0.0049 | 0.0002 | 0.0047 |
-| 0.6919 | 0.03 | 200 | -3.4925 | -3.4908 | -69.9505 | -73.7540 | 0.6926 | 0.5678 | 0.0146 | 0.0011 | 0.0135 |
-| 0.6888 | 0.05 | 300 | -3.4861 | -3.4843 | -67.8994 | -72.0238 | 0.6911 | 0.5748 | 0.0351 | 0.0043 | 0.0308 |
-| 0.6864 | 0.07 | 400 | -3.4827 | -3.4809 | -69.7504 | -74.3218 | 0.6890 | 0.5627 | 0.0166 | 0.0087 | 0.0079 |
-| 0.6864 | 0.09 | 500 | -3.4687 | -3.4669 | -69.0559 | -74.2092 | 0.6864 | 0.5716 | 0.0235 | 0.0146 | 0.0090 |
-| 0.6729 | 0.1 | 600 | -3.4506 | -3.4489 | -71.3562 | -77.1629 | 0.6837 | 0.5869 | 0.0005 | 0.0211 | -0.0206 |
-| 0.6745 | 0.12 | 700 | -3.4487 | -3.4467 | -78.9372 | -85.9956 | 0.6786 | 0.5955 | -0.0753 | 0.0336 | -0.1089 |
-| 0.6681 | 0.14 | 800 | -3.4169 | -3.4151 | -90.1915 | -98.6570 | 0.6738 | 0.5955 | -0.1878 | 0.0477 | -0.2355 |
-| 0.6661 | 0.16 | 900 | -3.3755 | -3.3740 | -88.5994 | -97.6376 | 0.6715 | 0.5922 | -0.1719 | 0.0534 | -0.2253 |
-| 0.6686 | 0.17 | 1000 | -3.3483 | -3.3467 | -111.1606 | -121.9167 | 0.6681 | 0.5936 | -0.3975 | 0.0706 | -0.4681 |
-| 0.665 | 0.19 | 1100 | -3.3477 | -3.3463 | -92.1750 | -101.6747 | 0.6708 | 0.5950 | -0.2076 | 0.0580 | -0.2657 |
-| 0.6549 | 0.21 | 1200 | -3.3173 | -3.3159 | -107.3321 | -119.3906 | 0.6631 | 0.5974 | -0.3592 | 0.0836 | -0.4428 |
-| 0.6536 | 0.22 | 1300 | -3.2737 | -3.2722 | -121.8111 | -135.5439 | 0.6591 | 0.5978 | -0.5040 | 0.1004 | -0.6044 |
-| 0.6303 | 0.24 | 1400 | -3.2790 | -3.2775 | -111.6529 | -124.7296 | 0.6593 | 0.6055 | -0.4024 | 0.0938 | -0.4962 |
-| 0.6611 | 0.26 | 1500 | -3.2472 | -3.2454 | -132.2458 | -148.1280 | 0.6527 | 0.6138 | -0.6084 | 0.1219 | -0.7302 |
-| 0.6395 | 0.28 | 1600 | -3.2525 | -3.2505 | -126.2706 | -141.6170 | 0.6536 | 0.6155 | -0.5486 | 0.1165 | -0.6651 |
-| 0.678 | 0.29 | 1700 | -3.2125 | -3.2107 | -117.8728 | -131.2285 | 0.6587 | 0.6169 | -0.4646 | 0.0966 | -0.5612 |
-| 0.629 | 0.31 | 1800 | -3.1113 | -3.1087 | -146.8860 | -164.9026 | 0.6489 | 0.6187 | -0.7548 | 0.1432 | -0.8980 |
-| 0.6622 | 0.33 | 1900 | -3.1419 | -3.1399 | -125.9992 | -140.6700 | 0.6555 | 0.6069 | -0.5459 | 0.1097 | -0.6556 |
-| 0.64 | 0.34 | 2000 | -3.1847 | -3.1824 | -140.1714 | -156.3843 | 0.6523 | 0.6101 | -0.6876 | 0.1252 | -0.8128 |
-| 0.6479 | 0.36 | 2100 | -3.1160 | -3.1130 | -150.8988 | -167.6336 | 0.6537 | 0.6104 | -0.7949 | 0.1304 | -0.9253 |
-| 0.6023 | 0.38 | 2200 | -3.1479 | -3.1449 | -137.7163 | -153.7927 | 0.6536 | 0.6034 | -0.6631 | 0.1238 | -0.7869 |
-| 0.5962 | 0.4 | 2300 | -3.1012 | -3.0975 | -159.4141 | -177.2301 | 0.6523 | 0.6078 | -0.8800 | 0.1412 | -1.0212 |
-| 0.6176 | 0.41 | 2400 | -3.0320 | -3.0265 | -172.7089 | -192.7748 | 0.6506 | 0.6027 | -1.0130 | 0.1637 | -1.1767 |
-| 0.6255 | 0.43 | 2500 | -3.0629 | -3.0584 | -156.9642 | -175.3398 | 0.6507 | 0.6101 | -0.8555 | 0.1468 | -1.0023 |
-| 0.6075 | 0.45 | 2600 | -3.0877 | -3.0839 | -146.0736 | -162.3147 | 0.6547 | 0.6046 | -0.7466 | 0.1254 | -0.8721 |
-| 0.6282 | 0.47 | 2700 | -3.1221 | -3.1185 | -140.7325 | -157.2624 | 0.6531 | 0.6101 | -0.6932 | 0.1283 | -0.8216 |
-| 0.6495 | 0.48 | 2800 | -3.0926 | -3.0887 | -148.7372 | -166.3009 | 0.6517 | 0.6080 | -0.7733 | 0.1387 | -0.9119 |
-| 0.6202 | 0.5 | 2900 | -3.0787 | -3.0744 | -152.9659 | -170.9832 | 0.6512 | 0.6048 | -0.8156 | 0.1432 | -0.9588 |
-| 0.6252 | 0.52 | 3000 | -3.0824 | -3.0782 | -148.4267 | -166.3868 | 0.6505 | 0.6055 | -0.7702 | 0.1426 | -0.9128 |
-| 0.6082 | 0.53 | 3100 | -3.0723 | -3.0678 | -149.2047 | -167.4548 | 0.6500 | 0.6115 | -0.7779 | 0.1455 | -0.9235 |
-| 0.6072 | 0.55 | 3200 | -3.0863 | -3.0819 | -147.0810 | -164.9669 | 0.6499 | 0.6090 | -0.7567 | 0.1419 | -0.8986 |
-| 0.6142 | 0.57 | 3300 | -3.0087 | -3.0026 | -179.2665 | -200.5992 | 0.6468 | 0.6176 | -1.0786 | 0.1764 | -1.2549 |
-| 0.602 | 0.59 | 3400 | -3.0674 | -3.0624 | -150.3082 | -168.4087 | 0.6504 | 0.6136 | -0.7890 | 0.1440 | -0.9330 |
-| 0.605 | 0.6 | 3500 | -3.0590 | -3.0538 | -154.1790 | -172.9109 | 0.6497 | 0.6122 | -0.8277 | 0.1503 | -0.9780 |
-| 0.6263 | 0.62 | 3600 | -3.0721 | -3.0672 | -149.9757 | -168.0735 | 0.6508 | 0.6043 | -0.7857 | 0.1440 | -0.9297 |
-| 0.5961 | 0.64 | 3700 | -3.0151 | -3.0090 | -169.4567 | -189.3689 | 0.6492 | 0.6136 | -0.9805 | 0.1622 | -1.1426 |
-| 0.6273 | 0.65 | 3800 | -3.0117 | -3.0057 | -167.9805 | -187.6573 | 0.6494 | 0.6141 | -0.9657 | 0.1598 | -1.1255 |
-| 0.6183 | 0.67 | 3900 | -3.0137 | -3.0077 | -167.4417 | -187.2734 | 0.6488 | 0.6166 | -0.9603 | 0.1613 | -1.1217 |
-| 0.6051 | 0.69 | 4000 | -2.9974 | -2.9908 | -176.3739 | -197.1255 | 0.6482 | 0.6178 | -1.0496 | 0.1705 | -1.2202 |
-| 0.5867 | 0.71 | 4100 | -3.0151 | -3.0088 | -169.1084 | -189.3998 | 0.6484 | 0.6125 | -0.9770 | 0.1659 | -1.1429 |
-| 0.6554 | 0.72 | 4200 | -3.0270 | -3.0209 | -164.2755 | -184.0126 | 0.6489 | 0.6176 | -0.9287 | 0.1604 | -1.0891 |
-| 0.6053 | 0.74 | 4300 | -3.0362 | -3.0303 | -159.9774 | -179.4446 | 0.6489 | 0.6097 | -0.8857 | 0.1577 | -1.0434 |
-| 0.6153 | 0.76 | 4400 | -3.0351 | -3.0292 | -160.5470 | -180.1235 | 0.6489 | 0.6120 | -0.8914 | 0.1588 | -1.0502 |
-| 0.6145 | 0.78 | 4500 | -3.0378 | -3.0319 | -160.1720 | -179.6728 | 0.6490 | 0.6113 | -0.8876 | 0.1580 | -1.0457 |
-| 0.5798 | 0.79 | 4600 | -3.0308 | -3.0247 | -162.6813 | -182.4701 | 0.6488 | 0.6148 | -0.9127 | 0.1609 | -1.0736 |
-| 0.6218 | 0.81 | 4700 | -3.0307 | -3.0246 | -163.0493 | -182.9482 | 0.6486 | 0.6152 | -0.9164 | 0.1620 | -1.0784 |
-| 0.6102 | 0.83 | 4800 | -3.0259 | -3.0197 | -164.8939 | -184.9769 | 0.6484 | 0.6150 | -0.9348 | 0.1639 | -1.0987 |
-| 0.6176 | 0.84 | 4900 | -3.0273 | -3.0211 | -165.7554 | -185.9428 | 0.6483 | 0.6157 | -0.9435 | 0.1649 | -1.1084 |
-| 0.5907 | 0.86 | 5000 | -3.0259 | -3.0196 | -167.1301 | -187.4627 | 0.6482 | 0.6164 | -0.9572 | 0.1664 | -1.1236 |
-| 0.6534 | 0.88 | 5100 | -3.0211 | -3.0148 | -167.2241 | -187.5712 | 0.6481 | 0.6155 | -0.9581 | 0.1665 | -1.1246 |
-| 0.5973 | 0.9 | 5200 | -3.0194 | -3.0130 | -166.8823 | -187.1679 | 0.6483 | 0.6169 | -0.9547 | 0.1659 | -1.1206 |
-| 0.5975 | 0.91 | 5300 | -3.0248 | -3.0185 | -166.6118 | -186.8759 | 0.6482 | 0.6162 | -0.9520 | 0.1657 | -1.1177 |
-| 0.5986 | 0.93 | 5400 | -3.0249 | -3.0186 | -166.6502 | -186.8928 | 0.6483 | 0.6190 | -0.9524 | 0.1655 | -1.1179 |
-| 0.6025 | 0.95 | 5500 | -3.0252 | -3.0189 | -166.7467 | -186.9980 | 0.6483 | 0.6169 | -0.9534 | 0.1655 | -1.1189 |
-| 0.6149 | 0.96 | 5600 | -3.0244 | -3.0181 | -166.7859 | -187.1137 | 0.6480 | 0.6155 | -0.9538 | 0.1663 | -1.1201 |
-| 0.6275 | 0.98 | 5700 | -3.0245 | -3.0182 | -166.6791 | -186.9484 | 0.6482 | 0.6178 | -0.9527 | 0.1657 | -1.1184 |
-| 0.5876 | 1.0 | 5800 | -3.0239 | -3.0176 | -166.7881 | -187.0472 | 0.6482 | 0.6171 | -0.9538 | 0.1656 | -1.1194 |
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6927 | 0.02 | 100 | 0.6930 | 0.0049 | 0.0047 | 0.5204 | 0.0003 | -74.6416 | -70.9175 | -3.4966 | -3.4983 |
+| 0.692 | 0.03 | 200 | 0.6926 | 0.0146 | 0.0135 | 0.5616 | 0.0012 | -73.7585 | -69.9458 | -3.4924 | -3.4942 |
+| 0.6887 | 0.05 | 300 | 0.6911 | 0.0351 | 0.0308 | 0.5732 | 0.0043 | -72.0302 | -67.9024 | -3.4858 | -3.4876 |
+| 0.6865 | 0.07 | 400 | 0.6890 | 0.0164 | 0.0077 | 0.5609 | 0.0087 | -74.3370 | -69.7677 | -3.4786 | -3.4805 |
+| 0.6864 | 0.09 | 500 | 0.6864 | 0.0236 | 0.0089 | 0.5755 | 0.0146 | -74.2129 | -69.0538 | -3.4662 | -3.4680 |
+| 0.6731 | 0.1 | 600 | 0.6838 | 0.0019 | -0.0189 | 0.5871 | 0.0209 | -77.0012 | -71.2189 | -3.4497 | -3.4515 |
+| 0.6749 | 0.12 | 700 | 0.6788 | -0.0758 | -0.1091 | 0.5980 | 0.0333 | -86.0178 | -78.9945 | -3.4470 | -3.4489 |
+| 0.6678 | 0.14 | 800 | 0.6741 | -0.1859 | -0.2330 | 0.5906 | 0.0471 | -98.4033 | -89.9991 | -3.4169 | -3.4188 |
+| 0.6655 | 0.16 | 900 | 0.6709 | -0.1856 | -0.2411 | 0.5927 | 0.0555 | -99.2188 | -89.9669 | -3.3811 | -3.3826 |
+| 0.6695 | 0.17 | 1000 | 0.6686 | -0.3893 | -0.4584 | 0.5946 | 0.0691 | -120.9453 | -110.3432 | -3.3595 | -3.3611 |
+| 0.6648 | 0.19 | 1100 | 0.6702 | -0.2078 | -0.2671 | 0.5976 | 0.0593 | -101.8174 | -92.1903 | -3.3439 | -3.3453 |
+| 0.6543 | 0.21 | 1200 | 0.6642 | -0.3511 | -0.4313 | 0.6011 | 0.0802 | -118.2354 | -106.5216 | -3.3096 | -3.3110 |
+| 0.6535 | 0.22 | 1300 | 0.6605 | -0.4651 | -0.5609 | 0.5990 | 0.0957 | -131.1967 | -117.9248 | -3.2817 | -3.2832 |
+| 0.6315 | 0.24 | 1400 | 0.6606 | -0.3801 | -0.4704 | 0.6138 | 0.0903 | -122.1497 | -109.4246 | -3.2773 | -3.2788 |
+| 0.6595 | 0.26 | 1500 | 0.6544 | -0.5561 | -0.6712 | 0.6197 | 0.1151 | -142.2231 | -127.0196 | -3.2429 | -3.2446 |
+| 0.6383 | 0.28 | 1600 | 0.6538 | -0.5868 | -0.7052 | 0.6178 | 0.1184 | -145.6309 | -130.0926 | -3.2318 | -3.2338 |
+| 0.6775 | 0.29 | 1700 | 0.6568 | -0.4687 | -0.5717 | 0.6173 | 0.1030 | -132.2748 | -118.2820 | -3.2194 | -3.2212 |
+| 0.6312 | 0.31 | 1800 | 0.6497 | -0.7203 | -0.8617 | 0.6111 | 0.1414 | -161.2767 | -143.4406 | -3.1213 | -3.1237 |
+| 0.665 | 0.33 | 1900 | 0.6551 | -0.5175 | -0.6278 | 0.6134 | 0.1103 | -137.8867 | -123.1614 | -3.1660 | -3.1680 |
+| 0.6385 | 0.34 | 2000 | 0.6522 | -0.6166 | -0.7379 | 0.6162 | 0.1213 | -148.8959 | -133.0700 | -3.1823 | -3.1845 |
+| 0.6452 | 0.36 | 2100 | 0.6538 | -0.7088 | -0.8325 | 0.6048 | 0.1237 | -158.3535 | -142.2912 | -3.1344 | -3.1369 |
+| 0.6024 | 0.38 | 2200 | 0.6527 | -0.6378 | -0.7639 | 0.6120 | 0.1262 | -151.5019 | -135.1858 | -3.1567 | -3.1596 |
+| 0.5912 | 0.4 | 2300 | 0.6485 | -0.8992 | -1.0561 | 0.6106 | 0.1569 | -180.7164 | -161.3302 | -3.0812 | -3.0853 |
+| 0.6188 | 0.41 | 2400 | 0.6488 | -0.9960 | -1.1662 | 0.6204 | 0.1702 | -191.7268 | -171.0100 | -3.0219 | -3.0276 |
+| 0.6286 | 0.43 | 2500 | 0.6483 | -0.8764 | -1.0333 | 0.6076 | 0.1568 | -178.4354 | -159.0542 | -3.0428 | -3.0475 |
+| 0.61 | 0.45 | 2600 | 0.6532 | -0.7428 | -0.8730 | 0.6018 | 0.1302 | -162.4074 | -145.6894 | -3.0767 | -3.0804 |
+| 0.6295 | 0.47 | 2700 | 0.6526 | -0.6786 | -0.8083 | 0.6138 | 0.1296 | -155.9322 | -139.2748 | -3.1080 | -3.1114 |
+| 0.6504 | 0.48 | 2800 | 0.6510 | -0.7810 | -0.9243 | 0.6106 | 0.1432 | -167.5323 | -149.5115 | -3.0877 | -3.0915 |
+| 0.6226 | 0.5 | 2900 | 0.6513 | -0.7637 | -0.9050 | 0.6127 | 0.1413 | -165.6116 | -147.7837 | -3.0831 | -3.0870 |
+| 0.6226 | 0.52 | 3000 | 0.6494 | -0.7375 | -0.8834 | 0.6078 | 0.1459 | -163.4444 | -145.1619 | -3.0916 | -3.0955 |
+| 0.6062 | 0.53 | 3100 | 0.6485 | -0.7793 | -0.9311 | 0.6129 | 0.1518 | -168.2215 | -149.3398 | -3.0906 | -3.0949 |
+| 0.6071 | 0.55 | 3200 | 0.6477 | -0.8041 | -0.9577 | 0.6118 | 0.1536 | -170.8775 | -151.8242 | -3.0911 | -3.0956 |
+| 0.608 | 0.57 | 3300 | 0.6461 | -1.1115 | -1.2974 | 0.6150 | 0.1859 | -204.8467 | -182.5597 | -3.0002 | -3.0064 |
+| 0.5996 | 0.59 | 3400 | 0.6486 | -0.7960 | -0.9481 | 0.6099 | 0.1520 | -169.9129 | -151.0113 | -3.0691 | -3.0742 |
+| 0.6081 | 0.6 | 3500 | 0.6478 | -0.8354 | -0.9930 | 0.6157 | 0.1576 | -174.4116 | -154.9542 | -3.0630 | -3.0681 |
+| 0.6256 | 0.62 | 3600 | 0.6491 | -0.7744 | -0.9234 | 0.6145 | 0.1489 | -167.4422 | -148.8546 | -3.0722 | -3.0769 |
+| 0.5969 | 0.64 | 3700 | 0.6469 | -0.9732 | -1.1419 | 0.6150 | 0.1687 | -189.2978 | -168.7282 | -3.0171 | -3.0231 |
+| 0.6272 | 0.65 | 3800 | 0.6472 | -0.9477 | -1.1124 | 0.6176 | 0.1648 | -186.3489 | -166.1768 | -3.0087 | -3.0145 |
+| 0.6222 | 0.67 | 3900 | 0.6467 | -0.9719 | -1.1400 | 0.6166 | 0.1681 | -189.1107 | -168.6043 | -3.0040 | -3.0100 |
+| 0.605 | 0.69 | 4000 | 0.6461 | -1.0773 | -1.2558 | 0.6204 | 0.1785 | -200.6857 | -179.1379 | -2.9783 | -2.9849 |
+| 0.585 | 0.71 | 4100 | 0.6464 | -0.9836 | -1.1556 | 0.6164 | 0.1720 | -190.6670 | -169.7659 | -3.0024 | -3.0086 |
+| 0.6602 | 0.72 | 4200 | 0.6465 | -0.9496 | -1.1182 | 0.6178 | 0.1686 | -186.9268 | -166.3669 | -3.0089 | -3.0150 |
+| 0.6074 | 0.74 | 4300 | 0.6468 | -0.8954 | -1.0597 | 0.6183 | 0.1643 | -181.0816 | -160.9504 | -3.0248 | -3.0306 |
+| 0.6105 | 0.76 | 4400 | 0.6470 | -0.8905 | -1.0547 | 0.6150 | 0.1641 | -180.5745 | -160.4626 | -3.0306 | -3.0365 |
+| 0.6127 | 0.78 | 4500 | 0.6470 | -0.8899 | -1.0538 | 0.6183 | 0.1638 | -180.4842 | -160.4037 | -3.0280 | -3.0338 |
+| 0.5798 | 0.79 | 4600 | 0.6468 | -0.9128 | -1.0793 | 0.6208 | 0.1665 | -183.0344 | -162.6864 | -3.0195 | -3.0255 |
+| 0.6228 | 0.81 | 4700 | 0.6467 | -0.9215 | -1.0896 | 0.6192 | 0.1681 | -184.0640 | -163.5562 | -3.0231 | -3.0291 |
+| 0.6131 | 0.83 | 4800 | 0.6466 | -0.9391 | -1.1091 | 0.6199 | 0.1700 | -186.0176 | -165.3165 | -3.0141 | -3.0202 |
+| 0.6215 | 0.84 | 4900 | 0.6465 | -0.9478 | -1.1189 | 0.6197 | 0.1711 | -186.9947 | -166.1919 | -3.0180 | -3.0241 |
+| 0.585 | 0.86 | 5000 | 0.6460 | -0.9592 | -1.1321 | 0.6201 | 0.1729 | -188.3154 | -167.3252 | -3.0164 | -3.0226 |
+| 0.6478 | 0.88 | 5100 | 0.6460 | -0.9606 | -1.1336 | 0.6194 | 0.1730 | -188.4695 | -167.4737 | -3.0151 | -3.0213 |
+| 0.6018 | 0.9 | 5200 | 0.6462 | -0.9572 | -1.1296 | 0.6206 | 0.1725 | -188.0692 | -167.1259 | -3.0105 | -3.0167 |
+| 0.5963 | 0.91 | 5300 | 0.6465 | -0.9564 | -1.1282 | 0.6199 | 0.1718 | -187.9285 | -167.0541 | -3.0167 | -3.0229 |
+| 0.5921 | 0.93 | 5400 | 0.6462 | -0.9569 | -1.1292 | 0.6199 | 0.1723 | -188.0274 | -167.0996 | -3.0133 | -3.0196 |
+| 0.6015 | 0.95 | 5500 | 0.6463 | -0.9570 | -1.1292 | 0.6192 | 0.1723 | -188.0282 | -167.1056 | -3.0164 | -3.0226 |
+| 0.6148 | 0.96 | 5600 | 0.6461 | -0.9543 | -1.1269 | 0.6194 | 0.1726 | -187.7934 | -166.8396 | -3.0142 | -3.0205 |
+| 0.6299 | 0.98 | 5700 | 0.6462 | -0.9543 | -1.1263 | 0.6194 | 0.1720 | -187.7363 | -166.8363 | -3.0166 | -3.0228 |
+| 0.5854 | 1.0 | 5800 | 0.6463 | -0.9560 | -1.1279 | 0.6204 | 0.1719 | -187.9012 | -167.0102 | -3.0162 | -3.0224 |
 
 
 ### Framework versions
 
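In TRL's DPO metrics, the reward columns are linked by a simple identity: Rewards/margins is Rewards/chosen minus Rewards/rejected, and Rewards/accuracies is the fraction of evaluation pairs whose chosen reward beats the rejected one. A minimal sanity check (not part of the repo) against the final eval row of the updated card:

```python
# Final eval metrics from the updated README (rounded to 4 decimals there).
chosen, rejected, margin = -0.9560, -1.1279, 0.1719

# margin = chosen - rejected; the tolerance absorbs the card's rounding.
assert abs((chosen - rejected) - margin) < 1e-3
print(f"chosen - rejected = {chosen - rejected:.4f}, reported margin = {margin:.4f}")
```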
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2641688e1f40a083553ce1075049529ce2459db0650b694128976c0b65a3b83
+oid sha256:9c93710965f225a21f3fdc401c50be5976067c894cf98340846cf05aeb8a1236
 size 201892728
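Only the LFS pointer's sha256 changes here; the payload stays 201892728 bytes. A hedged sketch for checking a locally downloaded copy against the new pointer (the local path is an assumption, not something tracked in this commit):

```python
import hashlib
from pathlib import Path

path = Path("adapter_model.safetensors")  # assumed local download location
expected_oid = "9c93710965f225a21f3fdc401c50be5976067c894cf98340846cf05aeb8a1236"
expected_size = 201892728  # byte size from the LFS pointer above

data = path.read_bytes()
assert len(data) == expected_size, "size differs from the LFS pointer"
assert hashlib.sha256(data).hexdigest() == expected_oid, "sha256 differs from the LFS pointer"
print("local adapter matches the pointer committed here")
```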
all_results.json CHANGED
@@ -1,21 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": -3.023923397064209,
-    "eval_logits/rejected": -3.017603874206543,
-    "eval_logps/chosen": -166.78807067871094,
-    "eval_logps/rejected": -187.0471954345703,
-    "eval_loss": 0.6481729745864868,
-    "eval_rewards/accuracies": 0.6171003580093384,
-    "eval_rewards/chosen": -0.9537805914878845,
-    "eval_rewards/margins": 0.1656205952167511,
-    "eval_rewards/rejected": -1.1194013357162476,
-    "eval_runtime": 483.9291,
-    "eval_samples": 4304,
-    "eval_samples_per_second": 8.894,
-    "eval_steps_per_second": 1.112,
-    "train_loss": 0.0003277428618961422,
-    "train_runtime": 17.7068,
+    "train_loss": 0.6317814285541924,
+    "train_runtime": 53813.0859,
     "train_samples": 92858,
-    "train_samples_per_second": 5244.214,
-    "train_steps_per_second": 327.728
+    "train_samples_per_second": 1.726,
+    "train_steps_per_second": 0.108
 }
runs/May10_18-01-31_poseidon/events.out.tfevents.1715364109.poseidon.2827305.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b1a0c62754038677f6ad7b2fea0371138313f302e699ce6c36620e8314b3bcd
-size 447854
+oid sha256:f3f23feeb876e0c222209ff8399a71856d1e98b9d9d2d8d9c822b00e91cd2c02
+size 448208
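The updated event file can be replayed with TensorBoard's event reader to recover the logged curves. A sketch assuming a standard tensorboard install; the exact scalar tag names depend on the trainer's logging configuration:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator(
    "runs/May10_18-01-31_poseidon/events.out.tfevents.1715364109.poseidon.2827305.0"
)
acc.Reload()                  # parse the event file
print(acc.Tags()["scalars"])  # e.g. loss/reward tags; names vary by setup
```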
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.0003277428618961422,
-    "train_runtime": 17.7068,
+    "train_loss": 0.6317814285541924,
+    "train_runtime": 53813.0859,
     "train_samples": 92858,
-    "train_samples_per_second": 5244.214,
-    "train_steps_per_second": 327.728
+    "train_samples_per_second": 1.726,
+    "train_steps_per_second": 0.108
 }
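The corrected throughput fields follow directly from the runtime: 92858 samples over 53813.0859 seconds is about 1.726 samples/s, and the 5800 optimizer steps recorded in the training-results table give about 0.108 steps/s. A quick consistency check:

```python
train_samples = 92858
train_runtime = 53813.0859  # seconds
total_steps = 5800          # final step in the training-results table

assert abs(train_samples / train_runtime - 1.726) < 1e-3  # samples per second
assert abs(total_steps / train_runtime - 0.108) < 1e-3    # steps per second
```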
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff