---
license: apache-2.0
library_name: peft
tags:
- alignment-handbook
- trl
- dpo
- generated_from_trainer
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
datasets:
- openai/summarize_from_feedback
model-index:
- name: tinyllama-1.1b-sum-dpo-qlora
  results: []
---

# tinyllama-1.1b-sum-dpo-qlora

This model is a fine-tuned version of [martimfasantos/tinyllama-1.1b-sum-sft-qlora](https://huggingface.co/martimfasantos/tinyllama-1.1b-sum-sft-qlora) on the openai/summarize_from_feedback dataset.
It achieves the following results on the evaluation set:
- Loss: 0.6463
- Rewards/chosen: -0.9560
- Rewards/rejected: -1.1279
- Rewards/accuracies: 0.6204
- Rewards/margins: 0.1719
- Logps/rejected: -187.9012
- Logps/chosen: -167.0102
- Logits/rejected: -3.0162
- Logits/chosen: -3.0224

Following TRL's DPO conventions, each reward is the policy-versus-reference log-probability difference scaled by beta, Rewards/margins is the chosen reward minus the rejected reward, and Rewards/accuracies is the fraction of pairs in which the chosen summary receives the higher reward.

## Model description

A 1.1B-parameter TinyLlama summarization model aligned with Direct Preference Optimization (DPO). Starting from the SFT checkpoint linked above, it was trained with QLoRA (4-bit base weights plus LoRA adapters), so this repository contains PEFT adapter weights rather than a full model checkpoint.

## Intended uses & limitations

Intended for generating short, TL;DR-style summaries of Reddit posts like those seen in training. It has not been evaluated beyond the preference metrics reported here, so behavior on other domains and factual reliability are unknown. A minimal loading sketch is given at the end of this card.

## Training and evaluation data

Training used preference pairs from the [openai/summarize_from_feedback](https://huggingface.co/datasets/openai/summarize_from_feedback) dataset, in which human annotators pick the better of two candidate summaries of a post; the evaluation results above come from a held-out split of the same dataset.

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-06
- train_batch_size: 4
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- gradient_accumulation_steps: 4
- total_train_batch_size: 16
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1
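The training script itself is not included on this card, but the hyperparameters above map naturally onto a TRL `DPOTrainer` run over QLoRA adapters. The sketch below is a hedged reconstruction (TRL ~0.8-era API, matching the framework versions listed at the bottom): `beta`, the LoRA rank/alpha/targets, the sequence lengths, and the prompt template are assumptions, not values documented here.

```python
# Hypothetical reconstruction of this card's DPO run; only the hyperparameter
# list above is documented on the card, and everything marked "assumed" is not.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, TrainingArguments)
from trl import DPOTrainer

sft_model_id = "martimfasantos/tinyllama-1.1b-sum-sft-qlora"

# QLoRA: quantize the base weights to 4-bit NF4 and train LoRA adapters on top.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(sft_model_id, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(sft_model_id)

# Map the comparisons split to the prompt/chosen/rejected columns DPO expects.
raw = load_dataset("openai/summarize_from_feedback", "comparisons", split="train")

def to_dpo_format(example):
    post = example["info"]["post"] or example["info"]["article"]
    return {
        "prompt": f"{post}\n\nTL;DR:",  # assumed prompt template
        "chosen": example["summaries"][example["choice"]]["text"],
        "rejected": example["summaries"][1 - example["choice"]]["text"],
    }

train_dataset = raw.map(to_dpo_format, remove_columns=raw.column_names)

# These values mirror the hyperparameter list on this card.
args = TrainingArguments(
    output_dir="tinyllama-1.1b-sum-dpo-qlora",
    learning_rate=5e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,  # 4 x 4 = total train batch size of 16
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    num_train_epochs=1,
    seed=42,
    bf16=True,
)

trainer = DPOTrainer(
    model,
    ref_model=None,          # with a PEFT adapter, the frozen base acts as the reference
    args=args,
    beta=0.1,                # assumed; TRL's default DPO beta
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    peft_config=LoraConfig(  # rank/alpha/targets are assumed, not from this card
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        task_type="CAUSAL_LM",
    ),
    max_length=1024,         # assumed
    max_prompt_length=512,   # assumed
)
trainer.train()
```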
### Training results

| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
| 0.6927 | 0.02 | 100 | 0.6930 | 0.0049 | 0.0047 | 0.5204 | 0.0003 | -74.6416 | -70.9175 | -3.4966 | -3.4983 |
| 0.692 | 0.03 | 200 | 0.6926 | 0.0146 | 0.0135 | 0.5616 | 0.0012 | -73.7585 | -69.9458 | -3.4924 | -3.4942 |
| 0.6887 | 0.05 | 300 | 0.6911 | 0.0351 | 0.0308 | 0.5732 | 0.0043 | -72.0302 | -67.9024 | -3.4858 | -3.4876 |
| 0.6865 | 0.07 | 400 | 0.6890 | 0.0164 | 0.0077 | 0.5609 | 0.0087 | -74.3370 | -69.7677 | -3.4786 | -3.4805 |
| 0.6864 | 0.09 | 500 | 0.6864 | 0.0236 | 0.0089 | 0.5755 | 0.0146 | -74.2129 | -69.0538 | -3.4662 | -3.4680 |
| 0.6731 | 0.1 | 600 | 0.6838 | 0.0019 | -0.0189 | 0.5871 | 0.0209 | -77.0012 | -71.2189 | -3.4497 | -3.4515 |
| 0.6749 | 0.12 | 700 | 0.6788 | -0.0758 | -0.1091 | 0.5980 | 0.0333 | -86.0178 | -78.9945 | -3.4470 | -3.4489 |
| 0.6678 | 0.14 | 800 | 0.6741 | -0.1859 | -0.2330 | 0.5906 | 0.0471 | -98.4033 | -89.9991 | -3.4169 | -3.4188 |
| 0.6655 | 0.16 | 900 | 0.6709 | -0.1856 | -0.2411 | 0.5927 | 0.0555 | -99.2188 | -89.9669 | -3.3811 | -3.3826 |
| 0.6695 | 0.17 | 1000 | 0.6686 | -0.3893 | -0.4584 | 0.5946 | 0.0691 | -120.9453 | -110.3432 | -3.3595 | -3.3611 |
| 0.6648 | 0.19 | 1100 | 0.6702 | -0.2078 | -0.2671 | 0.5976 | 0.0593 | -101.8174 | -92.1903 | -3.3439 | -3.3453 |
| 0.6543 | 0.21 | 1200 | 0.6642 | -0.3511 | -0.4313 | 0.6011 | 0.0802 | -118.2354 | -106.5216 | -3.3096 | -3.3110 |
| 0.6535 | 0.22 | 1300 | 0.6605 | -0.4651 | -0.5609 | 0.5990 | 0.0957 | -131.1967 | -117.9248 | -3.2817 | -3.2832 |
| 0.6315 | 0.24 | 1400 | 0.6606 | -0.3801 | -0.4704 | 0.6138 | 0.0903 | -122.1497 | -109.4246 | -3.2773 | -3.2788 |
| 0.6595 | 0.26 | 1500 | 0.6544 | -0.5561 | -0.6712 | 0.6197 | 0.1151 | -142.2231 | -127.0196 | -3.2429 | -3.2446 |
| 0.6383 | 0.28 | 1600 | 0.6538 | -0.5868 | -0.7052 | 0.6178 | 0.1184 | -145.6309 | -130.0926 | -3.2318 | -3.2338 |
| 0.6775 | 0.29 | 1700 | 0.6568 | -0.4687 | -0.5717 | 0.6173 | 0.1030 | -132.2748 | -118.2820 | -3.2194 | -3.2212 |
| 0.6312 | 0.31 | 1800 | 0.6497 | -0.7203 | -0.8617 | 0.6111 | 0.1414 | -161.2767 | -143.4406 | -3.1213 | -3.1237 |
| 0.665 | 0.33 | 1900 | 0.6551 | -0.5175 | -0.6278 | 0.6134 | 0.1103 | -137.8867 | -123.1614 | -3.1660 | -3.1680 |
| 0.6385 | 0.34 | 2000 | 0.6522 | -0.6166 | -0.7379 | 0.6162 | 0.1213 | -148.8959 | -133.0700 | -3.1823 | -3.1845 |
| 0.6452 | 0.36 | 2100 | 0.6538 | -0.7088 | -0.8325 | 0.6048 | 0.1237 | -158.3535 | -142.2912 | -3.1344 | -3.1369 |
| 0.6024 | 0.38 | 2200 | 0.6527 | -0.6378 | -0.7639 | 0.6120 | 0.1262 | -151.5019 | -135.1858 | -3.1567 | -3.1596 |
| 0.5912 | 0.4 | 2300 | 0.6485 | -0.8992 | -1.0561 | 0.6106 | 0.1569 | -180.7164 | -161.3302 | -3.0812 | -3.0853 |
| 0.6188 | 0.41 | 2400 | 0.6488 | -0.9960 | -1.1662 | 0.6204 | 0.1702 | -191.7268 | -171.0100 | -3.0219 | -3.0276 |
| 0.6286 | 0.43 | 2500 | 0.6483 | -0.8764 | -1.0333 | 0.6076 | 0.1568 | -178.4354 | -159.0542 | -3.0428 | -3.0475 |
| 0.61 | 0.45 | 2600 | 0.6532 | -0.7428 | -0.8730 | 0.6018 | 0.1302 | -162.4074 | -145.6894 | -3.0767 | -3.0804 |
| 0.6295 | 0.47 | 2700 | 0.6526 | -0.6786 | -0.8083 | 0.6138 | 0.1296 | -155.9322 | -139.2748 | -3.1080 | -3.1114 |
| 0.6504 | 0.48 | 2800 | 0.6510 | -0.7810 | -0.9243 | 0.6106 | 0.1432 | -167.5323 | -149.5115 | -3.0877 | -3.0915 |
| 0.6226 | 0.5 | 2900 | 0.6513 | -0.7637 | -0.9050 | 0.6127 | 0.1413 | -165.6116 | -147.7837 | -3.0831 | -3.0870 |
| 0.6226 | 0.52 | 3000 | 0.6494 | -0.7375 | -0.8834 | 0.6078 | 0.1459 | -163.4444 | -145.1619 | -3.0916 | -3.0955 |
| 0.6062 | 0.53 | 3100 | 0.6485 | -0.7793 | -0.9311 | 0.6129 | 0.1518 | -168.2215 | -149.3398 | -3.0906 | -3.0949 |
| 0.6071 | 0.55 | 3200 | 0.6477 | -0.8041 | -0.9577 | 0.6118 | 0.1536 | -170.8775 | -151.8242 | -3.0911 | -3.0956 |
| 0.608 | 0.57 | 3300 | 0.6461 | -1.1115 | -1.2974 | 0.6150 | 0.1859 | -204.8467 | -182.5597 | -3.0002 | -3.0064 |
| 0.5996 | 0.59 | 3400 | 0.6486 | -0.7960 | -0.9481 | 0.6099 | 0.1520 | -169.9129 | -151.0113 | -3.0691 | -3.0742 |
| 0.6081 | 0.6 | 3500 | 0.6478 | -0.8354 | -0.9930 | 0.6157 | 0.1576 | -174.4116 | -154.9542 | -3.0630 | -3.0681 |
| 0.6256 | 0.62 | 3600 | 0.6491 | -0.7744 | -0.9234 | 0.6145 | 0.1489 | -167.4422 | -148.8546 | -3.0722 | -3.0769 |
| 0.5969 | 0.64 | 3700 | 0.6469 | -0.9732 | -1.1419 | 0.6150 | 0.1687 | -189.2978 | -168.7282 | -3.0171 | -3.0231 |
| 0.6272 | 0.65 | 3800 | 0.6472 | -0.9477 | -1.1124 | 0.6176 | 0.1648 | -186.3489 | -166.1768 | -3.0087 | -3.0145 |
| 0.6222 | 0.67 | 3900 | 0.6467 | -0.9719 | -1.1400 | 0.6166 | 0.1681 | -189.1107 | -168.6043 | -3.0040 | -3.0100 |
| 0.605 | 0.69 | 4000 | 0.6461 | -1.0773 | -1.2558 | 0.6204 | 0.1785 | -200.6857 | -179.1379 | -2.9783 | -2.9849 |
| 0.585 | 0.71 | 4100 | 0.6464 | -0.9836 | -1.1556 | 0.6164 | 0.1720 | -190.6670 | -169.7659 | -3.0024 | -3.0086 |
| 0.6602 | 0.72 | 4200 | 0.6465 | -0.9496 | -1.1182 | 0.6178 | 0.1686 | -186.9268 | -166.3669 | -3.0089 | -3.0150 |
| 0.6074 | 0.74 | 4300 | 0.6468 | -0.8954 | -1.0597 | 0.6183 | 0.1643 | -181.0816 | -160.9504 | -3.0248 | -3.0306 |
| 0.6105 | 0.76 | 4400 | 0.6470 | -0.8905 | -1.0547 | 0.6150 | 0.1641 | -180.5745 | -160.4626 | -3.0306 | -3.0365 |
| 0.6127 | 0.78 | 4500 | 0.6470 | -0.8899 | -1.0538 | 0.6183 | 0.1638 | -180.4842 | -160.4037 | -3.0280 | -3.0338 |
| 0.5798 | 0.79 | 4600 | 0.6468 | -0.9128 | -1.0793 | 0.6208 | 0.1665 | -183.0344 | -162.6864 | -3.0195 | -3.0255 |
| 0.6228 | 0.81 | 4700 | 0.6467 | -0.9215 | -1.0896 | 0.6192 | 0.1681 | -184.0640 | -163.5562 | -3.0231 | -3.0291 |
| 0.6131 | 0.83 | 4800 | 0.6466 | -0.9391 | -1.1091 | 0.6199 | 0.1700 | -186.0176 | -165.3165 | -3.0141 | -3.0202 |
| 0.6215 | 0.84 | 4900 | 0.6465 | -0.9478 | -1.1189 | 0.6197 | 0.1711 | -186.9947 | -166.1919 | -3.0180 | -3.0241 |
| 0.585 | 0.86 | 5000 | 0.6460 | -0.9592 | -1.1321 | 0.6201 | 0.1729 | -188.3154 | -167.3252 | -3.0164 | -3.0226 |
| 0.6478 | 0.88 | 5100 | 0.6460 | -0.9606 | -1.1336 | 0.6194 | 0.1730 | -188.4695 | -167.4737 | -3.0151 | -3.0213 |
| 0.6018 | 0.9 | 5200 | 0.6462 | -0.9572 | -1.1296 | 0.6206 | 0.1725 | -188.0692 | -167.1259 | -3.0105 | -3.0167 |
| 0.5963 | 0.91 | 5300 | 0.6465 | -0.9564 | -1.1282 | 0.6199 | 0.1718 | -187.9285 | -167.0541 | -3.0167 | -3.0229 |
| 0.5921 | 0.93 | 5400 | 0.6462 | -0.9569 | -1.1292 | 0.6199 | 0.1723 | -188.0274 | -167.0996 | -3.0133 | -3.0196 |
| 0.6015 | 0.95 | 5500 | 0.6463 | -0.9570 | -1.1292 | 0.6192 | 0.1723 | -188.0282 | -167.1056 | -3.0164 | -3.0226 |
| 0.6148 | 0.96 | 5600 | 0.6461 | -0.9543 | -1.1269 | 0.6194 | 0.1726 | -187.7934 | -166.8396 | -3.0142 | -3.0205 |
| 0.6299 | 0.98 | 5700 | 0.6462 | -0.9543 | -1.1263 | 0.6194 | 0.1720 | -187.7363 | -166.8363 | -3.0166 | -3.0228 |
| 0.5854 | 1.0 | 5800 | 0.6463 | -0.9560 | -1.1279 | 0.6204 | 0.1719 | -187.9012 | -167.0102 | -3.0162 | -3.0224 |

### Framework versions

- PEFT 0.7.1
- Transformers 4.39.3
- Pytorch 2.1.2
- Datasets 2.18.0
- Tokenizers 0.15.2
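## Inference example

This repository holds PEFT (LoRA) adapter weights rather than merged model weights. Below is a minimal loading sketch using PEFT's auto classes; the `TL;DR:` prompt shape is an assumption based on the training data, not a documented format.

```python
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_id = "martimfasantos/tinyllama-1.1b-sum-dpo-qlora"

# AutoPeftModelForCausalLM reads the adapter config, loads the underlying
# base model, and attaches this repository's LoRA weights to it.
model = AutoPeftModelForCausalLM.from_pretrained(adapter_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(adapter_id)

# Assumed prompt format: the post to summarize followed by a "TL;DR:" cue.
prompt = "<post to summarize>\n\nTL;DR:"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64, do_sample=False)
print(tokenizer.decode(output[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```

Greedy decoding (`do_sample=False`) is shown only for reproducibility; sampling parameters can be tuned freely.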