|
--- |
|
base_model: |
|
- meta-llama/Meta-Llama-3-8B |
|
- meta-llama/Meta-Llama-3-8B-Instruct |
|
library_name: transformers |
|
tags: |
|
- mergekit |
|
- merge |
|
license: other |
|
license_name: llama3 |
|
license_link: LICENSE |
|
extra_gated_prompt: >- |
|
### META LLAMA 3 COMMUNITY LICENSE AGREEMENT |
|
--- |
|
# Llama-3-DARE-v3-8B |
|
|
|
This model is a merge of pre-trained language models, produced with [mergekit](https://github.com/cg123/mergekit). |
|
|
|
## Merge Details |
|
### Merge Method |
|
|
|
This model was merged with the [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) merge method (`dare_ties`), with [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) as the base model. |
|
|
|
### Models Merged |
|
|
|
The following models were included in the merge: |
|
* [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) |
|
|
|
### Configuration |
|
|
|
The following YAML configuration was used to produce this model: |
|
|
|
```yaml |
|
base_model: |
|
model: |
|
path: meta-llama/Meta-Llama-3-8B |
|
dtype: bfloat16 |
|
merge_method: dare_ties |
|
parameters: |
|
int8_mask: 1.0 |
|
normalize: 0.0 |
|
slices: |
|
- sources: |
|
- layer_range: [0, 32] |
|
model: |
|
model: |
|
path: meta-llama/Meta-Llama-3-8B-Instruct |
|
parameters: |
|
density: |
|
- filter: embed_token |
|
value: 0.12392239047187575 |
|
- filter: model.norm |
|
value: 0.9321540995757155 |
|
- filter: lm_head |
|
value: 0.9321540995757155 |
|
- filter: layers.0. |
|
value: 0.9790541113047215 |
|
- filter: layers.1. |
|
value: 0.5837293662960215 |
|
- filter: layers.2. |
|
value: 0.9412235995535374 |
|
- filter: layers.3. |
|
value: 0.31233149627589435 |
|
- filter: layers.4. |
|
value: 0.8429344053665633 |
|
- filter: layers.5. |
|
value: 0.6736586892578483 |
|
- filter: layers.6. |
|
value: 0.24511379602231775 |
|
- filter: layers.7. |
|
value: 0.9579106307398759 |
|
- filter: layers.8. |
|
value: 0.763438755789315 |
|
- filter: layers.9. |
|
value: 0.9682444116383796 |
|
- filter: layers.10. |
|
value: 0.08453321074167956 |
|
- filter: layers.11. |
|
value: 0.7964240843030714 |
|
- filter: layers.12. |
|
value: 0.33878902628372387 |
|
- filter: layers.13. |
|
value: 0.8458690962458848 |
|
- filter: layers.14. |
|
value: 0.1052429440590172 |
|
- filter: layers.15. |
|
value: 0.7623565162481113 |
|
- filter: layers.16. |
|
value: 0.9707532532287503 |
|
- filter: layers.17. |
|
value: 0.12523916859700104 |
|
- filter: layers.18. |
|
value: 0.8415224301240337 |
|
- filter: layers.19. |
|
value: 0.12872802862625543 |
|
- filter: layers.20. |
|
value: 0.5529482316673654 |
|
- filter: layers.21. |
|
value: 0.09282157218446654 |
|
- filter: layers.22. |
|
value: 0.8370536041906024 |
|
- filter: layers.23. |
|
value: 0.9175102292532279 |
|
- filter: layers.24. |
|
value: 0.8983418171724273 |
|
- filter: layers.25. |
|
value: 0.8136717935920286 |
|
- filter: layers.26. |
|
value: 0.05054222298359671 |
|
- filter: layers.27. |
|
value: 0.869544796603939 |
|
- filter: layers.28. |
|
value: 0.04716191274361657 |
|
- filter: layers.29. |
|
value: 0.13032011470396976 |
|
- filter: layers.30. |
|
value: 0.19116844757457122 |
|
- filter: layers.31. |
|
value: 0.1455500526734667 |
|
weight: |
|
- filter: embed_token |
|
value: 0.12232308541622408 |
|
- filter: model.norm. |
|
value: 0.7266901175725669 |
|
- filter: lm_head |
|
value: 0.7266901175725669 |
|
- filter: layers.0. |
|
value: 0.8207345096435786 |
|
- filter: layers.1. |
|
value: 0.9504884225844141 |
|
- filter: layers.2. |
|
value: 0.7328920145925348 |
|
- filter: layers.3. |
|
value: 0.6736895869883676 |
|
- filter: layers.4. |
|
value: 0.7970121175937948 |
|
- filter: layers.5. |
|
value: 0.9789312914172503 |
|
- filter: layers.6. |
|
value: 0.962551880054289 |
|
- filter: layers.7. |
|
value: 0.9561739657469092 |
|
- filter: layers.8. |
|
value: 0.8536201095014567 |
|
- filter: layers.9. |
|
value: 0.9376890733815005 |
|
- filter: layers.10. |
|
value: 0.9551398977410172 |
|
- filter: layers.11. |
|
value: 0.9967262117722387 |
|
- filter: layers.12. |
|
value: 0.7701592243202565 |
|
- filter: layers.13. |
|
value: 0.6842573291853765 |
|
- filter: layers.14. |
|
value: 0.798376050387875 |
|
- filter: layers.15. |
|
value: 0.801001533828631 |
|
- filter: layers.16. |
|
value: 0.14199137490635572 |
|
- filter: layers.17. |
|
value: 0.7587521819162459 |
|
- filter: layers.18. |
|
value: 0.9769968221517621 |
|
- filter: layers.19. |
|
value: 0.5936888514834866 |
|
- filter: layers.20. |
|
value: 0.979481555973458 |
|
- filter: layers.21. |
|
value: 0.1362420472755318 |
|
- filter: layers.22. |
|
value: 0.1451804836602873 |
|
- filter: layers.23. |
|
value: 0.9319964347718136 |
|
- filter: layers.24. |
|
value: 0.8814265997262563 |
|
- filter: layers.25. |
|
value: 0.870638468633288 |
|
- filter: layers.26. |
|
value: 0.06311119172889679 |
|
- filter: layers.27. |
|
value: 0.902932718098389 |
|
- filter: layers.28. |
|
value: 0.9174145551871369 |
|
- filter: layers.29. |
|
value: 0.9048467992426628 |
|
- filter: layers.30. |
|
value: 0.04929564345988049 |
|
- filter: layers.31. |
|
value: 0.922707420329624 |
|
- layer_range: [0, 32] |
|
model: |
|
model: |
|
path: meta-llama/Meta-Llama-3-8B |
|
parameters: |
|
density: |
|
- filter: embed_token |
|
value: 0.1479082895745973 |
|
- filter: model.norm |
|
value: 0.18334257522610492 |
|
- filter: lm_head |
|
value: 0.18334257522610492 |
|
- filter: layers.0. |
|
value: 0.17476905394590242 |
|
- filter: layers.1. |
|
value: 0.11161623400742576 |
|
- filter: layers.2. |
|
value: 0.16109344344908105 |
|
- filter: layers.3. |
|
value: 0.2735834275693588 |
|
- filter: layers.4. |
|
value: 0.8258891898417566 |
|
- filter: layers.5. |
|
value: 0.21085556872053604 |
|
- filter: layers.6. |
|
value: 0.20766543320815006 |
|
- filter: layers.7. |
|
value: 0.8947694253855037 |
|
- filter: layers.8. |
|
value: 0.734275334571558 |
|
- filter: layers.9. |
|
value: 0.1632311874735626 |
|
- filter: layers.10. |
|
value: 0.940700711783812 |
|
- filter: layers.11. |
|
value: 0.07148774488326176 |
|
- filter: layers.12. |
|
value: 0.07541557340487534 |
|
- filter: layers.13. |
|
value: 0.13833770311269455 |
|
- filter: layers.14. |
|
value: 0.9612379711004643 |
|
- filter: layers.15. |
|
value: 0.8090075125599039 |
|
- filter: layers.16. |
|
value: 0.7255233959581611 |
|
- filter: layers.17. |
|
value: 0.2634507144990253 |
|
- filter: layers.18. |
|
value: 0.07135903934561608 |
|
- filter: layers.19. |
|
value: 0.1180822729914722 |
|
- filter: layers.20. |
|
value: 0.07751975543731829 |
|
- filter: layers.21. |
|
value: 0.9990557487897024 |
|
- filter: layers.22. |
|
value: 0.17045615586066107 |
|
- filter: layers.23. |
|
value: 0.19588339382290734 |
|
- filter: layers.24. |
|
value: 0.152313213824124 |
|
- filter: layers.25. |
|
value: 0.8120646024357844 |
|
- filter: layers.26. |
|
value: 0.6661112930033101 |
|
- filter: layers.27. |
|
value: 0.7782416079783356 |
|
- filter: layers.28. |
|
value: 0.24425477536875875 |
|
- filter: layers.29. |
|
value: 0.05962906198631645 |
|
- filter: layers.30. |
|
value: 0.023125010859717736 |
|
- filter: layers.31. |
|
value: 0.9109899850283665 |
|
weight: |
|
- filter: embed_token |
|
value: 0.12126630242759481 |
|
- filter: model.norm. |
|
value: 0.07734624352533248 |
|
- filter: lm_head |
|
value: 0.07734624352533248 |
|
- filter: layers.0. |
|
value: 0.16823028525905875 |
|
- filter: layers.1. |
|
value: 0.9417449451303712 |
|
- filter: layers.2. |
|
value: 0.7717519522673566 |
|
- filter: layers.3. |
|
value: 0.7601040526349441 |
|
- filter: layers.4. |
|
value: 0.0019090753772779204 |
|
- filter: layers.5. |
|
value: 0.16032547702469566 |
|
- filter: layers.6. |
|
value: 0.12224994873335546 |
|
- filter: layers.7. |
|
value: 0.27695385066177564 |
|
- filter: layers.8. |
|
value: 0.018799614691291815 |
|
- filter: layers.9. |
|
value: 0.9759168818301882 |
|
- filter: layers.10. |
|
value: 0.006525097827571269 |
|
- filter: layers.11. |
|
value: 0.756537797885991 |
|
- filter: layers.12. |
|
value: 0.8051453838823787 |
|
- filter: layers.13. |
|
value: 0.8879631547059472 |
|
- filter: layers.14. |
|
value: 0.713799746085261 |
|
- filter: layers.15. |
|
value: 0.03862352880564701 |
|
- filter: layers.16. |
|
value: 0.1837448681603537 |
|
- filter: layers.17. |
|
value: 0.30267576939315943 |
|
- filter: layers.18. |
|
value: 0.17332405807285195 |
|
- filter: layers.19. |
|
value: 0.11686420946772721 |
|
- filter: layers.20. |
|
value: 0.2826021601318976 |
|
- filter: layers.21. |
|
value: 0.14782621450845335 |
|
- filter: layers.22. |
|
value: 0.8764989337980243 |
|
- filter: layers.23. |
|
value: 0.5836574402524565 |
|
- filter: layers.24. |
|
value: 0.8579541606567384 |
|
- filter: layers.25. |
|
value: 0.2310998812434597 |
|
- filter: layers.26. |
|
value: 0.13443251834995432 |
|
- filter: layers.27. |
|
value: 0.9754382468614297 |
|
- filter: layers.28. |
|
value: 0.9406099007353652 |
|
- filter: layers.29. |
|
value: 0.10203532427654999 |
|
- filter: layers.30. |
|
value: 0.747420490316978 |
|
- filter: layers.31. |
|
value: 0.06383831695667043 |
|
``` |
|
|