---
license: apache-2.0
tags:
  - merge
  - OpenPipe/mistral-ft-optimized-1218
  - mlabonne/NeuralHermes-2.5-Mistral-7B
---

# mistral-7b-merged-ties

mistral-7b-merged-ties is a TIES merge of the following models, using mistralai/Mistral-7B-v0.1 as the base:

* [OpenPipe/mistral-ft-optimized-1218](https://huggingface.co/OpenPipe/mistral-ft-optimized-1218)
* [mlabonne/NeuralHermes-2.5-Mistral-7B](https://huggingface.co/mlabonne/NeuralHermes-2.5-Mistral-7B)

## 🧩 Configuration

```yaml
models:
  - model: mistralai/Mistral-7B-v0.1
  - model: OpenPipe/mistral-ft-optimized-1218
    parameters:
      density: 0.5  # density gradient
      weight: 0.3
  - model: mlabonne/NeuralHermes-2.5-Mistral-7B
    parameters:
      density: 0.5
      weight: 0.3  # weight gradient
merge_method: ties
base_model: mistralai/Mistral-7B-v0.1
parameters:
  normalize: true
dtype: bfloat16
```
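The configuration above follows the mergekit YAML format. As a quick way to try the merged model, here is a minimal usage sketch with 🤗 Transformers; the prompt and generation settings are illustrative and not part of the original card.

```python
# Minimal sketch: load mychen76/mistral-7b-merged-ties and generate text.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mychen76/mistral-7b-merged-ties"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # matches the merge dtype above
    device_map="auto",
)

prompt = "Explain the TIES merging method in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```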

## Evaluation

Detailed results: https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-ties

Latest result: https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-ties/blob/main/results_2024-03-10T11-05-18.535141.json

```json
{
    "all": {
        "acc": 0.6445924072176131,
        "acc_stderr": 0.03213293328697562,
        "acc_norm": 0.6450342620069291,
        "acc_norm_stderr": 0.032788565108750604,
        "mc1": 0.4455324357405141,
        "mc1_stderr": 0.017399335280140357,
        "mc2": 0.6131109579182783,
        "mc2_stderr": 0.015351738756398125
    },
    "harness|arc:challenge|25": {
        "acc": 0.6390784982935154,
        "acc_stderr": 0.014034761386175452,
        "acc_norm": 0.6791808873720137,
        "acc_norm_stderr": 0.013640943091946531
    },
    "harness|hellaswag|10": {
        "acc": 0.6722764389563832,
        "acc_stderr": 0.004684241685200317,
        "acc_norm": 0.85929097789285,
        "acc_norm_stderr": 0.00347010499020439
    },
    "harness|hendrycksTest-abstract_algebra|5": {
        "acc": 0.28,
        "acc_stderr": 0.04512608598542128,
        "acc_norm": 0.28,
        "acc_norm_stderr": 0.04512608598542128
    },
    "harness|hendrycksTest-anatomy|5": {
        "acc": 0.6074074074074074,
        "acc_stderr": 0.0421850621536888,
        "acc_norm": 0.6074074074074074,
        "acc_norm_stderr": 0.0421850621536888
    },
    "harness|hendrycksTest-astronomy|5": {
        "acc": 0.743421052631579,
        "acc_stderr": 0.0355418036802569,
        "acc_norm": 0.743421052631579,
        "acc_norm_stderr": 0.0355418036802569
    },
    "harness|hendrycksTest-business_ethics|5": {
        "acc": 0.61,
        "acc_stderr": 0.04902071300001975,
        "acc_norm": 0.61,
        "acc_norm_stderr": 0.04902071300001975
    },
    "harness|hendrycksTest-clinical_knowledge|5": {
        "acc": 0.6867924528301886,
        "acc_stderr": 0.028544793319055326,
        "acc_norm": 0.6867924528301886,
        "acc_norm_stderr": 0.028544793319055326
    },
    "harness|hendrycksTest-college_biology|5": {
        "acc": 0.7777777777777778,
        "acc_stderr": 0.03476590104304134,
        "acc_norm": 0.7777777777777778,
        "acc_norm_stderr": 0.03476590104304134
    },
    "harness|hendrycksTest-college_chemistry|5": {
        "acc": 0.48,
        "acc_stderr": 0.050211673156867795,
        "acc_norm": 0.48,
        "acc_norm_stderr": 0.050211673156867795
    },
    "harness|hendrycksTest-college_computer_science|5": {
        "acc": 0.48,
        "acc_stderr": 0.050211673156867795,
        "acc_norm": 0.48,
        "acc_norm_stderr": 0.050211673156867795
    },
    "harness|hendrycksTest-college_mathematics|5": {
        "acc": 0.32,
        "acc_stderr": 0.04688261722621504,
        "acc_norm": 0.32,
        "acc_norm_stderr": 0.04688261722621504
    },
    "harness|hendrycksTest-college_medicine|5": {
        "acc": 0.630057803468208,
        "acc_stderr": 0.036812296333943194,
        "acc_norm": 0.630057803468208,
        "acc_norm_stderr": 0.036812296333943194
    },
    "harness|hendrycksTest-college_physics|5": {
        "acc": 0.4117647058823529,
        "acc_stderr": 0.048971049527263666,
        "acc_norm": 0.4117647058823529,
        "acc_norm_stderr": 0.048971049527263666
    },
    "harness|hendrycksTest-computer_security|5": {
        "acc": 0.76,
        "acc_stderr": 0.042923469599092816,
        "acc_norm": 0.76,
        "acc_norm_stderr": 0.042923469599092816
    },
    "harness|hendrycksTest-conceptual_physics|5": {
        "acc": 0.574468085106383,
        "acc_stderr": 0.03232146916224468,
        "acc_norm": 0.574468085106383,
        "acc_norm_stderr": 0.03232146916224468
    },
    "harness|hendrycksTest-econometrics|5": {
        "acc": 0.5175438596491229,
        "acc_stderr": 0.04700708033551038,
        "acc_norm": 0.5175438596491229,
        "acc_norm_stderr": 0.04700708033551038
    },
    "harness|hendrycksTest-electrical_engineering|5": {
        "acc": 0.5448275862068965,
        "acc_stderr": 0.04149886942192117,
        "acc_norm": 0.5448275862068965,
        "acc_norm_stderr": 0.04149886942192117
    },
    "harness|hendrycksTest-elementary_mathematics|5": {
        "acc": 0.4126984126984127,
        "acc_stderr": 0.025355741263055263,
        "acc_norm": 0.4126984126984127,
        "acc_norm_stderr": 0.025355741263055263
    },
    "harness|hendrycksTest-formal_logic|5": {
        "acc": 0.4365079365079365,
        "acc_stderr": 0.04435932892851466,
        "acc_norm": 0.4365079365079365,
        "acc_norm_stderr": 0.04435932892851466
    },
    "harness|hendrycksTest-global_facts|5": {
        "acc": 0.35,
        "acc_stderr": 0.047937248544110196,
        "acc_norm": 0.35,
        "acc_norm_stderr": 0.047937248544110196
    },
    "harness|hendrycksTest-high_school_biology|5": {
        "acc": 0.7645161290322581,
        "acc_stderr": 0.02413763242933771,
        "acc_norm": 0.7645161290322581,
        "acc_norm_stderr": 0.02413763242933771
    },
    "harness|hendrycksTest-high_school_chemistry|5": {
        "acc": 0.49261083743842365,
        "acc_stderr": 0.035176035403610084,
        "acc_norm": 0.49261083743842365,
        "acc_norm_stderr": 0.035176035403610084
    },
    "harness|hendrycksTest-high_school_computer_science|5": {
        "acc": 0.67,
        "acc_stderr": 0.04725815626252607,
        "acc_norm": 0.67,
        "acc_norm_stderr": 0.04725815626252607
    },
    "harness|hendrycksTest-high_school_european_history|5": {
        "acc": 0.7757575757575758,
        "acc_stderr": 0.03256866661681102,
        "acc_norm": 0.7757575757575758,
        "acc_norm_stderr": 0.03256866661681102
    },
    "harness|hendrycksTest-high_school_geography|5": {
        "acc": 0.7828282828282829,
        "acc_stderr": 0.02937661648494563,
        "acc_norm": 0.7828282828282829,
        "acc_norm_stderr": 0.02937661648494563
    },
    "harness|hendrycksTest-high_school_government_and_politics|5": {
        "acc": 0.8963730569948186,
        "acc_stderr": 0.02199531196364424,
        "acc_norm": 0.8963730569948186,
        "acc_norm_stderr": 0.02199531196364424
    },
    "harness|hendrycksTest-high_school_macroeconomics|5": {
        "acc": 0.6410256410256411,
        "acc_stderr": 0.024321738484602354,
        "acc_norm": 0.6410256410256411,
        "acc_norm_stderr": 0.024321738484602354
    },
    "harness|hendrycksTest-high_school_mathematics|5": {
        "acc": 0.34814814814814815,
        "acc_stderr": 0.029045600290616255,
        "acc_norm": 0.34814814814814815,
        "acc_norm_stderr": 0.029045600290616255
    },
    "harness|hendrycksTest-high_school_microeconomics|5": {
        "acc": 0.6890756302521008,
        "acc_stderr": 0.03006676158297793,
        "acc_norm": 0.6890756302521008,
        "acc_norm_stderr": 0.03006676158297793
    },
    "harness|hendrycksTest-high_school_physics|5": {
        "acc": 0.2980132450331126,
        "acc_stderr": 0.037345356767871984,
        "acc_norm": 0.2980132450331126,
        "acc_norm_stderr": 0.037345356767871984
    },
    "harness|hendrycksTest-high_school_psychology|5": {
        "acc": 0.8495412844036697,
        "acc_stderr": 0.015328563932669237,
        "acc_norm": 0.8495412844036697,
        "acc_norm_stderr": 0.015328563932669237
    },
    "harness|hendrycksTest-high_school_statistics|5": {
        "acc": 0.5231481481481481,
        "acc_stderr": 0.03406315360711507,
        "acc_norm": 0.5231481481481481,
        "acc_norm_stderr": 0.03406315360711507
    },
    "harness|hendrycksTest-high_school_us_history|5": {
        "acc": 0.8186274509803921,
        "acc_stderr": 0.027044621719474086,
        "acc_norm": 0.8186274509803921,
        "acc_norm_stderr": 0.027044621719474086
    },
    "harness|hendrycksTest-high_school_world_history|5": {
        "acc": 0.8185654008438819,
        "acc_stderr": 0.025085961144579665,
        "acc_norm": 0.8185654008438819,
        "acc_norm_stderr": 0.025085961144579665
    },
    "harness|hendrycksTest-human_aging|5": {
        "acc": 0.6860986547085202,
        "acc_stderr": 0.031146796482972465,
        "acc_norm": 0.6860986547085202,
        "acc_norm_stderr": 0.031146796482972465
    },
    "harness|hendrycksTest-human_sexuality|5": {
        "acc": 0.7862595419847328,
        "acc_stderr": 0.0359546161177469,
        "acc_norm": 0.7862595419847328,
        "acc_norm_stderr": 0.0359546161177469
    },
    "harness|hendrycksTest-international_law|5": {
        "acc": 0.7851239669421488,
        "acc_stderr": 0.037494924487096966,
        "acc_norm": 0.7851239669421488,
        "acc_norm_stderr": 0.037494924487096966
    },
    "harness|hendrycksTest-jurisprudence|5": {
        "acc": 0.7962962962962963,
        "acc_stderr": 0.03893542518824847,
        "acc_norm": 0.7962962962962963,
        "acc_norm_stderr": 0.03893542518824847
    },
    "harness|hendrycksTest-logical_fallacies|5": {
        "acc": 0.7607361963190185,
        "acc_stderr": 0.033519538795212696,
        "acc_norm": 0.7607361963190185,
        "acc_norm_stderr": 0.033519538795212696
    },
    "harness|hendrycksTest-machine_learning|5": {
        "acc": 0.4642857142857143,
        "acc_stderr": 0.04733667890053756,
        "acc_norm": 0.4642857142857143,
        "acc_norm_stderr": 0.04733667890053756
    },
    "harness|hendrycksTest-management|5": {
        "acc": 0.7766990291262136,
        "acc_stderr": 0.04123553189891431,
        "acc_norm": 0.7766990291262136,
        "acc_norm_stderr": 0.04123553189891431
    },
    "harness|hendrycksTest-marketing|5": {
        "acc": 0.8547008547008547,
        "acc_stderr": 0.023086635086841407,
        "acc_norm": 0.8547008547008547,
        "acc_norm_stderr": 0.023086635086841407
    },
    "harness|hendrycksTest-medical_genetics|5": {
        "acc": 0.71,
        "acc_stderr": 0.045604802157206845,
        "acc_norm": 0.71,
        "acc_norm_stderr": 0.045604802157206845
    },
    "harness|hendrycksTest-miscellaneous|5": {
        "acc": 0.8301404853128991,
        "acc_stderr": 0.013428186370608304,
        "acc_norm": 0.8301404853128991,
        "acc_norm_stderr": 0.013428186370608304
    },
    "harness|hendrycksTest-moral_disputes|5": {
        "acc": 0.7369942196531792,
        "acc_stderr": 0.023703099525258172,
        "acc_norm": 0.7369942196531792,
        "acc_norm_stderr": 0.023703099525258172
    },
    "harness|hendrycksTest-moral_scenarios|5": {
        "acc": 0.3664804469273743,
        "acc_stderr": 0.016115235504865467,
        "acc_norm": 0.3664804469273743,
        "acc_norm_stderr": 0.016115235504865467
    },
    "harness|hendrycksTest-nutrition|5": {
        "acc": 0.7320261437908496,
        "acc_stderr": 0.025360603796242553,
        "acc_norm": 0.7320261437908496,
        "acc_norm_stderr": 0.025360603796242553
    },
    "harness|hendrycksTest-philosophy|5": {
        "acc": 0.7170418006430869,
        "acc_stderr": 0.02558306248998481,
        "acc_norm": 0.7170418006430869,
        "acc_norm_stderr": 0.02558306248998481
    },
    "harness|hendrycksTest-prehistory|5": {
        "acc": 0.7376543209876543,
        "acc_stderr": 0.024477222856135114,
        "acc_norm": 0.7376543209876543,
        "acc_norm_stderr": 0.024477222856135114
    },
    "harness|hendrycksTest-professional_accounting|5": {
        "acc": 0.5070921985815603,
        "acc_stderr": 0.02982449855912901,
        "acc_norm": 0.5070921985815603,
        "acc_norm_stderr": 0.02982449855912901
    },
    "harness|hendrycksTest-professional_law|5": {
        "acc": 0.4667535853976532,
        "acc_stderr": 0.01274197433389723,
        "acc_norm": 0.4667535853976532,
        "acc_norm_stderr": 0.01274197433389723
    },
    "harness|hendrycksTest-professional_medicine|5": {
        "acc": 0.6764705882352942,
        "acc_stderr": 0.02841820861940676,
        "acc_norm": 0.6764705882352942,
        "acc_norm_stderr": 0.02841820861940676
    },
    "harness|hendrycksTest-professional_psychology|5": {
        "acc": 0.6666666666666666,
        "acc_stderr": 0.019070985589687495,
        "acc_norm": 0.6666666666666666,
        "acc_norm_stderr": 0.019070985589687495
    },
    "harness|hendrycksTest-public_relations|5": {
        "acc": 0.6545454545454545,
        "acc_stderr": 0.04554619617541054,
        "acc_norm": 0.6545454545454545,
        "acc_norm_stderr": 0.04554619617541054
    },
    "harness|hendrycksTest-security_studies|5": {
        "acc": 0.7306122448979592,
        "acc_stderr": 0.02840125202902294,
        "acc_norm": 0.7306122448979592,
        "acc_norm_stderr": 0.02840125202902294
    },
    "harness|hendrycksTest-sociology|5": {
        "acc": 0.845771144278607,
        "acc_stderr": 0.025538433368578337,
        "acc_norm": 0.845771144278607,
        "acc_norm_stderr": 0.025538433368578337
    },
    "harness|hendrycksTest-us_foreign_policy|5": {
        "acc": 0.86,
        "acc_stderr": 0.0348735088019777,
        "acc_norm": 0.86,
        "acc_norm_stderr": 0.0348735088019777
    },
    "harness|hendrycksTest-virology|5": {
        "acc": 0.5481927710843374,
        "acc_stderr": 0.03874371556587953,
        "acc_norm": 0.5481927710843374,
        "acc_norm_stderr": 0.03874371556587953
    },
    "harness|hendrycksTest-world_religions|5": {
        "acc": 0.8304093567251462,
        "acc_stderr": 0.02878210810540171,
        "acc_norm": 0.8304093567251462,
        "acc_norm_stderr": 0.02878210810540171
    },
    "harness|truthfulqa:mc|0": {
        "mc1": 0.4455324357405141,
        "mc1_stderr": 0.017399335280140357,
        "mc2": 0.6131109579182783,
        "mc2_stderr": 0.015351738756398125
    },
    "harness|winogrande|5": {
        "acc": 0.8003157063930545,
        "acc_stderr": 0.011235328382625842
    },
    "harness|gsm8k|5": {
        "acc": 0.6899166034874905,
        "acc_stderr": 0.01274030571737627
    }
}
```
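To pull these numbers programmatically, a small sketch using `huggingface_hub` is shown below. The repo id and filename come from the links above; the nesting of the downloaded JSON (an outer `"results"` key wrapping the block shown here) is an assumption about the leaderboard file layout.

```python
# Sketch: download the linked results file from the leaderboard details dataset
# and print the aggregate ("all") metrics.
import json
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="open-llm-leaderboard/details_mychen76__mistral-7b-merged-ties",
    filename="results_2024-03-10T11-05-18.535141.json",
    repo_type="dataset",
)

with open(path) as f:
    data = json.load(f)

# Assumption: the file nests the scores under a top-level "results" key;
# fall back to the top level if it does not.
aggregate = data.get("results", data).get("all", {})
for metric, value in aggregate.items():
    print(f"{metric}: {value:.4f}")
```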