{
    "claude2###human": {
        "Alpaca 7B": {
            "nq": {
                "abstain": 27.0,
                "entailment": 21.90920951194924,
                "neutral": 49.65358081796437,
                "contradiction": 28.437209670086382
            },
            "msmarco": {
                "abstain": 2.0,
                "entailment": 59.01711667017789,
                "neutral": 19.84909648174954,
                "contradiction": 21.13378684807256
            },
            "dolly": {
                "abstain": 13.0,
                "entailment": 76.98572340813719,
                "neutral": 12.884738186462325,
                "contradiction": 10.129538405400474
            },
            "avg": {
                "abstain": 14.000000000000002,
                "entailment": 54.57677389363435,
                "neutral": 25.933701849399526,
                "contradiction": 19.489524256966117
            }
        },
        "GPT-3.5-Turbo": {
            "nq": {
                "abstain": 1.0,
                "entailment": 58.8535769373559,
                "neutral": 22.130219091003404,
                "contradiction": 19.016203971640692
            },
            "msmarco": {
                "abstain": 20.0,
                "entailment": 77.3299637383689,
                "neutral": 6.634321975916804,
                "contradiction": 16.035714285714285
            },
            "dolly": {
                "abstain": 0.0,
                "entailment": 93.69698079698081,
                "neutral": 2.682251082251082,
                "contradiction": 3.6207681207681204
            },
            "avg": {
                "abstain": 7.000000000000001,
                "entailment": 76.64014084432196,
                "neutral": 10.716353248415016,
                "contradiction": 12.643505907263023
            }
        },
        "Claude 2": {
            "nq": {
                "abstain": 21.0,
                "entailment": 36.24974533202381,
                "neutral": 60.93093966511689,
                "contradiction": 2.819315002859307
            },
            "msmarco": {
                "abstain": 6.0,
                "entailment": 88.95130578641216,
                "neutral": 6.450995812697939,
                "contradiction": 4.5976984008898905
            },
            "dolly": {
                "abstain": 8.0,
                "entailment": 90.86864524364525,
                "neutral": 6.670880448054362,
                "contradiction": 2.4604743083003955
            },
            "avg": {
                "abstain": 11.666666666666668,
                "entailment": 73.90591693421882,
                "neutral": 22.768523928901285,
                "contradiction": 3.3255591368798907
            }
        },
        "InstructGPT": {
            "nq": {
                "abstain": 5.0,
                "entailment": 20.438596491228072,
                "neutral": 25.30701754385965,
                "contradiction": 54.254385964912274
            },
            "msmarco": {
                "abstain": 13.0,
                "entailment": 65.80729296246537,
                "neutral": 13.403575989782887,
                "contradiction": 20.78913104775174
            },
            "dolly": {
                "abstain": 1.0,
                "entailment": 81.58865825532492,
                "neutral": 5.608465608465608,
                "contradiction": 12.802876136209468
            },
            "avg": {
                "abstain": 6.333333333333334,
                "entailment": 56.029104347609696,
                "neutral": 14.68155114952268,
                "contradiction": 29.289344502867635
            }
        },
        "Falcon 40B Instruct": {
            "nq": {
                "abstain": 27.0,
                "entailment": 37.96803652968036,
                "neutral": 17.123287671232877,
                "contradiction": 44.90867579908676
            },
            "msmarco": {
                "abstain": 17.0,
                "entailment": 61.28370625358577,
                "neutral": 17.053930005737232,
                "contradiction": 21.662363740676994
            },
            "dolly": {
                "abstain": 3.0,
                "entailment": 78.37657474255414,
                "neutral": 13.978295473140834,
                "contradiction": 7.645129784305042
            },
            "avg": {
                "abstain": 15.66666666666667,
                "entailment": 61.10965231518591,
                "neutral": 15.894746448106131,
                "contradiction": 22.99560123670796
            }
        },
        "GPT-4": {
            "nq": {
                "abstain": 0.0,
                "entailment": 71.44246031746032,
                "neutral": 15.671428571428569,
                "contradiction": 12.88611111111111
            },
            "msmarco": {
                "abstain": 13.0,
                "entailment": 91.79110724749671,
                "neutral": 6.772111143307898,
                "contradiction": 1.4367816091954022
            },
            "dolly": {
                "abstain": 8.0,
                "entailment": 97.77950310559007,
                "neutral": 1.224120082815735,
                "contradiction": 0.9963768115942028
            },
            "avg": {
                "abstain": 7.000000000000001,
                "entailment": 86.47235357703416,
                "neutral": 8.132385570715742,
                "contradiction": 5.395260852250098
            }
        },
        "LLaMA 2 70B Chat": {
            "nq": {
                "abstain": 6.0,
                "entailment": 23.619620247386862,
                "neutral": 62.5351563421684,
                "contradiction": 13.84522341044474
            },
            "msmarco": {
                "abstain": 4.0,
                "entailment": 84.80608457890267,
                "neutral": 11.166780978062148,
                "contradiction": 4.0271344430351785
            },
            "dolly": {
                "abstain": 0.0,
                "entailment": 92.75111832611834,
                "neutral": 4.0687229437229435,
                "contradiction": 3.1801587301587304
            },
            "avg": {
                "abstain": 3.3333333333333335,
                "entailment": 67.71289743255467,
                "neutral": 25.369613670448583,
                "contradiction": 6.917488896996744
            }
        }
    }
}