{ "claude2###human": { "Alpaca 7B": { "nq": { "abstain": 27.0, "entailment": 21.90920951194924, "neutral": 49.65358081796437, "contradiction": 28.437209670086382 }, "msmarco": { "abstain": 2.0, "entailment": 59.01711667017789, "neutral": 19.84909648174954, "contradiction": 21.13378684807256 }, "dolly": { "abstain": 13.0, "entailment": 76.98572340813719, "neutral": 12.884738186462325, "contradiction": 10.129538405400474 }, "avg": { "abstain": 14.000000000000002, "entailment": 54.57677389363435, "neutral": 25.933701849399526, "contradiction": 19.489524256966117 } }, "GPT-3.5-Turbo": { "nq": { "abstain": 1.0, "entailment": 58.8535769373559, "neutral": 22.130219091003404, "contradiction": 19.016203971640692 }, "msmarco": { "abstain": 20.0, "entailment": 77.3299637383689, "neutral": 6.634321975916804, "contradiction": 16.035714285714285 }, "dolly": { "abstain": 0.0, "entailment": 93.69698079698081, "neutral": 2.682251082251082, "contradiction": 3.6207681207681204 }, "avg": { "abstain": 7.000000000000001, "entailment": 76.64014084432196, "neutral": 10.716353248415016, "contradiction": 12.643505907263023 } }, "Claude 2": { "nq": { "abstain": 21.0, "entailment": 36.24974533202381, "neutral": 60.93093966511689, "contradiction": 2.819315002859307 }, "msmarco": { "abstain": 6.0, "entailment": 88.95130578641216, "neutral": 6.450995812697939, "contradiction": 4.5976984008898905 }, "dolly": { "abstain": 8.0, "entailment": 90.86864524364525, "neutral": 6.670880448054362, "contradiction": 2.4604743083003955 }, "avg": { "abstain": 11.666666666666668, "entailment": 73.90591693421882, "neutral": 22.768523928901285, "contradiction": 3.3255591368798907 } }, "InstructGPT": { "nq": { "abstain": 5.0, "entailment": 20.438596491228072, "neutral": 25.30701754385965, "contradiction": 54.254385964912274 }, "msmarco": { "abstain": 13.0, "entailment": 65.80729296246537, "neutral": 13.403575989782887, "contradiction": 20.78913104775174 }, "dolly": { "abstain": 1.0, "entailment": 81.58865825532492, "neutral": 5.608465608465608, "contradiction": 12.802876136209468 }, "avg": { "abstain": 6.333333333333334, "entailment": 56.029104347609696, "neutral": 14.68155114952268, "contradiction": 29.289344502867635 } }, "Falcon 40B Instruct": { "nq": { "abstain": 27.0, "entailment": 37.96803652968036, "neutral": 17.123287671232877, "contradiction": 44.90867579908676 }, "msmarco": { "abstain": 17.0, "entailment": 61.28370625358577, "neutral": 17.053930005737232, "contradiction": 21.662363740676994 }, "dolly": { "abstain": 3.0, "entailment": 78.37657474255414, "neutral": 13.978295473140834, "contradiction": 7.645129784305042 }, "avg": { "abstain": 15.66666666666667, "entailment": 61.10965231518591, "neutral": 15.894746448106131, "contradiction": 22.99560123670796 } }, "GPT-4": { "nq": { "abstain": 0.0, "entailment": 71.44246031746032, "neutral": 15.671428571428569, "contradiction": 12.88611111111111 }, "msmarco": { "abstain": 13.0, "entailment": 91.79110724749671, "neutral": 6.772111143307898, "contradiction": 1.4367816091954022 }, "dolly": { "abstain": 8.0, "entailment": 97.77950310559007, "neutral": 1.224120082815735, "contradiction": 0.9963768115942028 }, "avg": { "abstain": 7.000000000000001, "entailment": 86.47235357703416, "neutral": 8.132385570715742, "contradiction": 5.395260852250098 } }, "LLaMA 2 70B Chat": { "nq": { "abstain": 6.0, "entailment": 23.619620247386862, "neutral": 62.5351563421684, "contradiction": 13.84522341044474 }, "msmarco": { "abstain": 4.0, "entailment": 84.80608457890267, "neutral": 11.166780978062148, "contradiction": 4.0271344430351785 }, "dolly": { "abstain": 0.0, "entailment": 92.75111832611834, "neutral": 4.0687229437229435, "contradiction": 3.1801587301587304 }, "avg": { "abstain": 3.3333333333333335, "entailment": 67.71289743255467, "neutral": 25.369613670448583, "contradiction": 6.917488896996744 } } } }