diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -104450,5 +104450,3533 @@ "prompt_1": -1 } } + }, + "LLaMA_3_Merlion_8B_v1_1": { + "model_size": "8B", + "model_link": "https://seaeval.github.io/", + "zero_shot": { + "cross_xquad": { + "prompt_1": { + "overall_acc": 0.9220588235294117, + "language_acc": { + "Spanish": 0.9285714285714286, + "English": 0.9411764705882353, + "Chinese": 0.9142857142857143, + "Vietnamese": 0.9042016806722689 + }, + "consistency_score_2": 0.9137254901960784, + "consistency_score_3": 0.8756302521008403, + "consistency_score_4": 0.8521008403361344, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9394957983193277, + "Spanish,Chinese": 0.8966386554621849, + "Spanish,Vietnamese": 0.9176470588235294, + "English,Chinese": 0.9193277310924369, + "English,Vietnamese": 0.9100840336134454, + "Chinese,Vietnamese": 0.8991596638655462 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8815126050420168, + "Spanish,English,Vietnamese": 0.888235294117647, + "Spanish,Chinese,Vietnamese": 0.8621848739495799, + "English,Chinese,Vietnamese": 0.8705882352941177 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8521008403361344 + } + }, + "AC3_2": 0.917873242705047, + "AC3_3": 0.8982449868544597, + "AC3_4": 0.885700553706216 + }, + "prompt_2": { + "overall_acc": 0.9260504201680672, + "language_acc": { + "Spanish": 0.9294117647058824, + "English": 0.9403361344537815, + "Chinese": 0.9201680672268907, + "Vietnamese": 0.9142857142857143 + }, + "consistency_score_2": 0.9131652661064426, + "consistency_score_3": 0.8743697478991597, + "consistency_score_4": 0.8478991596638655, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9294117647058824, + "Spanish,Chinese": 0.9008403361344538, + "Spanish,Vietnamese": 0.915126050420168, + "English,Chinese": 0.9117647058823529, + "English,Vietnamese": 0.9201680672268907, + "Chinese,Vietnamese": 0.9016806722689076 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8747899159663866, + "Spanish,English,Vietnamese": 0.8865546218487395, + "Spanish,Chinese,Vietnamese": 0.8647058823529412, + "English,Chinese,Vietnamese": 0.8714285714285714 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8478991596638655 + } + }, + "AC3_2": 0.9195627077623384, + "AC3_3": 0.8994683427130508, + "AC3_4": 0.8852533149189079 + }, + "prompt_3": { + "overall_acc": 0.9195378151260504, + "language_acc": { + "Spanish": 0.9210084033613445, + "English": 0.9411764705882353, + "Chinese": 0.9142857142857143, + "Vietnamese": 0.9016806722689076 + }, + "consistency_score_2": 0.9050420168067227, + "consistency_score_3": 0.8626050420168068, + "consistency_score_4": 0.8361344537815126, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9310924369747899, + "Spanish,Chinese": 0.8907563025210085, + "Spanish,Vietnamese": 0.9025210084033614, + "English,Chinese": 0.9092436974789916, + "English,Vietnamese": 0.9084033613445378, + "Chinese,Vietnamese": 0.888235294117647 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8697478991596639, + "Spanish,English,Vietnamese": 0.8756302521008403, + "Spanish,Chinese,Vietnamese": 0.846218487394958, + "English,Chinese,Vietnamese": 0.8588235294117647 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8361344537815126 + } + }, + "AC3_2": 0.91223233329785, + "AC3_3": 0.8901620343500424, + "AC3_4": 0.8758550924956867 + }, + "prompt_4": { + "overall_acc": 0.9212184873949579, + "language_acc": { + "Spanish": 0.9243697478991597, + "English": 0.9403361344537815, + "Chinese": 0.9126050420168067, + "Vietnamese": 0.907563025210084 + }, + "consistency_score_2": 0.9123249299719888, + "consistency_score_3": 0.8735294117647059, + "consistency_score_4": 0.8487394957983193, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9361344537815126, + "Spanish,Chinese": 0.8932773109243698, + "Spanish,Vietnamese": 0.9176470588235294, + "English,Chinese": 0.9117647058823529, + "English,Vietnamese": 0.9176470588235294, + "Chinese,Vietnamese": 0.8974789915966387 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8756302521008403, + "Spanish,English,Vietnamese": 0.8890756302521008, + "Spanish,Chinese,Vietnamese": 0.8596638655462185, + "English,Chinese,Vietnamese": 0.8697478991596639 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8487394957983193 + } + }, + "AC3_2": 0.9167501396422834, + "AC3_3": 0.8967403652990311, + "AC3_4": 0.8834950003241206 + }, + "prompt_5": { + "overall_acc": 0.9155462184873949, + "language_acc": { + "Spanish": 0.9184873949579831, + "English": 0.9369747899159664, + "Chinese": 0.907563025210084, + "Vietnamese": 0.8991596638655462 + }, + "consistency_score_2": 0.9030812324929972, + "consistency_score_3": 0.859873949579832, + "consistency_score_4": 0.8319327731092437, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9294117647058824, + "Spanish,Chinese": 0.8840336134453781, + "Spanish,Vietnamese": 0.9100840336134454, + "English,Chinese": 0.9016806722689076, + "English,Vietnamese": 0.9067226890756303, + "Chinese,Vietnamese": 0.8865546218487395 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8621848739495799, + "Spanish,English,Vietnamese": 0.8764705882352941, + "Spanish,Chinese,Vietnamese": 0.846218487394958, + "English,Chinese,Vietnamese": 0.8546218487394958 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8319327731092437 + } + }, + "AC3_2": 0.9092710075444148, + "AC3_3": 0.8868372197509976, + "AC3_4": 0.8717391259924525 + } + }, + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.5428571428571428, + "language_acc": { + "Filipino": 0.5133333333333333, + "Vietnamese": 0.5133333333333333, + "Chinese": 0.62, + "Spanish": 0.5666666666666667, + "Malay": 0.47333333333333333, + "Indonesian": 0.46, + "English": 0.6533333333333333 + }, + "consistency_score_2": 0.6184126984126983, + "consistency_score_3": 0.4643809523809524, + "consistency_score_4": 0.38019047619047613, + "consistency_score_5": 0.32730158730158726, + "consistency_score_6": 0.2914285714285714, + "consistency_score_7": 0.26666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.5866666666666667, + "Filipino,Chinese": 0.6133333333333333, + "Filipino,Spanish": 0.6133333333333333, + "Filipino,Malay": 0.5666666666666667, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,English": 0.5933333333333334, + "Vietnamese,Chinese": 0.6133333333333333, + "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Indonesian": 0.6, + "Vietnamese,English": 0.6066666666666667, + "Chinese,Spanish": 0.6266666666666667, + "Chinese,Malay": 0.5866666666666667, + "Chinese,Indonesian": 0.5666666666666667, + "Chinese,English": 0.5733333333333334, + "Spanish,Malay": 0.6733333333333333, + "Spanish,Indonesian": 0.68, + "Spanish,English": 0.7333333333333333, + "Malay,Indonesian": 0.7333333333333333, + "Malay,English": 0.62, + "Indonesian,English": 0.6333333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.46, + "Filipino,Vietnamese,Spanish": 0.4666666666666667, + "Filipino,Vietnamese,Malay": 0.4, + "Filipino,Vietnamese,Indonesian": 0.44, + "Filipino,Vietnamese,English": 0.44666666666666666, + "Filipino,Chinese,Spanish": 0.4666666666666667, + "Filipino,Chinese,Malay": 0.4266666666666667, + "Filipino,Chinese,Indonesian": 0.42, + "Filipino,Chinese,English": 0.44, + "Filipino,Spanish,Malay": 0.4666666666666667, + "Filipino,Spanish,Indonesian": 0.48, + "Filipino,Spanish,English": 0.5, + "Filipino,Malay,Indonesian": 0.47333333333333333, + "Filipino,Malay,English": 0.43333333333333335, + "Filipino,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.47333333333333333, + "Vietnamese,Chinese,Malay": 0.4066666666666667, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,English": 0.44, + "Vietnamese,Spanish,Malay": 0.46, + "Vietnamese,Spanish,Indonesian": 0.5, + "Vietnamese,Spanish,English": 0.5, + "Vietnamese,Malay,Indonesian": 0.4666666666666667, + "Vietnamese,Malay,English": 0.4266666666666667, + "Vietnamese,Indonesian,English": 0.46, + "Chinese,Spanish,Malay": 0.46, + "Chinese,Spanish,Indonesian": 0.47333333333333333, + "Chinese,Spanish,English": 0.4866666666666667, + "Chinese,Malay,Indonesian": 0.4866666666666667, + "Chinese,Malay,English": 0.41333333333333333, + "Chinese,Indonesian,English": 0.43333333333333335, + "Spanish,Malay,Indonesian": 0.5733333333333334, + "Spanish,Malay,English": 0.5333333333333333, + "Spanish,Indonesian,English": 0.54, + "Malay,Indonesian,English": 0.52 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.38, + "Filipino,Vietnamese,Chinese,Malay": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,English": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,Malay": 0.36, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3933333333333333, + "Filipino,Vietnamese,Spanish,English": 0.3933333333333333, + "Filipino,Vietnamese,Malay,Indonesian": 0.36, + "Filipino,Vietnamese,Malay,English": 0.34, + "Filipino,Vietnamese,Indonesian,English": 0.37333333333333335, + "Filipino,Chinese,Spanish,Malay": 0.36666666666666664, + "Filipino,Chinese,Spanish,Indonesian": 0.37333333333333335, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.38, + "Filipino,Chinese,Malay,English": 0.3333333333333333, + "Filipino,Chinese,Indonesian,English": 0.3466666666666667, + "Filipino,Spanish,Malay,Indonesian": 0.42, + "Filipino,Spanish,Malay,English": 0.4066666666666667, + "Filipino,Spanish,Indonesian,English": 0.42, + "Filipino,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay": 0.36, + "Vietnamese,Chinese,Spanish,Indonesian": 0.38666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.37333333333333335, + "Vietnamese,Chinese,Malay,Indonesian": 0.36, + "Vietnamese,Chinese,Malay,English": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian,English": 0.36666666666666664, + "Vietnamese,Spanish,Malay,Indonesian": 0.44, + "Vietnamese,Spanish,Malay,English": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.38, + "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Chinese,Spanish,Malay,English": 0.38666666666666666, + "Chinese,Spanish,Indonesian,English": 0.4, + "Chinese,Malay,Indonesian,English": 0.37333333333333335, + "Spanish,Malay,Indonesian,English": 0.4666666666666667 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.32, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3, + "Filipino,Vietnamese,Chinese,Malay,English": 0.28, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.32, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.36, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.32, + "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.30666666666666664, + "Filipino,Spanish,Malay,Indonesian,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.34, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.36 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 + } + }, + "AC3_2": 0.5781769897182346, + "AC3_3": 0.500561919120392, + "AC3_4": 0.4471906137125338, + "AC3_5": 0.4083806743680366, + "AC3_6": 0.379256360032819, + "AC3_7": 0.3576470587793495 + }, + "prompt_2": { + "overall_acc": 0.5342857142857144, + "language_acc": { + "Filipino": 0.52, + "Vietnamese": 0.5333333333333333, + "Chinese": 0.5933333333333334, + "Spanish": 0.5466666666666666, + "Malay": 0.44666666666666666, + "Indonesian": 0.4666666666666667, + "English": 0.6333333333333333 + }, + "consistency_score_2": 0.6200000000000001, + "consistency_score_3": 0.469904761904762, + "consistency_score_4": 0.38704761904761914, + "consistency_score_5": 0.33555555555555555, + "consistency_score_6": 0.3019047619047619, + "consistency_score_7": 0.28, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.6266666666666667, + "Filipino,Chinese": 0.5866666666666667, + "Filipino,Spanish": 0.6733333333333333, + "Filipino,Malay": 0.6066666666666667, + "Filipino,Indonesian": 0.6333333333333333, + "Filipino,English": 0.6333333333333333, + "Vietnamese,Chinese": 0.6133333333333333, + "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Malay": 0.56, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,English": 0.6666666666666666, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,Malay": 0.5333333333333333, + "Chinese,Indonesian": 0.5266666666666666, + "Chinese,English": 0.5933333333333334, + "Spanish,Malay": 0.6066666666666667, + "Spanish,Indonesian": 0.64, + "Spanish,English": 0.74, + "Malay,Indonesian": 0.7066666666666667, + "Malay,English": 0.6133333333333333, + "Indonesian,English": 0.6133333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.47333333333333333, + "Filipino,Vietnamese,Spanish": 0.5, + "Filipino,Vietnamese,Malay": 0.43333333333333335, + "Filipino,Vietnamese,Indonesian": 0.47333333333333333, + "Filipino,Vietnamese,English": 0.52, + "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Malay": 0.42, + "Filipino,Chinese,Indonesian": 0.42, + "Filipino,Chinese,English": 0.4533333333333333, + "Filipino,Spanish,Malay": 0.4666666666666667, + "Filipino,Spanish,Indonesian": 0.4866666666666667, + "Filipino,Spanish,English": 0.5533333333333333, + "Filipino,Malay,Indonesian": 0.5, + "Filipino,Malay,English": 0.4666666666666667, + "Filipino,Indonesian,English": 0.4866666666666667, + "Vietnamese,Chinese,Spanish": 0.46, + "Vietnamese,Chinese,Malay": 0.38666666666666666, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,English": 0.49333333333333335, + "Vietnamese,Spanish,Malay": 0.44666666666666666, + "Vietnamese,Spanish,Indonesian": 0.4866666666666667, + "Vietnamese,Spanish,English": 0.54, + "Vietnamese,Malay,Indonesian": 0.48, + "Vietnamese,Malay,English": 0.4666666666666667, + "Vietnamese,Indonesian,English": 0.49333333333333335, + "Chinese,Spanish,Malay": 0.4, + "Chinese,Spanish,Indonesian": 0.4266666666666667, + "Chinese,Spanish,English": 0.49333333333333335, + "Chinese,Malay,Indonesian": 0.43333333333333335, + "Chinese,Malay,English": 0.41333333333333333, + "Chinese,Indonesian,English": 0.4266666666666667, + "Spanish,Malay,Indonesian": 0.5133333333333333, + "Spanish,Malay,English": 0.52, + "Spanish,Indonesian,English": 0.5333333333333333, + "Malay,Indonesian,English": 0.49333333333333335 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.4, + "Filipino,Vietnamese,Chinese,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.36, + "Filipino,Vietnamese,Chinese,English": 0.41333333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.37333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.46, + "Filipino,Vietnamese,Malay,Indonesian": 0.38, + "Filipino,Vietnamese,Malay,English": 0.4, + "Filipino,Vietnamese,Indonesian,English": 0.41333333333333333, + "Filipino,Chinese,Spanish,Malay": 0.34, + "Filipino,Chinese,Spanish,Indonesian": 0.36, + "Filipino,Chinese,Spanish,English": 0.4066666666666667, + "Filipino,Chinese,Malay,Indonesian": 0.37333333333333335, + "Filipino,Chinese,Malay,English": 0.35333333333333333, + "Filipino,Chinese,Indonesian,English": 0.36666666666666664, + "Filipino,Spanish,Malay,Indonesian": 0.4066666666666667, + "Filipino,Spanish,Malay,English": 0.42, + "Filipino,Spanish,Indonesian,English": 0.44, + "Filipino,Malay,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,English": 0.4, + "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Malay,English": 0.3466666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.38, + "Vietnamese,Spanish,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish,Malay,English": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.4066666666666667, + "Chinese,Spanish,Malay,Indonesian": 0.36, + "Chinese,Spanish,Malay,English": 0.36666666666666664, + "Chinese,Spanish,Indonesian,English": 0.38, + "Chinese,Malay,Indonesian,English": 0.35333333333333333, + "Spanish,Malay,Indonesian,English": 0.44666666666666666 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Chinese,Malay,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.34, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Spanish,Malay,English": 0.36, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.37333333333333335, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.32, + "Filipino,Chinese,Spanish,Malay,English": 0.32, + "Filipino,Chinese,Spanish,Indonesian,English": 0.34, + "Filipino,Chinese,Malay,Indonesian,English": 0.32, + "Filipino,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.31333333333333335, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.32666666666666666 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.3, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.28 + } + }, + "AC3_2": 0.5739603959898798, + "AC3_3": 0.5000314328572075, + "AC3_4": 0.44890162138132517, + "AC3_5": 0.4122189780547997, + "AC3_6": 0.385805401841268, + "AC3_7": 0.36743859644610405 + }, + "prompt_3": { + "overall_acc": 0.5323809523809524, + "language_acc": { + "Filipino": 0.5, + "Vietnamese": 0.5266666666666666, + "Chinese": 0.5733333333333334, + "Spanish": 0.5466666666666666, + "Malay": 0.47333333333333333, + "Indonesian": 0.48, + "English": 0.6266666666666667 + }, + "consistency_score_2": 0.6098412698412697, + "consistency_score_3": 0.4560000000000002, + "consistency_score_4": 0.3704761904761905, + "consistency_score_5": 0.31460317460317466, + "consistency_score_6": 0.27714285714285714, + "consistency_score_7": 0.25333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.5933333333333334, + "Filipino,Chinese": 0.6066666666666667, + "Filipino,Spanish": 0.64, + "Filipino,Malay": 0.56, + "Filipino,Indonesian": 0.6066666666666667, + "Filipino,English": 0.5933333333333334, + "Vietnamese,Chinese": 0.6, + "Vietnamese,Spanish": 0.66, + "Vietnamese,Malay": 0.54, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,English": 0.64, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,Malay": 0.5266666666666666, + "Chinese,Indonesian": 0.54, + "Chinese,English": 0.5866666666666667, + "Spanish,Malay": 0.62, + "Spanish,Indonesian": 0.6333333333333333, + "Spanish,English": 0.7266666666666667, + "Malay,Indonesian": 0.7333333333333333, + "Malay,English": 0.6133333333333333, + "Indonesian,English": 0.6066666666666667 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.4533333333333333, + "Filipino,Vietnamese,Spanish": 0.4866666666666667, + "Filipino,Vietnamese,Malay": 0.38666666666666666, + "Filipino,Vietnamese,Indonesian": 0.4266666666666667, + "Filipino,Vietnamese,English": 0.47333333333333333, + "Filipino,Chinese,Spanish": 0.44666666666666666, + "Filipino,Chinese,Malay": 0.3933333333333333, + "Filipino,Chinese,Indonesian": 0.42, + "Filipino,Chinese,English": 0.44666666666666666, + "Filipino,Spanish,Malay": 0.4533333333333333, + "Filipino,Spanish,Indonesian": 0.47333333333333333, + "Filipino,Spanish,English": 0.52, + "Filipino,Malay,Indonesian": 0.4666666666666667, + "Filipino,Malay,English": 0.42, + "Filipino,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.4666666666666667, + "Vietnamese,Chinese,Malay": 0.37333333333333335, + "Vietnamese,Chinese,Indonesian": 0.42, + "Vietnamese,Chinese,English": 0.47333333333333333, + "Vietnamese,Spanish,Malay": 0.46, + "Vietnamese,Spanish,Indonesian": 0.48, + "Vietnamese,Spanish,English": 0.54, + "Vietnamese,Malay,Indonesian": 0.46, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.46, + "Chinese,Spanish,Malay": 0.41333333333333333, + "Chinese,Spanish,Indonesian": 0.42, + "Chinese,Spanish,English": 0.49333333333333335, + "Chinese,Malay,Indonesian": 0.44, + "Chinese,Malay,English": 0.41333333333333333, + "Chinese,Indonesian,English": 0.42, + "Spanish,Malay,Indonesian": 0.5266666666666666, + "Spanish,Malay,English": 0.52, + "Spanish,Indonesian,English": 0.5133333333333333, + "Malay,Indonesian,English": 0.5 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.38666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,English": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,Malay": 0.36, + "Filipino,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,Spanish,English": 0.4266666666666667, + "Filipino,Vietnamese,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Malay,English": 0.34, + "Filipino,Vietnamese,Indonesian,English": 0.35333333333333333, + "Filipino,Chinese,Spanish,Malay": 0.3466666666666667, + "Filipino,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Spanish,English": 0.4, + "Filipino,Chinese,Malay,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Malay,English": 0.32666666666666666, + "Filipino,Chinese,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian": 0.3933333333333333, + "Filipino,Spanish,Malay,English": 0.4, + "Filipino,Spanish,Indonesian,English": 0.4066666666666667, + "Filipino,Malay,Indonesian,English": 0.36, + "Vietnamese,Chinese,Spanish,Malay": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.36, + "Vietnamese,Chinese,Spanish,English": 0.41333333333333333, + "Vietnamese,Chinese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Malay,English": 0.34, + "Vietnamese,Chinese,Indonesian,English": 0.36, + "Vietnamese,Spanish,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish,Malay,English": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,English": 0.38666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Chinese,Spanish,Malay,English": 0.37333333333333335, + "Chinese,Spanish,Indonesian,English": 0.37333333333333335, + "Chinese,Malay,Indonesian,English": 0.35333333333333333, + "Spanish,Malay,Indonesian,English": 0.44666666666666666 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Malay,English": 0.28, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.29333333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Spanish,Malay,Indonesian,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish,Malay,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.32, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.3, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.36, + "Chinese,Spanish,Malay,Indonesian,English": 0.3333333333333333 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.28, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 + } + }, + "AC3_2": 0.5684846078011524, + "AC3_3": 0.49123915971133164, + "AC3_4": 0.43691179420517434, + "AC3_5": 0.39549439561261884, + "AC3_6": 0.36452436970286967, + "AC3_7": 0.3433050504613571 + }, + "prompt_4": { + "overall_acc": 0.5447619047619048, + "language_acc": { + "Filipino": 0.5333333333333333, + "Vietnamese": 0.5066666666666667, + "Chinese": 0.5933333333333334, + "Spanish": 0.56, + "Malay": 0.47333333333333333, + "Indonesian": 0.4866666666666667, + "English": 0.66 + }, + "consistency_score_2": 0.6180952380952381, + "consistency_score_3": 0.46095238095238095, + "consistency_score_4": 0.3752380952380952, + "consistency_score_5": 0.3219047619047618, + "consistency_score_6": 0.2857142857142857, + "consistency_score_7": 0.26, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.6, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.64, + "Filipino,Malay": 0.5733333333333334, + "Filipino,Indonesian": 0.64, + "Filipino,English": 0.62, + "Vietnamese,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.62, + "Vietnamese,Malay": 0.5866666666666667, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,English": 0.6066666666666667, + "Chinese,Spanish": 0.56, + "Chinese,Malay": 0.5533333333333333, + "Chinese,Indonesian": 0.52, + "Chinese,English": 0.56, + "Spanish,Malay": 0.6533333333333333, + "Spanish,Indonesian": 0.6933333333333334, + "Spanish,English": 0.7466666666666667, + "Malay,Indonesian": 0.6933333333333334, + "Malay,English": 0.64, + "Indonesian,English": 0.6333333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.46, + "Filipino,Vietnamese,Spanish": 0.47333333333333333, + "Filipino,Vietnamese,Malay": 0.42, + "Filipino,Vietnamese,Indonesian": 0.46, + "Filipino,Vietnamese,English": 0.46, + "Filipino,Chinese,Spanish": 0.43333333333333335, + "Filipino,Chinese,Malay": 0.4, + "Filipino,Chinese,Indonesian": 0.4066666666666667, + "Filipino,Chinese,English": 0.44, + "Filipino,Spanish,Malay": 0.4666666666666667, + "Filipino,Spanish,Indonesian": 0.5133333333333333, + "Filipino,Spanish,English": 0.5333333333333333, + "Filipino,Malay,Indonesian": 0.48, + "Filipino,Malay,English": 0.4533333333333333, + "Filipino,Indonesian,English": 0.47333333333333333, + "Vietnamese,Chinese,Spanish": 0.44666666666666666, + "Vietnamese,Chinese,Malay": 0.42, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,English": 0.44, + "Vietnamese,Spanish,Malay": 0.4666666666666667, + "Vietnamese,Spanish,Indonesian": 0.5, + "Vietnamese,Spanish,English": 0.5, + "Vietnamese,Malay,Indonesian": 0.48, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.4666666666666667, + "Chinese,Spanish,Malay": 0.41333333333333333, + "Chinese,Spanish,Indonesian": 0.43333333333333335, + "Chinese,Spanish,English": 0.46, + "Chinese,Malay,Indonesian": 0.42, + "Chinese,Malay,English": 0.4, + "Chinese,Indonesian,English": 0.4066666666666667, + "Spanish,Malay,Indonesian": 0.54, + "Spanish,Malay,English": 0.5333333333333333, + "Spanish,Indonesian,English": 0.5533333333333333, + "Malay,Indonesian,English": 0.49333333333333335 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Filipino,Vietnamese,Chinese,Malay": 0.3333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,English": 0.37333333333333335, + "Filipino,Vietnamese,Spanish,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3933333333333333, + "Filipino,Vietnamese,Spanish,English": 0.4, + "Filipino,Vietnamese,Malay,Indonesian": 0.36666666666666664, + "Filipino,Vietnamese,Malay,English": 0.36666666666666664, + "Filipino,Vietnamese,Indonesian,English": 0.38, + "Filipino,Chinese,Spanish,Malay": 0.34, + "Filipino,Chinese,Spanish,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Malay,English": 0.3333333333333333, + "Filipino,Chinese,Indonesian,English": 0.3466666666666667, + "Filipino,Spanish,Malay,Indonesian": 0.41333333333333333, + "Filipino,Spanish,Malay,English": 0.42, + "Filipino,Spanish,Indonesian,English": 0.44666666666666666, + "Filipino,Malay,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish,Malay": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,English": 0.36666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Malay,English": 0.3333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.35333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish,Malay,English": 0.3933333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.38, + "Chinese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Chinese,Spanish,Malay,English": 0.36, + "Chinese,Spanish,Indonesian,English": 0.38666666666666666, + "Chinese,Malay,Indonesian,English": 0.3333333333333333, + "Spanish,Malay,Indonesian,English": 0.4533333333333333 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Spanish,Malay,English": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.36, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.32, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.29333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.36666666666666664, + "Chinese,Spanish,Malay,Indonesian,English": 0.3333333333333333 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 + } + }, + "AC3_2": 0.5791162590664581, + "AC3_3": 0.4993650793154265, + "AC3_4": 0.4443813466932929, + "AC3_5": 0.40468027206214957, + "AC3_6": 0.3748361729561755, + "AC3_7": 0.3519999999562604 + }, + "prompt_5": { + "overall_acc": 0.5352380952380953, + "language_acc": { + "Filipino": 0.5, + "Vietnamese": 0.5133333333333333, + "Chinese": 0.5866666666666667, + "Spanish": 0.5733333333333334, + "Malay": 0.44666666666666666, + "Indonesian": 0.4666666666666667, + "English": 0.66 + }, + "consistency_score_2": 0.6057142857142856, + "consistency_score_3": 0.4464761904761905, + "consistency_score_4": 0.35866666666666663, + "consistency_score_5": 0.3025396825396825, + "consistency_score_6": 0.26285714285714284, + "consistency_score_7": 0.23333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.56, + "Filipino,Chinese": 0.5733333333333334, + "Filipino,Spanish": 0.6266666666666667, + "Filipino,Malay": 0.54, + "Filipino,Indonesian": 0.62, + "Filipino,English": 0.5866666666666667, + "Vietnamese,Chinese": 0.62, + "Vietnamese,Spanish": 0.6133333333333333, + "Vietnamese,Malay": 0.54, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,English": 0.5733333333333334, + "Chinese,Spanish": 0.62, + "Chinese,Malay": 0.5666666666666667, + "Chinese,Indonesian": 0.5333333333333333, + "Chinese,English": 0.5733333333333334, + "Spanish,Malay": 0.6466666666666666, + "Spanish,Indonesian": 0.6933333333333334, + "Spanish,English": 0.7333333333333333, + "Malay,Indonesian": 0.6866666666666666, + "Malay,English": 0.5866666666666667, + "Indonesian,English": 0.6533333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.43333333333333335, + "Filipino,Vietnamese,Spanish": 0.44, + "Filipino,Vietnamese,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Indonesian": 0.4066666666666667, + "Filipino,Vietnamese,English": 0.42, + "Filipino,Chinese,Spanish": 0.44666666666666666, + "Filipino,Chinese,Malay": 0.38666666666666666, + "Filipino,Chinese,Indonesian": 0.3933333333333333, + "Filipino,Chinese,English": 0.41333333333333333, + "Filipino,Spanish,Malay": 0.44666666666666666, + "Filipino,Spanish,Indonesian": 0.5066666666666667, + "Filipino,Spanish,English": 0.5066666666666667, + "Filipino,Malay,Indonesian": 0.4533333333333333, + "Filipino,Malay,English": 0.4066666666666667, + "Filipino,Indonesian,English": 0.4666666666666667, + "Vietnamese,Chinese,Spanish": 0.47333333333333333, + "Vietnamese,Chinese,Malay": 0.4066666666666667, + "Vietnamese,Chinese,Indonesian": 0.4066666666666667, + "Vietnamese,Chinese,English": 0.4266666666666667, + "Vietnamese,Spanish,Malay": 0.44666666666666666, + "Vietnamese,Spanish,Indonesian": 0.48, + "Vietnamese,Spanish,English": 0.48, + "Vietnamese,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,English": 0.4, + "Vietnamese,Indonesian,English": 0.44, + "Chinese,Spanish,Malay": 0.44666666666666666, + "Chinese,Spanish,Indonesian": 0.46, + "Chinese,Spanish,English": 0.4866666666666667, + "Chinese,Malay,Indonesian": 0.43333333333333335, + "Chinese,Malay,English": 0.4, + "Chinese,Indonesian,English": 0.42, + "Spanish,Malay,Indonesian": 0.54, + "Spanish,Malay,English": 0.5, + "Spanish,Indonesian,English": 0.5533333333333333, + "Malay,Indonesian,English": 0.4866666666666667 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,English": 0.34, + "Filipino,Vietnamese,Spanish,Malay": 0.34, + "Filipino,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,English": 0.37333333333333335, + "Filipino,Vietnamese,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Malay,English": 0.31333333333333335, + "Filipino,Vietnamese,Indonesian,English": 0.3466666666666667, + "Filipino,Chinese,Spanish,Malay": 0.34, + "Filipino,Chinese,Spanish,Indonesian": 0.36, + "Filipino,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Chinese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Chinese,Malay,English": 0.31333333333333335, + "Filipino,Chinese,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian": 0.4, + "Filipino,Spanish,Malay,English": 0.38666666666666666, + "Filipino,Spanish,Indonesian,English": 0.4266666666666667, + "Filipino,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.36, + "Vietnamese,Chinese,Spanish,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,English": 0.36666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.31333333333333335, + "Vietnamese,Chinese,Indonesian,English": 0.34, + "Vietnamese,Spanish,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish,Malay,English": 0.36, + "Vietnamese,Spanish,Indonesian,English": 0.4, + "Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.4066666666666667, + "Chinese,Spanish,Malay,English": 0.37333333333333335, + "Chinese,Spanish,Indonesian,English": 0.3933333333333333, + "Chinese,Malay,Indonesian,English": 0.3466666666666667, + "Spanish,Malay,Indonesian,English": 0.44 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.3, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Malay,English": 0.26, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.28, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Malay,English": 0.3, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.28, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,English": 0.3, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32, + "Filipino,Chinese,Malay,Indonesian,English": 0.28, + "Filipino,Spanish,Malay,Indonesian,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.28, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.34, + "Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.26, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.28, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.28, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.28 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 + } + }, + "AC3_2": 0.5682995468140114, + "AC3_3": 0.48684442961206786, + "AC3_4": 0.4295134597147181, + "AC3_5": 0.38657211421070076, + "AC3_6": 0.3525673371532327, + "AC3_7": 0.3249896736462735 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.4740259740259741, + "language_acc": { + "Indonesian": 0.42045454545454547, + "English": 0.5909090909090909, + "Filipino": 0.375, + "Spanish": 0.48863636363636365, + "Chinese": 0.5284090909090909, + "Malay": 0.42613636363636365, + "Vietnamese": 0.48863636363636365 + }, + "consistency_score_2": 0.574134199134199, + "consistency_score_3": 0.4001623376623377, + "consistency_score_4": 0.3084415584415585, + "consistency_score_5": 0.25216450216450226, + "consistency_score_6": 0.21266233766233764, + "consistency_score_7": 0.18181818181818182, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.5738636363636364, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.625, + "Indonesian,Chinese": 0.5511363636363636, + "Indonesian,Malay": 0.6420454545454546, + "Indonesian,Vietnamese": 0.6136363636363636, + "English,Filipino": 0.48295454545454547, + "English,Spanish": 0.6590909090909091, + "English,Chinese": 0.6136363636363636, + "English,Malay": 0.5738636363636364, + "English,Vietnamese": 0.6193181818181818, + "Filipino,Spanish": 0.5625, + "Filipino,Chinese": 0.4715909090909091, + "Filipino,Malay": 0.5511363636363636, + "Filipino,Vietnamese": 0.4772727272727273, + "Spanish,Chinese": 0.6079545454545454, + "Spanish,Malay": 0.6136363636363636, + "Spanish,Vietnamese": 0.5965909090909091, + "Chinese,Malay": 0.5056818181818182, + "Chinese,Vietnamese": 0.5738636363636364, + "Malay,Vietnamese": 0.6022727272727273 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.3352272727272727, + "Indonesian,English,Spanish": 0.44886363636363635, + "Indonesian,English,Chinese": 0.4147727272727273, + "Indonesian,English,Malay": 0.42045454545454547, + "Indonesian,English,Vietnamese": 0.4375, + "Indonesian,Filipino,Spanish": 0.3977272727272727, + "Indonesian,Filipino,Chinese": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.4034090909090909, + "Indonesian,Filipino,Vietnamese": 0.35795454545454547, + "Indonesian,Spanish,Chinese": 0.4375, + "Indonesian,Spanish,Malay": 0.4659090909090909, + "Indonesian,Spanish,Vietnamese": 0.44886363636363635, + "Indonesian,Chinese,Malay": 0.3977272727272727, + "Indonesian,Chinese,Vietnamese": 0.4090909090909091, + "Indonesian,Malay,Vietnamese": 0.45454545454545453, + "English,Filipino,Spanish": 0.38636363636363635, + "English,Filipino,Chinese": 0.3352272727272727, + "English,Filipino,Malay": 0.3352272727272727, + "English,Filipino,Vietnamese": 0.3352272727272727, + "English,Spanish,Chinese": 0.48295454545454547, + "English,Spanish,Malay": 0.44886363636363635, + "English,Spanish,Vietnamese": 0.45454545454545453, + "English,Chinese,Malay": 0.39204545454545453, + "English,Chinese,Vietnamese": 0.4375, + "English,Malay,Vietnamese": 0.4375, + "Filipino,Spanish,Chinese": 0.35795454545454547, + "Filipino,Spanish,Malay": 0.3977272727272727, + "Filipino,Spanish,Vietnamese": 0.3693181818181818, + "Filipino,Chinese,Malay": 0.32954545454545453, + "Filipino,Chinese,Vietnamese": 0.3125, + "Filipino,Malay,Vietnamese": 0.36363636363636365, + "Spanish,Chinese,Malay": 0.3977272727272727, + "Spanish,Chinese,Vietnamese": 0.4375, + "Spanish,Malay,Vietnamese": 0.4431818181818182, + "Chinese,Malay,Vietnamese": 0.39204545454545453 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.29545454545454547, + "Indonesian,English,Filipino,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Malay": 0.26704545454545453, + "Indonesian,English,Filipino,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Chinese": 0.36363636363636365, + "Indonesian,English,Spanish,Malay": 0.3522727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.3465909090909091, + "Indonesian,English,Chinese,Malay": 0.3181818181818182, + "Indonesian,English,Chinese,Vietnamese": 0.3352272727272727, + "Indonesian,English,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.2784090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.2784090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,Spanish,Chinese,Malay": 0.3352272727272727, + "Indonesian,Spanish,Chinese,Vietnamese": 0.3465909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Chinese,Malay,Vietnamese": 0.32954545454545453, + "English,Filipino,Spanish,Chinese": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese,Malay": 0.26136363636363635, + "English,Filipino,Chinese,Vietnamese": 0.2556818181818182, + "English,Filipino,Malay,Vietnamese": 0.2840909090909091, + "English,Spanish,Chinese,Malay": 0.3409090909090909, + "English,Spanish,Chinese,Vietnamese": 0.35795454545454547, + "English,Spanish,Malay,Vietnamese": 0.36363636363636365, + "English,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Filipino,Spanish,Chinese,Malay": 0.26704545454545453, + "Filipino,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Filipino,Spanish,Malay,Vietnamese": 0.30113636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.26136363636363635, + "Spanish,Chinese,Malay,Vietnamese": 0.3409090909090909 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.24431818181818182, + "Indonesian,English,Filipino,Chinese,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese,Malay": 0.2897727272727273, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.29545454545454547, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2897727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, + "English,Filipino,Chinese,Malay,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.25, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182 + } + }, + "AC3_2": 0.5192994924043111, + "AC3_3": 0.43397364005213085, + "AC3_4": 0.37371342345819386, + "AC3_5": 0.3292043276686818, + "AC3_6": 0.29360474037049444, + "AC3_7": 0.26282628258818835 + }, + "prompt_2": { + "overall_acc": 0.487012987012987, + "language_acc": { + "Indonesian": 0.42045454545454547, + "English": 0.6193181818181818, + "Filipino": 0.4147727272727273, + "Spanish": 0.5227272727272727, + "Chinese": 0.5170454545454546, + "Malay": 0.4090909090909091, + "Vietnamese": 0.5056818181818182 + }, + "consistency_score_2": 0.5955086580086579, + "consistency_score_3": 0.4334415584415585, + "consistency_score_4": 0.34610389610389614, + "consistency_score_5": 0.29031385281385286, + "consistency_score_6": 0.24918831168831174, + "consistency_score_7": 0.2159090909090909, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.5852272727272727, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,Spanish": 0.6363636363636364, + "Indonesian,Chinese": 0.5340909090909091, + "Indonesian,Malay": 0.7102272727272727, + "Indonesian,Vietnamese": 0.6306818181818182, + "English,Filipino": 0.4943181818181818, + "English,Spanish": 0.6988636363636364, + "English,Chinese": 0.5852272727272727, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.6420454545454546, + "Filipino,Spanish": 0.6136363636363636, + "Filipino,Chinese": 0.5284090909090909, + "Filipino,Malay": 0.5909090909090909, + "Filipino,Vietnamese": 0.4943181818181818, + "Spanish,Chinese": 0.6193181818181818, + "Spanish,Malay": 0.625, + "Spanish,Vietnamese": 0.6022727272727273, + "Chinese,Malay": 0.5568181818181818, + "Chinese,Vietnamese": 0.5965909090909091, + "Malay,Vietnamese": 0.6136363636363636 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.3693181818181818, + "Indonesian,English,Spanish": 0.48863636363636365, + "Indonesian,English,Chinese": 0.4147727272727273, + "Indonesian,English,Malay": 0.4715909090909091, + "Indonesian,English,Vietnamese": 0.4602272727272727, + "Indonesian,Filipino,Spanish": 0.44886363636363635, + "Indonesian,Filipino,Chinese": 0.36363636363636365, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Filipino,Vietnamese": 0.3977272727272727, + "Indonesian,Spanish,Chinese": 0.4375, + "Indonesian,Spanish,Malay": 0.5113636363636364, + "Indonesian,Spanish,Vietnamese": 0.4659090909090909, + "Indonesian,Chinese,Malay": 0.4375, + "Indonesian,Chinese,Vietnamese": 0.4431818181818182, + "Indonesian,Malay,Vietnamese": 0.5170454545454546, + "English,Filipino,Spanish": 0.4318181818181818, + "English,Filipino,Chinese": 0.3522727272727273, + "English,Filipino,Malay": 0.3693181818181818, + "English,Filipino,Vietnamese": 0.35795454545454547, + "English,Spanish,Chinese": 0.48295454545454547, + "English,Spanish,Malay": 0.4715909090909091, + "English,Spanish,Vietnamese": 0.5, + "English,Chinese,Malay": 0.4034090909090909, + "English,Chinese,Vietnamese": 0.4659090909090909, + "English,Malay,Vietnamese": 0.45454545454545453, + "Filipino,Spanish,Chinese": 0.4147727272727273, + "Filipino,Spanish,Malay": 0.45454545454545453, + "Filipino,Spanish,Vietnamese": 0.39204545454545453, + "Filipino,Chinese,Malay": 0.39204545454545453, + "Filipino,Chinese,Vietnamese": 0.3465909090909091, + "Filipino,Malay,Vietnamese": 0.3977272727272727, + "Spanish,Chinese,Malay": 0.4375, + "Spanish,Chinese,Vietnamese": 0.44886363636363635, + "Spanish,Malay,Vietnamese": 0.4659090909090909, + "Chinese,Malay,Vietnamese": 0.4375 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.32954545454545453, + "Indonesian,English,Filipino,Chinese": 0.2784090909090909, + "Indonesian,English,Filipino,Malay": 0.3125, + "Indonesian,English,Filipino,Vietnamese": 0.3125, + "Indonesian,English,Spanish,Chinese": 0.36363636363636365, + "Indonesian,English,Spanish,Malay": 0.4034090909090909, + "Indonesian,English,Spanish,Vietnamese": 0.3977272727272727, + "Indonesian,English,Chinese,Malay": 0.3465909090909091, + "Indonesian,English,Chinese,Vietnamese": 0.375, + "Indonesian,English,Malay,Vietnamese": 0.4090909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.38636363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Chinese,Malay": 0.32386363636363635, + "Indonesian,Filipino,Chinese,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,Spanish,Chinese,Malay": 0.375, + "Indonesian,Spanish,Chinese,Vietnamese": 0.3806818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.4090909090909091, + "Indonesian,Chinese,Malay,Vietnamese": 0.38636363636363635, + "English,Filipino,Spanish,Chinese": 0.3181818181818182, + "English,Filipino,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Vietnamese": 0.32386363636363635, + "English,Filipino,Chinese,Malay": 0.29545454545454547, + "English,Filipino,Chinese,Vietnamese": 0.29545454545454547, + "English,Filipino,Malay,Vietnamese": 0.3125, + "English,Spanish,Chinese,Malay": 0.35795454545454547, + "English,Spanish,Chinese,Vietnamese": 0.3977272727272727, + "English,Spanish,Malay,Vietnamese": 0.3977272727272727, + "English,Chinese,Malay,Vietnamese": 0.35795454545454547, + "Filipino,Spanish,Chinese,Malay": 0.32386363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.3125, + "Filipino,Spanish,Malay,Vietnamese": 0.3409090909090909, + "Filipino,Chinese,Malay,Vietnamese": 0.3068181818181818, + "Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Spanish,Malay": 0.2840909090909091, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.2784090909090909, + "Indonesian,English,Filipino,Chinese,Malay": 0.25, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,English,Spanish,Chinese,Malay": 0.3125, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3352272727272727, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.35795454545454547, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727, + "English,Filipino,Spanish,Chinese,Malay": 0.26136363636363635, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "English,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.26136363636363635, + "English,Spanish,Chinese,Malay,Vietnamese": 0.32386363636363635, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.25, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909 + } + }, + "AC3_2": 0.5358238361992322, + "AC3_3": 0.45866831576134237, + "AC3_4": 0.40464212322176873, + "AC3_5": 0.3637764949514161, + "AC3_6": 0.3296868511415178, + "AC3_7": 0.2991811882841224 + }, + "prompt_3": { + "overall_acc": 0.48863636363636365, + "language_acc": { + "Indonesian": 0.4147727272727273, + "English": 0.6022727272727273, + "Filipino": 0.3977272727272727, + "Spanish": 0.5227272727272727, + "Chinese": 0.5284090909090909, + "Malay": 0.45454545454545453, + "Vietnamese": 0.5 + }, + "consistency_score_2": 0.5971320346320347, + "consistency_score_3": 0.43668831168831174, + "consistency_score_4": 0.34853896103896104, + "consistency_score_5": 0.29112554112554107, + "consistency_score_6": 0.2508116883116883, + "consistency_score_7": 0.2215909090909091, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.5965909090909091, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,Spanish": 0.625, + "Indonesian,Chinese": 0.5625, + "Indonesian,Malay": 0.7102272727272727, + "Indonesian,Vietnamese": 0.6420454545454546, + "English,Filipino": 0.5056818181818182, + "English,Spanish": 0.7329545454545454, + "English,Chinese": 0.5965909090909091, + "English,Malay": 0.5852272727272727, + "English,Vietnamese": 0.6477272727272727, + "Filipino,Spanish": 0.5625, + "Filipino,Chinese": 0.5340909090909091, + "Filipino,Malay": 0.5738636363636364, + "Filipino,Vietnamese": 0.45454545454545453, + "Spanish,Chinese": 0.6420454545454546, + "Spanish,Malay": 0.6477272727272727, + "Spanish,Vietnamese": 0.5965909090909091, + "Chinese,Malay": 0.5852272727272727, + "Chinese,Vietnamese": 0.5681818181818182, + "Malay,Vietnamese": 0.6022727272727273 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.3693181818181818, + "Indonesian,English,Spanish": 0.5, + "Indonesian,English,Chinese": 0.4318181818181818, + "Indonesian,English,Malay": 0.4715909090909091, + "Indonesian,English,Vietnamese": 0.4772727272727273, + "Indonesian,Filipino,Spanish": 0.42045454545454547, + "Indonesian,Filipino,Chinese": 0.375, + "Indonesian,Filipino,Malay": 0.45454545454545453, + "Indonesian,Filipino,Vietnamese": 0.39204545454545453, + "Indonesian,Spanish,Chinese": 0.4772727272727273, + "Indonesian,Spanish,Malay": 0.5170454545454546, + "Indonesian,Spanish,Vietnamese": 0.4659090909090909, + "Indonesian,Chinese,Malay": 0.4659090909090909, + "Indonesian,Chinese,Vietnamese": 0.44886363636363635, + "Indonesian,Malay,Vietnamese": 0.5056818181818182, + "English,Filipino,Spanish": 0.4318181818181818, + "English,Filipino,Chinese": 0.36363636363636365, + "English,Filipino,Malay": 0.3693181818181818, + "English,Filipino,Vietnamese": 0.3522727272727273, + "English,Spanish,Chinese": 0.5227272727272727, + "English,Spanish,Malay": 0.5056818181818182, + "English,Spanish,Vietnamese": 0.5170454545454546, + "English,Chinese,Malay": 0.4375, + "English,Chinese,Vietnamese": 0.45454545454545453, + "English,Malay,Vietnamese": 0.45454545454545453, + "Filipino,Spanish,Chinese": 0.4090909090909091, + "Filipino,Spanish,Malay": 0.4318181818181818, + "Filipino,Spanish,Vietnamese": 0.3522727272727273, + "Filipino,Chinese,Malay": 0.39204545454545453, + "Filipino,Chinese,Vietnamese": 0.32954545454545453, + "Filipino,Malay,Vietnamese": 0.36363636363636365, + "Spanish,Chinese,Malay": 0.4772727272727273, + "Spanish,Chinese,Vietnamese": 0.44886363636363635, + "Spanish,Malay,Vietnamese": 0.45454545454545453, + "Chinese,Malay,Vietnamese": 0.4431818181818182 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.32386363636363635, + "Indonesian,English,Filipino,Chinese": 0.2784090909090909, + "Indonesian,English,Filipino,Malay": 0.3181818181818182, + "Indonesian,English,Filipino,Vietnamese": 0.3181818181818182, + "Indonesian,English,Spanish,Chinese": 0.3977272727272727, + "Indonesian,English,Spanish,Malay": 0.4147727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.4147727272727273, + "Indonesian,English,Chinese,Malay": 0.36363636363636365, + "Indonesian,English,Chinese,Vietnamese": 0.3806818181818182, + "Indonesian,English,Malay,Vietnamese": 0.3977272727272727, + "Indonesian,Filipino,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.375, + "Indonesian,Filipino,Spanish,Vietnamese": 0.3181818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.32386363636363635, + "Indonesian,Filipino,Chinese,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Spanish,Chinese,Malay": 0.3977272727272727, + "Indonesian,Spanish,Chinese,Vietnamese": 0.38636363636363635, + "Indonesian,Spanish,Malay,Vietnamese": 0.39204545454545453, + "Indonesian,Chinese,Malay,Vietnamese": 0.38636363636363635, + "English,Filipino,Spanish,Chinese": 0.32954545454545453, + "English,Filipino,Spanish,Malay": 0.3465909090909091, + "English,Filipino,Spanish,Vietnamese": 0.3181818181818182, + "English,Filipino,Chinese,Malay": 0.30113636363636365, + "English,Filipino,Chinese,Vietnamese": 0.2897727272727273, + "English,Filipino,Malay,Vietnamese": 0.30113636363636365, + "English,Spanish,Chinese,Malay": 0.4034090909090909, + "English,Spanish,Chinese,Vietnamese": 0.4034090909090909, + "English,Spanish,Malay,Vietnamese": 0.3977272727272727, + "English,Chinese,Malay,Vietnamese": 0.375, + "Filipino,Spanish,Chinese,Malay": 0.3352272727272727, + "Filipino,Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Filipino,Spanish,Malay,Vietnamese": 0.30113636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.2897727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.375 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Spanish,Malay": 0.29545454545454547, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.2840909090909091, + "Indonesian,English,Filipino,Chinese,Malay": 0.2556818181818182, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Chinese,Malay": 0.3352272727272727, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3465909090909091, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453, + "English,Filipino,Spanish,Chinese,Malay": 0.2840909090909091, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.26704545454545453, + "English,Filipino,Spanish,Malay,Vietnamese": 0.2784090909090909, + "English,Filipino,Chinese,Malay,Vietnamese": 0.26136363636363635, + "English,Spanish,Chinese,Malay,Vietnamese": 0.3409090909090909, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.26136363636363635 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091 + } + }, + "AC3_2": 0.5374634709425026, + "AC3_3": 0.46120414668062015, + "AC3_4": 0.40686533744501, + "AC3_5": 0.3648665698857043, + "AC3_6": 0.3314788942770961, + "AC3_7": 0.30490909086615975 + }, + "prompt_4": { + "overall_acc": 0.47564935064935066, + "language_acc": { + "Indonesian": 0.42045454545454547, + "English": 0.5852272727272727, + "Filipino": 0.375, + "Spanish": 0.4943181818181818, + "Chinese": 0.4943181818181818, + "Malay": 0.4431818181818182, + "Vietnamese": 0.5170454545454546 + }, + "consistency_score_2": 0.578733766233766, + "consistency_score_3": 0.41152597402597396, + "consistency_score_4": 0.31818181818181807, + "consistency_score_5": 0.25676406926406925, + "consistency_score_6": 0.21185064935064934, + "consistency_score_7": 0.17613636363636365, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.6022727272727273, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.6363636363636364, + "Indonesian,Chinese": 0.5568181818181818, + "Indonesian,Malay": 0.6818181818181818, + "Indonesian,Vietnamese": 0.5965909090909091, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.6647727272727273, + "English,Chinese": 0.625, + "English,Malay": 0.5965909090909091, + "English,Vietnamese": 0.625, + "Filipino,Spanish": 0.5511363636363636, + "Filipino,Chinese": 0.4659090909090909, + "Filipino,Malay": 0.5340909090909091, + "Filipino,Vietnamese": 0.4715909090909091, + "Spanish,Chinese": 0.6022727272727273, + "Spanish,Malay": 0.6193181818181818, + "Spanish,Vietnamese": 0.5965909090909091, + "Chinese,Malay": 0.5397727272727273, + "Chinese,Vietnamese": 0.6022727272727273, + "Malay,Vietnamese": 0.5795454545454546 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.3465909090909091, + "Indonesian,English,Spanish": 0.4715909090909091, + "Indonesian,English,Chinese": 0.4318181818181818, + "Indonesian,English,Malay": 0.4602272727272727, + "Indonesian,English,Vietnamese": 0.45454545454545453, + "Indonesian,Filipino,Spanish": 0.3977272727272727, + "Indonesian,Filipino,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.42045454545454547, + "Indonesian,Filipino,Vietnamese": 0.3465909090909091, + "Indonesian,Spanish,Chinese": 0.4431818181818182, + "Indonesian,Spanish,Malay": 0.48863636363636365, + "Indonesian,Spanish,Vietnamese": 0.4431818181818182, + "Indonesian,Chinese,Malay": 0.4318181818181818, + "Indonesian,Chinese,Vietnamese": 0.42613636363636365, + "Indonesian,Malay,Vietnamese": 0.45454545454545453, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Filipino,Chinese": 0.3465909090909091, + "English,Filipino,Malay": 0.3522727272727273, + "English,Filipino,Vietnamese": 0.3352272727272727, + "English,Spanish,Chinese": 0.48863636363636365, + "English,Spanish,Malay": 0.4659090909090909, + "English,Spanish,Vietnamese": 0.4715909090909091, + "English,Chinese,Malay": 0.42045454545454547, + "English,Chinese,Vietnamese": 0.48295454545454547, + "English,Malay,Vietnamese": 0.4375, + "Filipino,Spanish,Chinese": 0.3522727272727273, + "Filipino,Spanish,Malay": 0.39204545454545453, + "Filipino,Spanish,Vietnamese": 0.36363636363636365, + "Filipino,Chinese,Malay": 0.3522727272727273, + "Filipino,Chinese,Vietnamese": 0.3352272727272727, + "Filipino,Malay,Vietnamese": 0.3522727272727273, + "Spanish,Chinese,Malay": 0.42613636363636365, + "Spanish,Chinese,Vietnamese": 0.4602272727272727, + "Spanish,Malay,Vietnamese": 0.4375, + "Chinese,Malay,Vietnamese": 0.42613636363636365 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.2897727272727273, + "Indonesian,English,Filipino,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Malay": 0.2840909090909091, + "Indonesian,English,Filipino,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Chinese": 0.3693181818181818, + "Indonesian,English,Spanish,Malay": 0.36363636363636365, + "Indonesian,English,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,English,Chinese,Malay": 0.3352272727272727, + "Indonesian,English,Chinese,Vietnamese": 0.3693181818181818, + "Indonesian,English,Malay,Vietnamese": 0.3693181818181818, + "Indonesian,Filipino,Spanish,Chinese": 0.2727272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Chinese,Malay": 0.2784090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Malay,Vietnamese": 0.2897727272727273, + "Indonesian,Spanish,Chinese,Malay": 0.35795454545454547, + "Indonesian,Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Indonesian,Spanish,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Chinese,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Spanish,Chinese": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Vietnamese": 0.2840909090909091, + "English,Filipino,Chinese,Malay": 0.2840909090909091, + "English,Filipino,Chinese,Vietnamese": 0.2840909090909091, + "English,Filipino,Malay,Vietnamese": 0.2840909090909091, + "English,Spanish,Chinese,Malay": 0.3465909090909091, + "English,Spanish,Chinese,Vietnamese": 0.4034090909090909, + "English,Spanish,Malay,Vietnamese": 0.35795454545454547, + "English,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Filipino,Spanish,Chinese,Malay": 0.2897727272727273, + "Filipino,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "Filipino,Spanish,Malay,Vietnamese": 0.2897727272727273, + "Filipino,Chinese,Malay,Vietnamese": 0.2897727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.22727272727272727, + "Indonesian,English,Filipino,Spanish,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.23295454545454544, + "Indonesian,English,Filipino,Chinese,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese,Malay": 0.2840909090909091, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3181818181818182, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365, + "English,Filipino,Spanish,Chinese,Malay": 0.23863636363636365, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.25, + "English,Filipino,Spanish,Malay,Vietnamese": 0.23863636363636365, + "English,Filipino,Chinese,Malay,Vietnamese": 0.25, + "English,Spanish,Chinese,Malay,Vietnamese": 0.3125, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.25 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 + } + }, + "AC3_2": 0.5221524048973402, + "AC3_3": 0.4412703032974172, + "AC3_4": 0.3812976389183179, + "AC3_5": 0.3334992491352812, + "AC3_6": 0.2931392691887205, + "AC3_7": 0.25707573866767924 + }, + "prompt_5": { + "overall_acc": 0.4748376623376624, + "language_acc": { + "Indonesian": 0.3977272727272727, + "English": 0.5909090909090909, + "Filipino": 0.375, + "Spanish": 0.5, + "Chinese": 0.5056818181818182, + "Malay": 0.45454545454545453, + "Vietnamese": 0.5 + }, + "consistency_score_2": 0.5744047619047619, + "consistency_score_3": 0.4004870129870129, + "consistency_score_4": 0.3040584415584415, + "consistency_score_5": 0.2413419913419913, + "consistency_score_6": 0.19561688311688316, + "consistency_score_7": 0.1590909090909091, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.5625, + "Indonesian,Filipino": 0.5056818181818182, + "Indonesian,Spanish": 0.6022727272727273, + "Indonesian,Chinese": 0.5511363636363636, + "Indonesian,Malay": 0.6931818181818182, + "Indonesian,Vietnamese": 0.5738636363636364, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.6704545454545454, + "English,Chinese": 0.6136363636363636, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.625, + "Filipino,Spanish": 0.5568181818181818, + "Filipino,Chinese": 0.48863636363636365, + "Filipino,Malay": 0.5340909090909091, + "Filipino,Vietnamese": 0.5056818181818182, + "Spanish,Chinese": 0.5852272727272727, + "Spanish,Malay": 0.6590909090909091, + "Spanish,Vietnamese": 0.5681818181818182, + "Chinese,Malay": 0.5568181818181818, + "Chinese,Vietnamese": 0.5909090909090909, + "Malay,Vietnamese": 0.5795454545454546 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.3068181818181818, + "Indonesian,English,Spanish": 0.44886363636363635, + "Indonesian,English,Chinese": 0.4090909090909091, + "Indonesian,English,Malay": 0.4318181818181818, + "Indonesian,English,Vietnamese": 0.4147727272727273, + "Indonesian,Filipino,Spanish": 0.36363636363636365, + "Indonesian,Filipino,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.3977272727272727, + "Indonesian,Filipino,Vietnamese": 0.3465909090909091, + "Indonesian,Spanish,Chinese": 0.4147727272727273, + "Indonesian,Spanish,Malay": 0.4943181818181818, + "Indonesian,Spanish,Vietnamese": 0.4034090909090909, + "Indonesian,Chinese,Malay": 0.4375, + "Indonesian,Chinese,Vietnamese": 0.4090909090909091, + "Indonesian,Malay,Vietnamese": 0.44886363636363635, + "English,Filipino,Spanish": 0.375, + "English,Filipino,Chinese": 0.3465909090909091, + "English,Filipino,Malay": 0.32954545454545453, + "English,Filipino,Vietnamese": 0.3465909090909091, + "English,Spanish,Chinese": 0.4772727272727273, + "English,Spanish,Malay": 0.4715909090909091, + "English,Spanish,Vietnamese": 0.45454545454545453, + "English,Chinese,Malay": 0.4090909090909091, + "English,Chinese,Vietnamese": 0.4602272727272727, + "English,Malay,Vietnamese": 0.42613636363636365, + "Filipino,Spanish,Chinese": 0.3522727272727273, + "Filipino,Spanish,Malay": 0.4090909090909091, + "Filipino,Spanish,Vietnamese": 0.35795454545454547, + "Filipino,Chinese,Malay": 0.35795454545454547, + "Filipino,Chinese,Vietnamese": 0.3409090909090909, + "Filipino,Malay,Vietnamese": 0.3465909090909091, + "Spanish,Chinese,Malay": 0.4375, + "Spanish,Chinese,Vietnamese": 0.42045454545454547, + "Spanish,Malay,Vietnamese": 0.4318181818181818, + "Chinese,Malay,Vietnamese": 0.42045454545454547 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.2556818181818182, + "Indonesian,English,Filipino,Chinese": 0.23863636363636365, + "Indonesian,English,Filipino,Malay": 0.24431818181818182, + "Indonesian,English,Filipino,Vietnamese": 0.2556818181818182, + "Indonesian,English,Spanish,Chinese": 0.3465909090909091, + "Indonesian,English,Spanish,Malay": 0.36363636363636365, + "Indonesian,English,Spanish,Vietnamese": 0.32386363636363635, + "Indonesian,English,Chinese,Malay": 0.32386363636363635, + "Indonesian,English,Chinese,Vietnamese": 0.3352272727272727, + "Indonesian,English,Malay,Vietnamese": 0.3409090909090909, + "Indonesian,Filipino,Spanish,Chinese": 0.25, + "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Chinese,Malay": 0.2784090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Malay,Vietnamese": 0.2897727272727273, + "Indonesian,Spanish,Chinese,Malay": 0.3522727272727273, + "Indonesian,Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Indonesian,Spanish,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,Chinese,Malay,Vietnamese": 0.3465909090909091, + "English,Filipino,Spanish,Chinese": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.2840909090909091, + "English,Filipino,Spanish,Vietnamese": 0.2897727272727273, + "English,Filipino,Chinese,Malay": 0.2727272727272727, + "English,Filipino,Chinese,Vietnamese": 0.2840909090909091, + "English,Filipino,Malay,Vietnamese": 0.26704545454545453, + "English,Spanish,Chinese,Malay": 0.35795454545454547, + "English,Spanish,Chinese,Vietnamese": 0.36363636363636365, + "English,Spanish,Malay,Vietnamese": 0.3465909090909091, + "English,Chinese,Malay,Vietnamese": 0.3465909090909091, + "Filipino,Spanish,Chinese,Malay": 0.2840909090909091, + "Filipino,Spanish,Chinese,Vietnamese": 0.2840909090909091, + "Filipino,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Filipino,Chinese,Malay,Vietnamese": 0.2840909090909091, + "Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay": 0.21022727272727273, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "Indonesian,English,Filipino,Chinese,Malay": 0.20454545454545456, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese,Malay": 0.2840909090909091, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, + "English,Filipino,Spanish,Chinese,Malay": 0.23295454545454544, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "English,Filipino,Spanish,Malay,Vietnamese": 0.22727272727272727, + "English,Filipino,Chinese,Malay,Vietnamese": 0.23863636363636365, + "English,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 + } + }, + "AC3_2": 0.5198970381881279, + "AC3_3": 0.43450464126601834, + "AC3_4": 0.370725694641208, + "AC3_5": 0.32002659217684465, + "AC3_6": 0.2770844469886398, + "AC3_7": 0.23833081127657013 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.6407766990291263 + }, + "prompt_2": { + "accuracy": 0.6407766990291263 + }, + "prompt_3": { + "accuracy": 0.6310679611650486 + }, + "prompt_4": { + "accuracy": 0.6893203883495146 + }, + "prompt_5": { + "accuracy": 0.6601941747572816 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.44761904761904764 + }, + "prompt_2": { + "accuracy": 0.42857142857142855 + }, + "prompt_3": { + "accuracy": 0.41904761904761906 + }, + "prompt_4": { + "accuracy": 0.41904761904761906 + }, + "prompt_5": { + "accuracy": 0.41904761904761906 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.7383177570093458 + }, + "prompt_2": { + "accuracy": 0.7383177570093458 + }, + "prompt_3": { + "accuracy": 0.7476635514018691 + }, + "prompt_4": { + "accuracy": 0.7570093457943925 + }, + "prompt_5": { + "accuracy": 0.7476635514018691 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.61, + "category_acc": { + "brand": 0.7, + "demographics": 0.2, + "biology": 0.6, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.9, + "film": 0.5, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.61, + "category_acc": { + "brand": 0.6, + "demographics": 0.2, + "biology": 0.6, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.9, + "culture": 0.9, + "film": 0.5, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.59, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.8, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.9, + "film": 0.5, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.57, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.8, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.7, + "culture": 0.9, + "film": 0.5, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_5": { + "accuracy": 0.6, + "category_acc": { + "brand": 0.6, + "demographics": 0.0, + "biology": 0.8, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.8, + "culture": 0.9, + "film": 0.5, + "law": 0.4, + "geography": 0.8 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.27639139147263536 + }, + "prompt_2": { + "bleu_score": 0.28582936543277865 + }, + "prompt_3": { + "bleu_score": 0.2814296877532448 + }, + "prompt_4": { + "bleu_score": 0.27246840198252714 + }, + "prompt_5": { + "bleu_score": 0.22963633899879426 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.32919420522064224, + "category_acc": { + "History": 0.3313253012048193, + "Geography": 0.3, + "Lampungic": 0.32653061224489793, + "Social science": 0.4357262103505843, + "Balinese": 0.28662420382165604, + "Makassarese": 0.24731182795698925, + "Banjarese": 0.3125, + "Chemistry": 0.22043795620437956, + "Biology": 0.2698224852071006, + "Science": 0.35500515995872034, + "Christian religion": 0.38308457711442784, + "Art": 0.3876871880199667, + "Islam religion": 0.3328591749644381, + "Hindu religion": 0.3466666666666667, + "Madurese": 0.28135593220338984, + "Sport": 0.3716216216216216, + "Indonesian language": 0.3854296388542964, + "Physics": 0.25656565656565655, + "Minangkabau culture": 0.2613065326633166, + "Dayak language": 0.25688073394495414, + "Sociology": 0.3125, + "Economy": 0.2848360655737705, + "Sundanese": 0.28608470181503887, + "Javanese": 0.30544354838709675, + "Civic education": 0.36337625178826893 + } + }, + "prompt_2": { + "accuracy": 0.3478202817277522, + "category_acc": { + "History": 0.3493975903614458, + "Geography": 0.3346938775510204, + "Lampungic": 0.29931972789115646, + "Social science": 0.48747913188647746, + "Balinese": 0.26963906581740976, + "Makassarese": 0.2956989247311828, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.26277372262773724, + "Biology": 0.3029585798816568, + "Science": 0.38080495356037153, + "Christian religion": 0.32338308457711445, + "Art": 0.415973377703827, + "Islam religion": 0.33712660028449504, + "Hindu religion": 0.35333333333333333, + "Madurese": 0.3050847457627119, + "Sport": 0.28378378378378377, + "Indonesian language": 0.41002490660024904, + "Physics": 0.30707070707070705, + "Minangkabau culture": 0.3316582914572864, + "Dayak language": 0.21100917431192662, + "Sociology": 0.33669354838709675, + "Economy": 0.3176229508196721, + "Sundanese": 0.30682800345721695, + "Javanese": 0.2711693548387097, + "Civic education": 0.38483547925608014 + } + }, + "prompt_3": { + "accuracy": 0.3960878563321984, + "category_acc": { + "History": 0.38353413654618473, + "Geography": 0.37551020408163266, + "Lampungic": 0.3469387755102041, + "Social science": 0.4724540901502504, + "Balinese": 0.3227176220806794, + "Makassarese": 0.3655913978494624, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.2846715328467153, + "Biology": 0.3633136094674556, + "Science": 0.43859649122807015, + "Christian religion": 0.4228855721393035, + "Art": 0.4559068219633943, + "Islam religion": 0.406827880512091, + "Hindu religion": 0.38, + "Madurese": 0.28135593220338984, + "Sport": 0.4594594594594595, + "Indonesian language": 0.4663760896637609, + "Physics": 0.31717171717171716, + "Minangkabau culture": 0.3015075376884422, + "Dayak language": 0.25688073394495414, + "Sociology": 0.3790322580645161, + "Economy": 0.3463114754098361, + "Sundanese": 0.3595505617977528, + "Javanese": 0.3558467741935484, + "Civic education": 0.43204577968526464 + } + }, + "prompt_4": { + "accuracy": 0.4834101074838107, + "category_acc": { + "History": 0.4738955823293173, + "Geography": 0.4489795918367347, + "Lampungic": 0.35374149659863946, + "Social science": 0.679465776293823, + "Balinese": 0.3184713375796178, + "Makassarese": 0.3548387096774194, + "Banjarese": 0.3958333333333333, + "Chemistry": 0.30364963503649633, + "Biology": 0.463905325443787, + "Science": 0.5830753353973168, + "Christian religion": 0.6318407960199005, + "Art": 0.562396006655574, + "Islam religion": 0.561877667140825, + "Hindu religion": 0.5133333333333333, + "Madurese": 0.3254237288135593, + "Sport": 0.5540540540540541, + "Indonesian language": 0.5414072229140723, + "Physics": 0.3898989898989899, + "Minangkabau culture": 0.37185929648241206, + "Dayak language": 0.3486238532110092, + "Sociology": 0.4657258064516129, + "Economy": 0.4344262295081967, + "Sundanese": 0.43820224719101125, + "Javanese": 0.3971774193548387, + "Civic education": 0.5507868383404864 + } + }, + "prompt_5": { + "accuracy": 0.4880833166433006, + "category_acc": { + "History": 0.4879518072289157, + "Geography": 0.44285714285714284, + "Lampungic": 0.3469387755102041, + "Social science": 0.7161936560934892, + "Balinese": 0.3057324840764331, + "Makassarese": 0.34946236559139787, + "Banjarese": 0.4027777777777778, + "Chemistry": 0.327007299270073, + "Biology": 0.4650887573964497, + "Science": 0.6171310629514963, + "Christian religion": 0.6019900497512438, + "Art": 0.5557404326123128, + "Islam religion": 0.577524893314367, + "Hindu religion": 0.5133333333333333, + "Madurese": 0.3152542372881356, + "Sport": 0.5945945945945946, + "Indonesian language": 0.537359900373599, + "Physics": 0.4080808080808081, + "Minangkabau culture": 0.40703517587939697, + "Dayak language": 0.25688073394495414, + "Sociology": 0.46975806451612906, + "Economy": 0.4385245901639344, + "Sundanese": 0.439066551426102, + "Javanese": 0.39314516129032256, + "Civic education": 0.5550786838340487 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.36184271253034317 + }, + "prompt_2": { + "bleu_score": 0.36303459683677264 + }, + "prompt_3": { + "bleu_score": 0.36432999432611834 + }, + "prompt_4": { + "bleu_score": 0.362458890422489 + }, + "prompt_5": { + "bleu_score": 0.3538206467732131 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.2950787163255092 + }, + "prompt_2": { + "bleu_score": 0.2952562787305222 + }, + "prompt_3": { + "bleu_score": 0.29851761299707374 + }, + "prompt_4": { + "bleu_score": 0.2966319288287702 + }, + "prompt_5": { + "bleu_score": 0.2855514692066493 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.22657273184612212 + }, + "prompt_2": { + "bleu_score": 0.224320719049576 + }, + "prompt_3": { + "bleu_score": 0.2271147306830071 + }, + "prompt_4": { + "bleu_score": 0.22533970444076762 + }, + "prompt_5": { + "bleu_score": 0.21888804222245872 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.36848470575722986 + }, + "prompt_2": { + "bleu_score": 0.37145222557078283 + }, + "prompt_3": { + "bleu_score": 0.36976308244103384 + }, + "prompt_4": { + "bleu_score": 0.36750377990521454 + }, + "prompt_5": { + "bleu_score": 0.34745161548526615 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.6137689614935823 + }, + "prompt_2": { + "accuracy": 0.6091015169194866 + }, + "prompt_3": { + "accuracy": 0.6102683780630105 + }, + "prompt_4": { + "accuracy": 0.617269544924154 + }, + "prompt_5": { + "accuracy": 0.6149358226371062 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.6030747229174115, + "category_acc": { + "high_school_european_history": 0.7378048780487805, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.7045454545454546, + "medical_genetics": 0.7272727272727273, + "high_school_us_history": 0.812807881773399, + "high_school_physics": 0.34, + "high_school_world_history": 0.8389830508474576, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.6708860759493671, + "econometrics": 0.40707964601769914, + "college_computer_science": 0.5858585858585859, + "high_school_biology": 0.7508090614886731, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.4377224199288256, + "philosophy": 0.6612903225806451, + "professional_medicine": 0.6678966789667896, + "nutrition": 0.6655737704918033, + "global_facts": 0.3838383838383838, + "machine_learning": 0.46846846846846846, + "security_studies": 0.6721311475409836, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.6432078559738135, + "prehistory": 0.6965944272445821, + "anatomy": 0.6194029850746269, + "human_sexuality": 0.7230769230769231, + "college_medicine": 0.5813953488372093, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6790123456790124, + "high_school_geography": 0.7411167512690355, + "elementary_mathematics": 0.4297082228116711, + "human_aging": 0.6711711711711712, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.8125, + "formal_logic": 0.432, + "high_school_statistics": 0.5116279069767442, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.3308550185873606, + "high_school_computer_science": 0.6666666666666666, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7928388746803069, + "high_school_chemistry": 0.5198019801980198, + "marketing": 0.8369098712446352, + "professional_law": 0.44422700587084146, + "management": 0.7647058823529411, + "college_physics": 0.46534653465346537, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.8117647058823529, + "sociology": 0.805, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5424164524421594, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.3523489932885906, + "moral_disputes": 0.6376811594202898, + "electrical_engineering": 0.5486111111111112, + "astronomy": 0.6357615894039735, + "college_biology": 0.7482517482517482 + } + }, + "prompt_2": { + "accuracy": 0.6010010725777619, + "category_acc": { + "high_school_european_history": 0.7317073170731707, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6893939393939394, + "medical_genetics": 0.7676767676767676, + "high_school_us_history": 0.7832512315270936, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.8220338983050848, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6666666666666666, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7378640776699029, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.43416370106761565, + "philosophy": 0.6548387096774193, + "professional_medicine": 0.6568265682656826, + "nutrition": 0.6655737704918033, + "global_facts": 0.3333333333333333, + "machine_learning": 0.5045045045045045, + "security_studies": 0.6844262295081968, + "public_relations": 0.6330275229357798, + "professional_psychology": 0.6170212765957447, + "prehistory": 0.6934984520123839, + "anatomy": 0.5970149253731343, + "human_sexuality": 0.7230769230769231, + "college_medicine": 0.5697674418604651, + "high_school_government_and_politics": 0.8229166666666666, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.6790123456790124, + "high_school_geography": 0.7411167512690355, + "elementary_mathematics": 0.4217506631299735, + "human_aging": 0.6801801801801802, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.8069852941176471, + "formal_logic": 0.472, + "high_school_statistics": 0.4883720930232558, + "international_law": 0.7166666666666667, + "high_school_mathematics": 0.35687732342007433, + "high_school_computer_science": 0.6464646464646465, + "conceptual_physics": 0.5256410256410257, + "miscellaneous": 0.8107416879795396, + "high_school_chemistry": 0.5247524752475248, + "marketing": 0.8326180257510729, + "professional_law": 0.4637964774951076, + "management": 0.7745098039215687, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.8, + "sociology": 0.805, + "us_foreign_policy": 0.8181818181818182, + "high_school_macroeconomics": 0.5526992287917738, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.32997762863534674, + "moral_disputes": 0.6405797101449275, + "electrical_engineering": 0.5625, + "astronomy": 0.6158940397350994, + "college_biology": 0.7202797202797203 + } + }, + "prompt_3": { + "accuracy": 0.6005720414730068, + "category_acc": { + "high_school_european_history": 0.7439024390243902, + "business_ethics": 0.6565656565656566, + "clinical_knowledge": 0.7007575757575758, + "medical_genetics": 0.7676767676767676, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.8347457627118644, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.6666666666666666, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7313915857605178, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.4234875444839858, + "philosophy": 0.6548387096774193, + "professional_medicine": 0.6642066420664207, + "nutrition": 0.659016393442623, + "global_facts": 0.30303030303030304, + "machine_learning": 0.46846846846846846, + "security_studies": 0.7008196721311475, + "public_relations": 0.6513761467889908, + "professional_psychology": 0.6219312602291326, + "prehistory": 0.7058823529411765, + "anatomy": 0.5970149253731343, + "human_sexuality": 0.7230769230769231, + "college_medicine": 0.6046511627906976, + "high_school_government_and_politics": 0.8229166666666666, + "college_chemistry": 0.46464646464646464, + "logical_fallacies": 0.654320987654321, + "high_school_geography": 0.7461928934010152, + "elementary_mathematics": 0.40583554376657827, + "human_aging": 0.6666666666666666, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.7996323529411765, + "formal_logic": 0.432, + "high_school_statistics": 0.5302325581395348, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.35315985130111527, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5085470085470085, + "miscellaneous": 0.7979539641943734, + "high_school_chemistry": 0.5346534653465347, + "marketing": 0.8283261802575107, + "professional_law": 0.45596868884540115, + "management": 0.7941176470588235, + "college_physics": 0.43564356435643564, + "jurisprudence": 0.7009345794392523, + "world_religions": 0.788235294117647, + "sociology": 0.825, + "us_foreign_policy": 0.8383838383838383, + "high_school_macroeconomics": 0.5449871465295629, + "computer_security": 0.6868686868686869, + "moral_scenarios": 0.3344519015659955, + "moral_disputes": 0.6318840579710145, + "electrical_engineering": 0.5694444444444444, + "astronomy": 0.6291390728476821, + "college_biology": 0.7132867132867133 + } + }, + "prompt_4": { + "accuracy": 0.6073650339649624, + "category_acc": { + "high_school_european_history": 0.7378048780487805, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.696969696969697, + "medical_genetics": 0.7373737373737373, + "high_school_us_history": 0.8226600985221675, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.8305084745762712, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6877637130801688, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.5757575757575758, + "high_school_biology": 0.7605177993527508, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.4412811387900356, + "philosophy": 0.667741935483871, + "professional_medicine": 0.6678966789667896, + "nutrition": 0.6819672131147541, + "global_facts": 0.3434343434343434, + "machine_learning": 0.4774774774774775, + "security_studies": 0.6762295081967213, + "public_relations": 0.6605504587155964, + "professional_psychology": 0.6317512274959084, + "prehistory": 0.6842105263157895, + "anatomy": 0.5970149253731343, + "human_sexuality": 0.7076923076923077, + "college_medicine": 0.6046511627906976, + "high_school_government_and_politics": 0.8385416666666666, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.7411167512690355, + "elementary_mathematics": 0.41644562334217505, + "human_aging": 0.6801801801801802, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.8014705882352942, + "formal_logic": 0.464, + "high_school_statistics": 0.5255813953488372, + "international_law": 0.7166666666666667, + "high_school_mathematics": 0.3345724907063197, + "high_school_computer_science": 0.6666666666666666, + "conceptual_physics": 0.5170940170940171, + "miscellaneous": 0.80306905370844, + "high_school_chemistry": 0.5445544554455446, + "marketing": 0.8283261802575107, + "professional_law": 0.4455316373124592, + "management": 0.7745098039215687, + "college_physics": 0.44554455445544555, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.8235294117647058, + "sociology": 0.815, + "us_foreign_policy": 0.8282828282828283, + "high_school_macroeconomics": 0.5604113110539846, + "computer_security": 0.7171717171717171, + "moral_scenarios": 0.354586129753915, + "moral_disputes": 0.6434782608695652, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.6423841059602649, + "college_biology": 0.7552447552447552 + } + }, + "prompt_5": { + "accuracy": 0.6035037540221666, + "category_acc": { + "high_school_european_history": 0.7439024390243902, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.7121212121212122, + "medical_genetics": 0.7474747474747475, + "high_school_us_history": 0.812807881773399, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.8135593220338984, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.6877637130801688, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.5555555555555556, + "high_school_biology": 0.7378640776699029, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.43416370106761565, + "philosophy": 0.6612903225806451, + "professional_medicine": 0.6752767527675276, + "nutrition": 0.6754098360655738, + "global_facts": 0.37373737373737376, + "machine_learning": 0.43243243243243246, + "security_studies": 0.6885245901639344, + "public_relations": 0.6238532110091743, + "professional_psychology": 0.6317512274959084, + "prehistory": 0.7027863777089783, + "anatomy": 0.6044776119402985, + "human_sexuality": 0.7384615384615385, + "college_medicine": 0.6104651162790697, + "high_school_government_and_politics": 0.8385416666666666, + "college_chemistry": 0.46464646464646464, + "logical_fallacies": 0.691358024691358, + "high_school_geography": 0.7360406091370558, + "elementary_mathematics": 0.4323607427055703, + "human_aging": 0.6711711711711712, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.8088235294117647, + "formal_logic": 0.384, + "high_school_statistics": 0.5348837209302325, + "international_law": 0.7166666666666667, + "high_school_mathematics": 0.3345724907063197, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5085470085470085, + "miscellaneous": 0.8043478260869565, + "high_school_chemistry": 0.5297029702970297, + "marketing": 0.8369098712446352, + "professional_law": 0.4507501630789302, + "management": 0.7745098039215687, + "college_physics": 0.4752475247524752, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.8058823529411765, + "sociology": 0.83, + "us_foreign_policy": 0.8181818181818182, + "high_school_macroeconomics": 0.5372750642673522, + "computer_security": 0.7171717171717171, + "moral_scenarios": 0.32550335570469796, + "moral_disputes": 0.6231884057971014, + "electrical_engineering": 0.5972222222222222, + "astronomy": 0.6423841059602649, + "college_biology": 0.7342657342657343 + } + } + }, + "c_eval": { + "prompt_1": { + "accuracy": 0.48068350668647847 + }, + "prompt_2": { + "accuracy": 0.4398216939078752 + }, + "prompt_3": { + "accuracy": 0.4561664190193165 + }, + "prompt_4": { + "accuracy": 0.46285289747399705 + }, + "prompt_5": { + "accuracy": 0.47325408618127784 + } + }, + "c_eval_full": { + "prompt_1": { + "accuracy": 0.47198007471980075, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.5238095238095238, + "college_physics": 0.375, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.35, + "business_administration": 0.4473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.6176470588235294, + "teacher_qualification": 0.673469387755102, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.7692307692307693, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.6296296296296297, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.5, + "high_school_chinese": 0.375, + "high_school_history": 0.64, + "middle_school_history": 0.6296296296296297, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4166666666666667, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.5, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.4074074074074074, + "physician": 0.48148148148148145 + } + }, + "prompt_2": { + "accuracy": 0.44956413449564137, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.4523809523809524, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.35, + "business_administration": 0.42105263157894735, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.5, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.7692307692307693, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.5555555555555556, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.5, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.64, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.4230769230769231, + "sports_science": 0.5416666666666666, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.5, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3888888888888889, + "physician": 0.5185185185185185 + } + }, + "prompt_3": { + "accuracy": 0.4526774595267746, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.625, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.5, + "college_physics": 0.4583333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.25, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.36666666666666664, + "business_administration": 0.39473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.625, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.5555555555555556, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.5, + "legal_professional": 0.5, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.64, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.38461538461538464, + "sports_science": 0.375, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.375, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.6274509803921569, + "accountant": 0.5, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3333333333333333, + "physician": 0.5185185185185185 + } + }, + "prompt_4": { + "accuracy": 0.4688667496886675, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.625, + "computer_architecture": 0.6153846153846154, + "college_programming": 0.5, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.76, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.4, + "business_administration": 0.3684210526315789, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.5, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.6296296296296297, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.6, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.38461538461538464, + "sports_science": 0.4583333333333333, + "plant_protection": 0.6666666666666666, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.48148148148148145, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.4074074074074074, + "physician": 0.5185185185185185 + } + }, + "prompt_5": { + "accuracy": 0.45952677459526775, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.47619047619047616, + "college_physics": 0.5416666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.25, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.76, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.38333333333333336, + "business_administration": 0.39473684210526316, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.6176470588235294, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.6666666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.6296296296296297, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.5, + "legal_professional": 0.5, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.64, + "middle_school_history": 0.6666666666666666, + "civil_servant": 0.34615384615384615, + "sports_science": 0.5, + "plant_protection": 0.6666666666666666, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.3333333333333333, + "physician": 0.46296296296296297 + } + } + }, + "cmmlu": { + "prompt_1": { + "accuracy": 0.5017921146953405 + }, + "prompt_2": { + "accuracy": 0.3978494623655914 + }, + "prompt_3": { + "accuracy": 0.34050179211469533 + }, + "prompt_4": { + "accuracy": 0.5197132616487455 + }, + "prompt_5": { + "accuracy": 0.4838709677419355 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.46926264893800723, + "category_acc": { + "agronomy": 0.4437869822485207, + "anatomy": 0.34459459459459457, + "ancient_chinese": 0.2621951219512195, + "arts": 0.5625, + "astronomy": 0.28484848484848485, + "business_ethics": 0.5502392344497608, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.5877862595419847, + "chinese_food_culture": 0.41911764705882354, + "chinese_foreign_policy": 0.5327102803738317, + "chinese_history": 0.5139318885448917, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.5642458100558659, + "clinical_knowledge": 0.4050632911392405, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.5794392523364486, + "college_engineering_hydrology": 0.5188679245283019, + "college_law": 0.37037037037037035, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.5094339622641509, + "college_medicine": 0.3882783882783883, + "computer_science": 0.5735294117647058, + "computer_security": 0.6491228070175439, + "conceptual_physics": 0.4217687074829932, + "construction_project_management": 0.39568345323741005, + "economics": 0.5786163522012578, + "education": 0.5214723926380368, + "electrical_engineering": 0.4069767441860465, + "elementary_chinese": 0.4126984126984127, + "elementary_commonsense": 0.4090909090909091, + "elementary_information_and_technology": 0.726890756302521, + "elementary_mathematics": 0.3217391304347826, + "ethnology": 0.362962962962963, + "food_science": 0.5384615384615384, + "genetics": 0.42613636363636365, + "global_facts": 0.4966442953020134, + "high_school_biology": 0.3905325443786982, + "high_school_chemistry": 0.3484848484848485, + "high_school_geography": 0.4830508474576271, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.4, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.5714285714285714, + "international_law": 0.4, + "journalism": 0.46511627906976744, + "jurisprudence": 0.46958637469586373, + "legal_and_moral_basis": 0.8084112149532711, + "logical": 0.4878048780487805, + "machine_learning": 0.47540983606557374, + "management": 0.5476190476190477, + "marketing": 0.5555555555555556, + "marxist_theory": 0.5608465608465608, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.46206896551724136, + "philosophy": 0.5238095238095238, + "professional_accounting": 0.49714285714285716, + "professional_law": 0.3886255924170616, + "professional_medicine": 0.3670212765957447, + "professional_psychology": 0.521551724137931, + "public_relations": 0.4942528735632184, + "security_study": 0.6666666666666666, + "sociology": 0.5176991150442478, + "sports_science": 0.4727272727272727, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.5680473372781065, + "world_history": 0.5403726708074534, + "world_religions": 0.59375 + } + }, + "prompt_2": { + "accuracy": 0.4173717838024521, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2682926829268293, + "arts": 0.45625, + "astronomy": 0.28484848484848485, + "business_ethics": 0.46411483253588515, + "chinese_civil_service_exam": 0.4375, + "chinese_driving_rule": 0.46564885496183206, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.5514018691588785, + "chinese_history": 0.5325077399380805, + "chinese_literature": 0.37254901960784315, + "chinese_teacher_qualification": 0.5363128491620112, + "clinical_knowledge": 0.37130801687763715, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5046728971962616, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.32407407407407407, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.49056603773584906, + "college_medicine": 0.32967032967032966, + "computer_science": 0.4019607843137255, + "computer_security": 0.5906432748538012, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.38848920863309355, + "economics": 0.49056603773584906, + "education": 0.44785276073619634, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.3531746031746032, + "elementary_commonsense": 0.3686868686868687, + "elementary_information_and_technology": 0.4957983193277311, + "elementary_mathematics": 0.22608695652173913, + "ethnology": 0.34814814814814815, + "food_science": 0.3776223776223776, + "genetics": 0.4375, + "global_facts": 0.40268456375838924, + "high_school_biology": 0.39644970414201186, + "high_school_chemistry": 0.4015151515151515, + "high_school_geography": 0.4152542372881356, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.5174825174825175, + "human_sexuality": 0.47619047619047616, + "international_law": 0.4, + "journalism": 0.436046511627907, + "jurisprudence": 0.45742092457420924, + "legal_and_moral_basis": 0.7149532710280374, + "logical": 0.42276422764227645, + "machine_learning": 0.4344262295081967, + "management": 0.4095238095238095, + "marketing": 0.48333333333333334, + "marxist_theory": 0.5132275132275133, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.42758620689655175, + "philosophy": 0.5047619047619047, + "professional_accounting": 0.4342857142857143, + "professional_law": 0.3744075829383886, + "professional_medicine": 0.3696808510638298, + "professional_psychology": 0.4482758620689655, + "public_relations": 0.39655172413793105, + "security_study": 0.5481481481481482, + "sociology": 0.43805309734513276, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.47928994082840237, + "world_history": 0.5217391304347826, + "world_religions": 0.525 + } + }, + "prompt_3": { + "accuracy": 0.42436539457779315, + "category_acc": { + "agronomy": 0.41420118343195267, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.2865853658536585, + "arts": 0.46875, + "astronomy": 0.22424242424242424, + "business_ethics": 0.4688995215311005, + "chinese_civil_service_exam": 0.4375, + "chinese_driving_rule": 0.4580152671755725, + "chinese_food_culture": 0.35294117647058826, + "chinese_foreign_policy": 0.5233644859813084, + "chinese_history": 0.5541795665634675, + "chinese_literature": 0.3480392156862745, + "chinese_teacher_qualification": 0.5810055865921788, + "clinical_knowledge": 0.4092827004219409, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.4953271028037383, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.3333333333333333, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.49056603773584906, + "college_medicine": 0.36996336996337, + "computer_science": 0.4411764705882353, + "computer_security": 0.5730994152046783, + "conceptual_physics": 0.40816326530612246, + "construction_project_management": 0.37410071942446044, + "economics": 0.5157232704402516, + "education": 0.4662576687116564, + "electrical_engineering": 0.37790697674418605, + "elementary_chinese": 0.4007936507936508, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.5042016806722689, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3333333333333333, + "food_science": 0.4125874125874126, + "genetics": 0.4034090909090909, + "global_facts": 0.40939597315436244, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.38636363636363635, + "high_school_geography": 0.4322033898305085, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.5244755244755245, + "human_sexuality": 0.5158730158730159, + "international_law": 0.3783783783783784, + "journalism": 0.37790697674418605, + "jurisprudence": 0.46715328467153283, + "legal_and_moral_basis": 0.719626168224299, + "logical": 0.4146341463414634, + "machine_learning": 0.4426229508196721, + "management": 0.4238095238095238, + "marketing": 0.5055555555555555, + "marxist_theory": 0.5238095238095238, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.4689655172413793, + "philosophy": 0.5142857142857142, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.36018957345971564, + "professional_medicine": 0.3723404255319149, + "professional_psychology": 0.4482758620689655, + "public_relations": 0.40804597701149425, + "security_study": 0.5259259259259259, + "sociology": 0.43805309734513276, + "sports_science": 0.41818181818181815, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.47928994082840237, + "world_history": 0.5279503105590062, + "world_religions": 0.53125 + } + }, + "prompt_4": { + "accuracy": 0.4589880849594198, + "category_acc": { + "agronomy": 0.46745562130177515, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2682926829268293, + "arts": 0.58125, + "astronomy": 0.28484848484848485, + "business_ethics": 0.5215311004784688, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.5877862595419847, + "chinese_food_culture": 0.4338235294117647, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.49226006191950467, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.5754189944134078, + "clinical_knowledge": 0.39662447257383965, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.5700934579439252, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.37962962962962965, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.5, + "college_medicine": 0.37362637362637363, + "computer_science": 0.5784313725490197, + "computer_security": 0.6549707602339181, + "conceptual_physics": 0.4421768707482993, + "construction_project_management": 0.39568345323741005, + "economics": 0.5408805031446541, + "education": 0.50920245398773, + "electrical_engineering": 0.4069767441860465, + "elementary_chinese": 0.3968253968253968, + "elementary_commonsense": 0.40404040404040403, + "elementary_information_and_technology": 0.7352941176470589, + "elementary_mathematics": 0.2956521739130435, + "ethnology": 0.37037037037037035, + "food_science": 0.5314685314685315, + "genetics": 0.42045454545454547, + "global_facts": 0.47651006711409394, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.36363636363636365, + "high_school_geography": 0.4915254237288136, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.4755244755244755, + "human_sexuality": 0.5238095238095238, + "international_law": 0.3567567567567568, + "journalism": 0.48255813953488375, + "jurisprudence": 0.45985401459854014, + "legal_and_moral_basis": 0.8084112149532711, + "logical": 0.4634146341463415, + "machine_learning": 0.48360655737704916, + "management": 0.5333333333333333, + "marketing": 0.5333333333333333, + "marxist_theory": 0.5396825396825397, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.4689655172413793, + "philosophy": 0.5142857142857142, + "professional_accounting": 0.52, + "professional_law": 0.3696682464454976, + "professional_medicine": 0.3484042553191489, + "professional_psychology": 0.4956896551724138, + "public_relations": 0.47126436781609193, + "security_study": 0.6148148148148148, + "sociology": 0.5, + "sports_science": 0.47878787878787876, + "traditional_chinese_medicine": 0.31891891891891894, + "virology": 0.5502958579881657, + "world_history": 0.5341614906832298, + "world_religions": 0.58125 + } + }, + "prompt_5": { + "accuracy": 0.4625280607839751, + "category_acc": { + "agronomy": 0.44970414201183434, + "anatomy": 0.32432432432432434, + "ancient_chinese": 0.2621951219512195, + "arts": 0.5375, + "astronomy": 0.2909090909090909, + "business_ethics": 0.5454545454545454, + "chinese_civil_service_exam": 0.38125, + "chinese_driving_rule": 0.5801526717557252, + "chinese_food_culture": 0.38235294117647056, + "chinese_foreign_policy": 0.5420560747663551, + "chinese_history": 0.5356037151702786, + "chinese_literature": 0.3431372549019608, + "chinese_teacher_qualification": 0.5698324022346368, + "clinical_knowledge": 0.4050632911392405, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.5887850467289719, + "college_engineering_hydrology": 0.4528301886792453, + "college_law": 0.35185185185185186, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.5188679245283019, + "college_medicine": 0.3626373626373626, + "computer_science": 0.5931372549019608, + "computer_security": 0.6081871345029239, + "conceptual_physics": 0.42857142857142855, + "construction_project_management": 0.41007194244604317, + "economics": 0.5723270440251572, + "education": 0.5030674846625767, + "electrical_engineering": 0.4127906976744186, + "elementary_chinese": 0.4007936507936508, + "elementary_commonsense": 0.4393939393939394, + "elementary_information_and_technology": 0.7226890756302521, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3851851851851852, + "food_science": 0.5174825174825175, + "genetics": 0.39204545454545453, + "global_facts": 0.48322147651006714, + "high_school_biology": 0.40828402366863903, + "high_school_chemistry": 0.3712121212121212, + "high_school_geography": 0.4745762711864407, + "high_school_mathematics": 0.1951219512195122, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.4405594405594406, + "human_sexuality": 0.5555555555555556, + "international_law": 0.3837837837837838, + "journalism": 0.48255813953488375, + "jurisprudence": 0.45255474452554745, + "legal_and_moral_basis": 0.7990654205607477, + "logical": 0.4634146341463415, + "machine_learning": 0.4672131147540984, + "management": 0.5333333333333333, + "marketing": 0.5444444444444444, + "marxist_theory": 0.5185185185185185, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.45517241379310347, + "philosophy": 0.5428571428571428, + "professional_accounting": 0.5085714285714286, + "professional_law": 0.4028436018957346, + "professional_medicine": 0.3723404255319149, + "professional_psychology": 0.5172413793103449, + "public_relations": 0.46551724137931033, + "security_study": 0.6518518518518519, + "sociology": 0.5176991150442478, + "sports_science": 0.49696969696969695, + "traditional_chinese_medicine": 0.32972972972972975, + "virology": 0.5502958579881657, + "world_history": 0.5341614906832298, + "world_religions": 0.59375 + } + } + }, + "zbench": { + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.3939393939393939 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.6727272727272727 + }, + "prompt_2": { + "accuracy": 0.6704545454545454 + }, + "prompt_3": { + "accuracy": 0.6840909090909091 + }, + "prompt_4": { + "accuracy": 0.6454545454545455 + }, + "prompt_5": { + "accuracy": 0.6363636363636364 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.5864406779661017 + }, + "prompt_2": { + "accuracy": 0.5613559322033899 + }, + "prompt_3": { + "accuracy": 0.576271186440678 + }, + "prompt_4": { + "accuracy": 0.5108474576271187 + }, + "prompt_5": { + "accuracy": 0.5335593220338983 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.831338818249813 + }, + "prompt_2": { + "accuracy": 0.8388182498130142 + }, + "prompt_3": { + "accuracy": 0.8384442782348541 + }, + "prompt_4": { + "accuracy": 0.8347045624532535 + }, + "prompt_5": { + "accuracy": 0.818997756170531 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.8946594806467418 + }, + "prompt_2": { + "accuracy": 0.8951494365507104 + }, + "prompt_3": { + "accuracy": 0.8980891719745223 + }, + "prompt_4": { + "accuracy": 0.8995590396864283 + }, + "prompt_5": { + "accuracy": 0.8961293483586478 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.40391070533815576, + "rouge2": 0.16849240138758603, + "rougeL": 0.3152314020541656, + "avg_rouge": 0.29587816959330243 + }, + "prompt_2": { + "rouge1": 0.4250641042447893, + "rouge2": 0.18421377088719168, + "rougeL": 0.3325789896119648, + "avg_rouge": 0.31395228824798194 + }, + "prompt_3": { + "rouge1": 0.3950434992409805, + "rouge2": 0.15810798548291335, + "rougeL": 0.30600774739209147, + "avg_rouge": 0.28638641070532844 + }, + "prompt_4": { + "rouge1": 0.40583240968226986, + "rouge2": 0.1651988720764051, + "rougeL": 0.31574400302320216, + "avg_rouge": 0.295591761593959 + }, + "prompt_5": { + "rouge1": 0.4171195132140153, + "rouge2": 0.17419094604455684, + "rougeL": 0.32814280990301686, + "avg_rouge": 0.30648442305386303 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.3489440434996578, + "rouge2": 0.13180175958616205, + "rougeL": 0.272088641371884, + "avg_rouge": 0.2509448148192346 + }, + "prompt_2": { + "rouge1": 0.35049668037175385, + "rouge2": 0.1358757722654819, + "rougeL": 0.27365807232254946, + "avg_rouge": 0.2533435083199284 + }, + "prompt_3": { + "rouge1": 0.35332369180371703, + "rouge2": 0.13212934688024375, + "rougeL": 0.2751467196674016, + "avg_rouge": 0.25353325278378747 + }, + "prompt_4": { + "rouge1": 0.3413976495851046, + "rouge2": 0.12735720749577328, + "rougeL": 0.2650857356907353, + "avg_rouge": 0.24461353092387106 + }, + "prompt_5": { + "rouge1": 0.3598875439493886, + "rouge2": 0.13709311090380896, + "rougeL": 0.281152520373277, + "avg_rouge": 0.2593777250754915 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.9277522935779816 + }, + "prompt_2": { + "accuracy": 0.926605504587156 + }, + "prompt_3": { + "accuracy": 0.9288990825688074 + }, + "prompt_4": { + "accuracy": 0.9243119266055045 + }, + "prompt_5": { + "accuracy": 0.4793577981651376 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.8197507190795782 + }, + "prompt_2": { + "accuracy": 0.8072866730584851 + }, + "prompt_3": { + "accuracy": 0.8187919463087249 + }, + "prompt_4": { + "accuracy": 0.8302972195589645 + }, + "prompt_5": { + "accuracy": 0.7919463087248322 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.807 + }, + "prompt_2": { + "accuracy": 0.8165 + }, + "prompt_3": { + "accuracy": 0.817 + }, + "prompt_4": { + "accuracy": 0.827 + }, + "prompt_5": { + "accuracy": 0.7865 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.8 + }, + "prompt_2": { + "accuracy": 0.7 + }, + "prompt_3": { + "accuracy": 0.7 + }, + "prompt_4": { + "accuracy": 0.7 + }, + "prompt_5": { + "accuracy": 0.7 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 1.0 + }, + "prompt_2": { + "accuracy": 0.9 + }, + "prompt_3": { + "accuracy": 0.8 + }, + "prompt_4": { + "accuracy": 1.0 + }, + "prompt_5": { + "accuracy": 1.0 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.4 + }, + "prompt_2": { + "accuracy": 0.4 + }, + "prompt_3": { + "accuracy": 0.4 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.4 + } + }, + "rte": { + "prompt_1": { + "accuracy": 1.0 + }, + "prompt_2": { + "accuracy": 0.9 + }, + "prompt_3": { + "accuracy": 0.9 + }, + "prompt_4": { + "accuracy": 0.9 + }, + "prompt_5": { + "accuracy": 0.9 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.9 + }, + "prompt_2": { + "accuracy": 0.9 + }, + "prompt_3": { + "accuracy": 0.9 + }, + "prompt_4": { + "accuracy": 0.9 + }, + "prompt_5": { + "accuracy": 0.9 + } + } + }, + "five_shot": { + "cross_xquad": { + "prompt_1": -1 + }, + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + } + } } } \ No newline at end of file