diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -30224,1903 +30224,1903 @@ "zero_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.9378151260504202, + "overall_acc": 0.926470588235294, "language_acc": { - "Spanish": 0.9394957983193277, - "English": 0.9478991596638655, - "Chinese": 0.9294117647058824, - "Vietnamese": 0.934453781512605 + "Spanish": 0.9294117647058824, + "English": 0.9436974789915966, + "Chinese": 0.9176470588235294, + "Vietnamese": 0.915126050420168 }, - "consistency_score_2": 0.9329131652661066, - "consistency_score_3": 0.901890756302521, - "consistency_score_4": 0.8798319327731092, + "consistency_score_2": 0.9120448179271708, + "consistency_score_3": 0.8716386554621849, + "consistency_score_4": 0.8436974789915966, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9445378151260504, - "Spanish,Chinese": 0.926890756302521, - "Spanish,Vietnamese": 0.9302521008403362, - "English,Chinese": 0.9336134453781513, - "English,Vietnamese": 0.9428571428571428, - "Chinese,Vietnamese": 0.9193277310924369 + "Spanish,English": 0.926890756302521, + "Spanish,Chinese": 0.9016806722689076, + "Spanish,Vietnamese": 0.9134453781512605, + "English,Chinese": 0.9226890756302522, + "English,Vietnamese": 0.9176470588235294, + "Chinese,Vietnamese": 0.8899159663865546 }, "3_combine": { - "Spanish,English,Chinese": 0.9042016806722689, - "Spanish,English,Vietnamese": 0.9117647058823529, - "Spanish,Chinese,Vietnamese": 0.8915966386554622, - "English,Chinese,Vietnamese": 0.9 + "Spanish,English,Chinese": 0.8798319327731092, + "Spanish,English,Vietnamese": 0.8815126050420168, + "Spanish,Chinese,Vietnamese": 0.8563025210084033, + "English,Chinese,Vietnamese": 0.8689075630252101 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8798319327731092 + "Spanish,English,Chinese,Vietnamese": 0.8436974789915966 } }, - "AC3_2": 0.9353577231845365, - "AC3_3": 0.9195021893149736, - "AC3_4": 0.9078986934080167 + "AC3_2": 0.9192011076803658, + "AC3_3": 0.8982185933811098, + "AC3_4": 0.8831487970906128 }, "prompt_2": { - "overall_acc": 0.9336134453781513, + "overall_acc": 0.8930672268907563, "language_acc": { - "Spanish": 0.9310924369747899, - "English": 0.9470588235294117, - "Chinese": 0.9252100840336135, - "Vietnamese": 0.9310924369747899 + "Spanish": 0.9235294117647059, + "English": 0.9142857142857143, + "Chinese": 0.8739495798319328, + "Vietnamese": 0.8605042016806723 }, - "consistency_score_2": 0.922689075630252, - "consistency_score_3": 0.8876050420168067, - "consistency_score_4": 0.865546218487395, + "consistency_score_2": 0.8663865546218488, + "consistency_score_3": 0.8067226890756303, + "consistency_score_4": 0.7680672268907563, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9361344537815126, - "Spanish,Chinese": 0.9168067226890756, - "Spanish,Vietnamese": 0.9310924369747899, - "English,Chinese": 0.9218487394957983, - "English,Vietnamese": 0.9277310924369748, - "Chinese,Vietnamese": 0.9025210084033614 + "Spanish,English": 0.8983193277310925, + "Spanish,Chinese": 0.8596638655462185, + "Spanish,Vietnamese": 0.8563025210084033, + "English,Chinese": 0.8672268907563025, + "English,Vietnamese": 0.8663865546218488, + "Chinese,Vietnamese": 0.8504201680672269 }, "3_combine": { - "Spanish,English,Chinese": 0.8890756302521008, - "Spanish,English,Vietnamese": 0.9, - "Spanish,Chinese,Vietnamese": 0.880672268907563, - "English,Chinese,Vietnamese": 0.880672268907563 + "Spanish,English,Chinese": 0.819327731092437, + "Spanish,English,Vietnamese": 0.8168067226890756, + "Spanish,Chinese,Vietnamese": 0.7907563025210084, + "English,Chinese,Vietnamese": 0.8 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.865546218487395 + "Spanish,English,Chinese,Vietnamese": 0.7680672268907563 } }, - "AC3_2": 0.9281191154096758, - "AC3_3": 0.9100281016278929, - "AC3_4": 0.8982922453863125 + "AC3_2": 0.8795245954598574, + "AC3_3": 0.8477019283299347, + "AC3_4": 0.8258641156737193 }, "prompt_3": { - "overall_acc": 0.9218487394957983, + "overall_acc": 0.9176470588235294, "language_acc": { - "Spanish": 0.9193277310924369, - "English": 0.934453781512605, - "Chinese": 0.9126050420168067, - "Vietnamese": 0.9210084033613445 + "Spanish": 0.9201680672268907, + "English": 0.9394957983193277, + "Chinese": 0.9109243697478991, + "Vietnamese": 0.9 }, - "consistency_score_2": 0.903781512605042, - "consistency_score_3": 0.859873949579832, - "consistency_score_4": 0.8285714285714286, + "consistency_score_2": 0.9019607843137255, + "consistency_score_3": 0.857983193277311, + "consistency_score_4": 0.826890756302521, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9218487394957983, - "Spanish,Chinese": 0.8949579831932774, - "Spanish,Vietnamese": 0.9067226890756303, - "English,Chinese": 0.9008403361344538, - "English,Vietnamese": 0.9159663865546218, + "Spanish,English": 0.9252100840336135, + "Spanish,Chinese": 0.8941176470588236, + "Spanish,Vietnamese": 0.892436974789916, + "English,Chinese": 0.9100840336134454, + "English,Vietnamese": 0.907563025210084, "Chinese,Vietnamese": 0.8823529411764706 }, "3_combine": { - "Spanish,English,Chinese": 0.8621848739495799, - "Spanish,English,Vietnamese": 0.8756302521008403, - "Spanish,Chinese,Vietnamese": 0.8478991596638655, - "English,Chinese,Vietnamese": 0.853781512605042 + "Spanish,English,Chinese": 0.8689075630252101, + "Spanish,English,Vietnamese": 0.865546218487395, + "Spanish,Chinese,Vietnamese": 0.8411764705882353, + "English,Chinese,Vietnamese": 0.8563025210084033 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8285714285714286 + "Spanish,English,Chinese,Vietnamese": 0.826890756302521 } }, - "AC3_2": 0.9127257254530945, - "AC3_3": 0.8897834902828757, - "AC3_4": 0.8727247787718962 + "AC3_2": 0.9097363082664338, + "AC3_3": 0.8868127278807935, + "AC3_4": 0.8699081944418048 }, "prompt_4": { - "overall_acc": 0.9304621848739496, + "overall_acc": 0.915126050420168, "language_acc": { - "Spanish": 0.9260504201680673, - "English": 0.9436974789915966, - "Chinese": 0.9193277310924369, - "Vietnamese": 0.9327731092436975 + "Spanish": 0.9218487394957983, + "English": 0.926890756302521, + "Chinese": 0.9008403361344538, + "Vietnamese": 0.9109243697478991 }, - "consistency_score_2": 0.9184873949579831, - "consistency_score_3": 0.8808823529411764, - "consistency_score_4": 0.8546218487394958, + "consistency_score_2": 0.8948179271708684, + "consistency_score_3": 0.8510504201680673, + "consistency_score_4": 0.8218487394957983, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9252100840336135, - "Spanish,Chinese": 0.9109243697478991, - "Spanish,Vietnamese": 0.9226890756302522, - "English,Chinese": 0.9168067226890756, - "English,Vietnamese": 0.9277310924369748, - "Chinese,Vietnamese": 0.907563025210084 + "Spanish,English": 0.9184873949579831, + "Spanish,Chinese": 0.8857142857142857, + "Spanish,Vietnamese": 0.9016806722689076, + "English,Chinese": 0.8899159663865546, + "English,Vietnamese": 0.907563025210084, + "Chinese,Vietnamese": 0.865546218487395 }, "3_combine": { - "Spanish,English,Chinese": 0.8798319327731092, - "Spanish,English,Vietnamese": 0.8907563025210085, - "Spanish,Chinese,Vietnamese": 0.8739495798319328, - "English,Chinese,Vietnamese": 0.8789915966386554 + "Spanish,English,Chinese": 0.8563025210084033, + "Spanish,English,Vietnamese": 0.8714285714285714, + "Spanish,Chinese,Vietnamese": 0.8344537815126051, + "English,Chinese,Vietnamese": 0.8420168067226891 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8546218487394958 + "Spanish,English,Chinese,Vietnamese": 0.8218487394957983 } }, - "AC3_2": 0.9244360122824432, - "AC3_3": 0.904993723257036, - "AC3_4": 0.8909309563034448 + "AC3_2": 0.9048580570782975, + "AC3_3": 0.8819259260240758, + "AC3_4": 0.8659828517265586 }, "prompt_5": { - "overall_acc": 0.8934873949579832, + "overall_acc": 0.8102941176470588, "language_acc": { - "Spanish": 0.9058823529411765, - "English": 0.915126050420168, - "Chinese": 0.8697478991596639, - "Vietnamese": 0.8831932773109243 - }, - "consistency_score_2": 0.8563025210084034, - "consistency_score_3": 0.7915966386554621, - "consistency_score_4": 0.746218487394958, + "Spanish": 0.8848739495798319, + "English": 0.8142857142857143, + "Chinese": 0.7630252100840336, + "Vietnamese": 0.7789915966386555 + }, + "consistency_score_2": 0.7879551820728291, + "consistency_score_3": 0.7092436974789916, + "consistency_score_4": 0.6655462184873949, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8966386554621849, - "Spanish,Chinese": 0.8436974789915966, - "Spanish,Vietnamese": 0.8495798319327731, - "English,Chinese": 0.8554621848739495, - "English,Vietnamese": 0.8672268907563025, - "Chinese,Vietnamese": 0.8252100840336134 + "Spanish,English": 0.8092436974789916, + "Spanish,Chinese": 0.7605042016806722, + "Spanish,Vietnamese": 0.7764705882352941, + "English,Chinese": 0.7890756302521008, + "English,Vietnamese": 0.8109243697478992, + "Chinese,Vietnamese": 0.7815126050420168 }, "3_combine": { - "Spanish,English,Chinese": 0.8050420168067227, - "Spanish,English,Vietnamese": 0.8142857142857143, - "Spanish,Chinese,Vietnamese": 0.7647058823529411, - "English,Chinese,Vietnamese": 0.7823529411764706 + "Spanish,English,Chinese": 0.7109243697478992, + "Spanish,English,Vietnamese": 0.7201680672268908, + "Spanish,Chinese,Vietnamese": 0.6882352941176471, + "English,Chinese,Vietnamese": 0.7176470588235294 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.746218487394958 + "Spanish,English,Chinese,Vietnamese": 0.6655462184873949 } }, - "AC3_2": 0.8744998491152861, - "AC3_3": 0.8394615394593914, - "AC3_4": 0.8132395199736593 + "AC3_2": 0.7989685327252546, + "AC3_3": 0.7564089426231012, + "AC3_4": 0.7308218546710168 } }, "cross_mmlu": { "prompt_1": { - "overall_acc": 0.6076190476190476, + "overall_acc": 0.5857142857142856, "language_acc": { - "Filipino": 0.5466666666666666, - "Vietnamese": 0.5933333333333334, - "Chinese": 0.64, + "Filipino": 0.5333333333333333, + "Vietnamese": 0.6, + "Chinese": 0.6333333333333333, "Spanish": 0.6133333333333333, - "Malay": 0.6066666666666667, + "Malay": 0.52, "Indonesian": 0.52, - "English": 0.7333333333333333 + "English": 0.68 }, - "consistency_score_2": 0.5895238095238096, - "consistency_score_3": 0.431047619047619, - "consistency_score_4": 0.34742857142857136, - "consistency_score_5": 0.29365079365079366, - "consistency_score_6": 0.2552380952380952, - "consistency_score_7": 0.22666666666666666, + "consistency_score_2": 0.6180952380952381, + "consistency_score_3": 0.4647619047619048, + "consistency_score_4": 0.37885714285714284, + "consistency_score_5": 0.32317460317460317, + "consistency_score_6": 0.28380952380952384, + "consistency_score_7": 0.25333333333333335, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5266666666666666, - "Filipino,Chinese": 0.5933333333333334, - "Filipino,Spanish": 0.5333333333333333, - "Filipino,Malay": 0.5133333333333333, - "Filipino,Indonesian": 0.4866666666666667, - "Filipino,English": 0.58, - "Vietnamese,Chinese": 0.66, - "Vietnamese,Spanish": 0.58, + "Filipino,Vietnamese": 0.5866666666666667, + "Filipino,Chinese": 0.6066666666666667, + "Filipino,Spanish": 0.5933333333333334, + "Filipino,Malay": 0.56, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,English": 0.5866666666666667, + "Vietnamese,Chinese": 0.6866666666666666, + "Vietnamese,Spanish": 0.6266666666666667, "Vietnamese,Malay": 0.6, - "Vietnamese,Indonesian": 0.6333333333333333, - "Vietnamese,English": 0.5933333333333334, - "Chinese,Spanish": 0.5533333333333333, - "Chinese,Malay": 0.6466666666666666, - "Chinese,Indonesian": 0.5933333333333334, + "Vietnamese,Indonesian": 0.66, + "Vietnamese,English": 0.6333333333333333, + "Chinese,Spanish": 0.66, + "Chinese,Malay": 0.6333333333333333, + "Chinese,Indonesian": 0.5866666666666667, "Chinese,English": 0.64, - "Spanish,Malay": 0.5533333333333333, - "Spanish,Indonesian": 0.5733333333333334, - "Spanish,English": 0.6733333333333333, - "Malay,Indonesian": 0.6466666666666666, - "Malay,English": 0.6466666666666666, - "Indonesian,English": 0.5533333333333333 + "Spanish,Malay": 0.56, + "Spanish,Indonesian": 0.6066666666666667, + "Spanish,English": 0.7066666666666667, + "Malay,Indonesian": 0.6533333333333333, + "Malay,English": 0.62, + "Indonesian,English": 0.58 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.43333333333333335, - "Filipino,Vietnamese,Spanish": 0.38, - "Filipino,Vietnamese,Malay": 0.38666666666666666, - "Filipino,Vietnamese,Indonesian": 0.38, - "Filipino,Vietnamese,English": 0.3933333333333333, - "Filipino,Chinese,Spanish": 0.38, - "Filipino,Chinese,Malay": 0.43333333333333335, - "Filipino,Chinese,Indonesian": 0.38, - "Filipino,Chinese,English": 0.4533333333333333, - "Filipino,Spanish,Malay": 0.36666666666666664, - "Filipino,Spanish,Indonesian": 0.35333333333333333, - "Filipino,Spanish,English": 0.4266666666666667, - "Filipino,Malay,Indonesian": 0.38, - "Filipino,Malay,English": 0.42, - "Filipino,Indonesian,English": 0.36666666666666664, - "Vietnamese,Chinese,Spanish": 0.44666666666666666, - "Vietnamese,Chinese,Malay": 0.48, - "Vietnamese,Chinese,Indonesian": 0.4666666666666667, - "Vietnamese,Chinese,English": 0.4866666666666667, - "Vietnamese,Spanish,Malay": 0.42, - "Vietnamese,Spanish,Indonesian": 0.4533333333333333, - "Vietnamese,Spanish,English": 0.44666666666666666, - "Vietnamese,Malay,Indonesian": 0.48, - "Vietnamese,Malay,English": 0.46, - "Vietnamese,Indonesian,English": 0.44, - "Chinese,Spanish,Malay": 0.44666666666666666, - "Chinese,Spanish,Indonesian": 0.4066666666666667, - "Chinese,Spanish,English": 0.4666666666666667, - "Chinese,Malay,Indonesian": 0.4866666666666667, - "Chinese,Malay,English": 0.5, - "Chinese,Indonesian,English": 0.44, - "Spanish,Malay,Indonesian": 0.44, - "Spanish,Malay,English": 0.48, - "Spanish,Indonesian,English": 0.44, - "Malay,Indonesian,English": 0.4666666666666667 + "Filipino,Vietnamese,Chinese": 0.4666666666666667, + "Filipino,Vietnamese,Spanish": 0.43333333333333335, + "Filipino,Vietnamese,Malay": 0.41333333333333333, + "Filipino,Vietnamese,Indonesian": 0.46, + "Filipino,Vietnamese,English": 0.43333333333333335, + "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Malay": 0.44, + "Filipino,Chinese,Indonesian": 0.4266666666666667, + "Filipino,Chinese,English": 0.44666666666666666, + "Filipino,Spanish,Malay": 0.4, + "Filipino,Spanish,Indonesian": 0.4266666666666667, + "Filipino,Spanish,English": 0.47333333333333333, + "Filipino,Malay,Indonesian": 0.43333333333333335, + "Filipino,Malay,English": 0.43333333333333335, + "Filipino,Indonesian,English": 0.43333333333333335, + "Vietnamese,Chinese,Spanish": 0.5133333333333333, + "Vietnamese,Chinese,Malay": 0.49333333333333335, + "Vietnamese,Chinese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese,English": 0.52, + "Vietnamese,Spanish,Malay": 0.44, + "Vietnamese,Spanish,Indonesian": 0.48, + "Vietnamese,Spanish,English": 0.5066666666666667, + "Vietnamese,Malay,Indonesian": 0.5066666666666667, + "Vietnamese,Malay,English": 0.4666666666666667, + "Vietnamese,Indonesian,English": 0.48, + "Chinese,Spanish,Malay": 0.4666666666666667, + "Chinese,Spanish,Indonesian": 0.46, + "Chinese,Spanish,English": 0.5333333333333333, + "Chinese,Malay,Indonesian": 0.47333333333333333, + "Chinese,Malay,English": 0.4866666666666667, + "Chinese,Indonesian,English": 0.4533333333333333, + "Spanish,Malay,Indonesian": 0.44666666666666666, + "Spanish,Malay,English": 0.4866666666666667, + "Spanish,Indonesian,English": 0.48, + "Malay,Indonesian,English": 0.4866666666666667 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.31333333333333335, - "Filipino,Vietnamese,Chinese,Malay": 0.3466666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.32666666666666666, - "Filipino,Vietnamese,Chinese,English": 0.34, - "Filipino,Vietnamese,Spanish,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Spanish,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.32666666666666666, - "Filipino,Vietnamese,Malay,Indonesian": 0.3333333333333333, - "Filipino,Vietnamese,Malay,English": 0.32666666666666666, - "Filipino,Vietnamese,Indonesian,English": 0.31333333333333335, - "Filipino,Chinese,Spanish,Malay": 0.32, - "Filipino,Chinese,Spanish,Indonesian": 0.2866666666666667, - "Filipino,Chinese,Spanish,English": 0.3333333333333333, - "Filipino,Chinese,Malay,Indonesian": 0.32666666666666666, - "Filipino,Chinese,Malay,English": 0.37333333333333335, - "Filipino,Chinese,Indonesian,English": 0.31333333333333335, - "Filipino,Spanish,Malay,Indonesian": 0.30666666666666664, - "Filipino,Spanish,Malay,English": 0.34, - "Filipino,Spanish,Indonesian,English": 0.3, - "Filipino,Malay,Indonesian,English": 0.31333333333333335, - "Vietnamese,Chinese,Spanish,Malay": 0.37333333333333335, - "Vietnamese,Chinese,Spanish,Indonesian": 0.36666666666666664, - "Vietnamese,Chinese,Spanish,English": 0.38, - "Vietnamese,Chinese,Malay,Indonesian": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Chinese,English": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,Malay": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Filipino,Vietnamese,Spanish,English": 0.38, + "Filipino,Vietnamese,Malay,Indonesian": 0.36666666666666664, + "Filipino,Vietnamese,Malay,English": 0.34, + "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, + "Filipino,Chinese,Spanish,Malay": 0.35333333333333333, + "Filipino,Chinese,Spanish,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Malay,English": 0.36, + "Filipino,Chinese,Indonesian,English": 0.36, + "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Spanish,Malay,English": 0.3466666666666667, + "Filipino,Spanish,Indonesian,English": 0.38, + "Filipino,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.41333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.44, + "Vietnamese,Chinese,Malay,Indonesian": 0.43333333333333335, "Vietnamese,Chinese,Malay,English": 0.4066666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.38666666666666666, - "Vietnamese,Spanish,Malay,Indonesian": 0.37333333333333335, - "Vietnamese,Spanish,Malay,English": 0.36666666666666664, - "Vietnamese,Spanish,Indonesian,English": 0.36, - "Vietnamese,Malay,Indonesian,English": 0.38666666666666666, - "Chinese,Spanish,Malay,Indonesian": 0.36666666666666664, - "Chinese,Spanish,Malay,English": 0.4, - "Chinese,Spanish,Indonesian,English": 0.36, + "Vietnamese,Chinese,Indonesian,English": 0.42, + "Vietnamese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Spanish,Malay,English": 0.38, + "Vietnamese,Spanish,Indonesian,English": 0.4, + "Vietnamese,Malay,Indonesian,English": 0.42, + "Chinese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Chinese,Spanish,Malay,English": 0.4066666666666667, + "Chinese,Spanish,Indonesian,English": 0.3933333333333333, "Chinese,Malay,Indonesian,English": 0.38666666666666666, - "Spanish,Malay,Indonesian,English": 0.3933333333333333 + "Spanish,Malay,Indonesian,English": 0.4 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.26666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.29333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.3, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.28, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.28, - "Filipino,Vietnamese,Spanish,Malay,English": 0.28, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2733333333333333, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.28, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26666666666666666, - "Filipino,Chinese,Spanish,Malay,English": 0.3, - "Filipino,Chinese,Spanish,Indonesian,English": 0.26, - "Filipino,Chinese,Malay,Indonesian,English": 0.28, - "Filipino,Spanish,Malay,Indonesian,English": 0.28, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.3333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.32, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.34, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Malay,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3, + "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.32, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.3, + "Filipino,Chinese,Spanish,Malay,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.3, + "Filipino,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.36, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.36666666666666664, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3466666666666667, "Chinese,Spanish,Malay,Indonesian,English": 0.3333333333333333 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.24, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.25333333333333335, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.29333333333333333 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.28, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.31333333333333335 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 } }, - "AC3_2": 0.5984346705565198, - "AC3_3": 0.5043249237592222, - "AC3_4": 0.44208103021184547, - "AC3_5": 0.39594760055319295, - "AC3_6": 0.3594744034062524, - "AC3_7": 0.3301674276620994 + "AC3_2": 0.6014692585395479, + "AC3_3": 0.5182748348166127, + "AC3_4": 0.4601049424032136, + "AC3_5": 0.4165261214051973, + "AC3_6": 0.38235017989656905, + "AC3_7": 0.3536889897421823 }, "prompt_2": { - "overall_acc": 0.5771428571428573, + "overall_acc": 0.5628571428571428, "language_acc": { - "Filipino": 0.52, - "Vietnamese": 0.56, - "Chinese": 0.6266666666666667, - "Spanish": 0.5533333333333333, - "Malay": 0.5666666666666667, - "Indonesian": 0.5466666666666666, - "English": 0.6666666666666666 + "Filipino": 0.4866666666666667, + "Vietnamese": 0.5933333333333334, + "Chinese": 0.56, + "Spanish": 0.5933333333333334, + "Malay": 0.46, + "Indonesian": 0.56, + "English": 0.6866666666666666 }, - "consistency_score_2": 0.5517460317460318, - "consistency_score_3": 0.3826666666666666, - "consistency_score_4": 0.30019047619047623, - "consistency_score_5": 0.2526984126984127, - "consistency_score_6": 0.22190476190476188, - "consistency_score_7": 0.2, + "consistency_score_2": 0.6282539682539682, + "consistency_score_3": 0.48400000000000004, + "consistency_score_4": 0.40514285714285714, + "consistency_score_5": 0.35365079365079366, + "consistency_score_6": 0.3161904761904762, + "consistency_score_7": 0.2866666666666667, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5266666666666666, - "Filipino,Chinese": 0.5, - "Filipino,Spanish": 0.5133333333333333, - "Filipino,Malay": 0.49333333333333335, - "Filipino,Indonesian": 0.49333333333333335, - "Filipino,English": 0.5333333333333333, - "Vietnamese,Chinese": 0.56, - "Vietnamese,Spanish": 0.5666666666666667, - "Vietnamese,Malay": 0.52, - "Vietnamese,Indonesian": 0.54, - "Vietnamese,English": 0.6133333333333333, - "Chinese,Spanish": 0.5666666666666667, - "Chinese,Malay": 0.5266666666666666, - "Chinese,Indonesian": 0.56, - "Chinese,English": 0.5666666666666667, - "Spanish,Malay": 0.5333333333333333, - "Spanish,Indonesian": 0.5133333333333333, - "Spanish,English": 0.6666666666666666, - "Malay,Indonesian": 0.6466666666666666, - "Malay,English": 0.6, - "Indonesian,English": 0.5466666666666666 + "Filipino,Vietnamese": 0.58, + "Filipino,Chinese": 0.5933333333333334, + "Filipino,Spanish": 0.5933333333333334, + "Filipino,Malay": 0.5533333333333333, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,English": 0.5733333333333334, + "Vietnamese,Chinese": 0.6733333333333333, + "Vietnamese,Spanish": 0.6533333333333333, + "Vietnamese,Malay": 0.6333333333333333, + "Vietnamese,Indonesian": 0.6733333333333333, + "Vietnamese,English": 0.6666666666666666, + "Chinese,Spanish": 0.6666666666666666, + "Chinese,Malay": 0.6266666666666667, + "Chinese,Indonesian": 0.62, + "Chinese,English": 0.6466666666666666, + "Spanish,Malay": 0.6, + "Spanish,Indonesian": 0.62, + "Spanish,English": 0.7, + "Malay,Indonesian": 0.68, + "Malay,English": 0.5933333333333334, + "Indonesian,English": 0.6533333333333333 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.36666666666666664, - "Filipino,Vietnamese,Spanish": 0.36, - "Filipino,Vietnamese,Malay": 0.35333333333333333, - "Filipino,Vietnamese,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,English": 0.37333333333333335, - "Filipino,Chinese,Spanish": 0.3333333333333333, - "Filipino,Chinese,Malay": 0.32666666666666666, - "Filipino,Chinese,Indonesian": 0.34, - "Filipino,Chinese,English": 0.36, - "Filipino,Spanish,Malay": 0.3333333333333333, - "Filipino,Spanish,Indonesian": 0.3, - "Filipino,Spanish,English": 0.3933333333333333, - "Filipino,Malay,Indonesian": 0.36666666666666664, - "Filipino,Malay,English": 0.36, - "Filipino,Indonesian,English": 0.34, - "Vietnamese,Chinese,Spanish": 0.4066666666666667, - "Vietnamese,Chinese,Malay": 0.36666666666666664, - "Vietnamese,Chinese,Indonesian": 0.38666666666666666, - "Vietnamese,Chinese,English": 0.41333333333333333, - "Vietnamese,Spanish,Malay": 0.38666666666666666, - "Vietnamese,Spanish,Indonesian": 0.3933333333333333, - "Vietnamese,Spanish,English": 0.4533333333333333, - "Vietnamese,Malay,Indonesian": 0.4066666666666667, - "Vietnamese,Malay,English": 0.42, - "Vietnamese,Indonesian,English": 0.4266666666666667, - "Chinese,Spanish,Malay": 0.38, - "Chinese,Spanish,Indonesian": 0.36666666666666664, - "Chinese,Spanish,English": 0.44, - "Chinese,Malay,Indonesian": 0.4066666666666667, - "Chinese,Malay,English": 0.4, - "Chinese,Indonesian,English": 0.4, - "Spanish,Malay,Indonesian": 0.4066666666666667, - "Spanish,Malay,English": 0.43333333333333335, - "Spanish,Indonesian,English": 0.4, - "Malay,Indonesian,English": 0.44 + "Filipino,Vietnamese,Chinese": 0.4666666666666667, + "Filipino,Vietnamese,Spanish": 0.4533333333333333, + "Filipino,Vietnamese,Malay": 0.44, + "Filipino,Vietnamese,Indonesian": 0.4666666666666667, + "Filipino,Vietnamese,English": 0.4533333333333333, + "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Malay": 0.44666666666666666, + "Filipino,Chinese,Indonesian": 0.46, + "Filipino,Chinese,English": 0.4533333333333333, + "Filipino,Spanish,Malay": 0.4266666666666667, + "Filipino,Spanish,Indonesian": 0.4533333333333333, + "Filipino,Spanish,English": 0.4666666666666667, + "Filipino,Malay,Indonesian": 0.4533333333333333, + "Filipino,Malay,English": 0.42, + "Filipino,Indonesian,English": 0.4533333333333333, + "Vietnamese,Chinese,Spanish": 0.5333333333333333, + "Vietnamese,Chinese,Malay": 0.49333333333333335, + "Vietnamese,Chinese,Indonesian": 0.5266666666666666, + "Vietnamese,Chinese,English": 0.54, + "Vietnamese,Spanish,Malay": 0.5, + "Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,English": 0.5266666666666666, + "Vietnamese,Malay,Indonesian": 0.52, + "Vietnamese,Malay,English": 0.4866666666666667, + "Vietnamese,Indonesian,English": 0.5333333333333333, + "Chinese,Spanish,Malay": 0.5, + "Chinese,Spanish,Indonesian": 0.5, + "Chinese,Spanish,English": 0.5266666666666666, + "Chinese,Malay,Indonesian": 0.49333333333333335, + "Chinese,Malay,English": 0.4666666666666667, + "Chinese,Indonesian,English": 0.5066666666666667, + "Spanish,Malay,Indonesian": 0.5, + "Spanish,Malay,English": 0.4866666666666667, + "Spanish,Indonesian,English": 0.5133333333333333, + "Malay,Indonesian,English": 0.5066666666666667 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Malay": 0.28, - "Filipino,Vietnamese,Chinese,Indonesian": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,English": 0.28, - "Filipino,Vietnamese,Spanish,Malay": 0.28, - "Filipino,Vietnamese,Spanish,Indonesian": 0.2733333333333333, - "Filipino,Vietnamese,Spanish,English": 0.30666666666666664, - "Filipino,Vietnamese,Malay,Indonesian": 0.2866666666666667, - "Filipino,Vietnamese,Malay,English": 0.29333333333333333, - "Filipino,Vietnamese,Indonesian,English": 0.29333333333333333, - "Filipino,Chinese,Spanish,Malay": 0.2733333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.26, - "Filipino,Chinese,Spanish,English": 0.2866666666666667, - "Filipino,Chinese,Malay,Indonesian": 0.2733333333333333, - "Filipino,Chinese,Malay,English": 0.28, - "Filipino,Chinese,Indonesian,English": 0.29333333333333333, - "Filipino,Spanish,Malay,Indonesian": 0.25333333333333335, - "Filipino,Spanish,Malay,English": 0.2866666666666667, - "Filipino,Spanish,Indonesian,English": 0.26, - "Filipino,Malay,Indonesian,English": 0.2733333333333333, - "Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Indonesian": 0.32, - "Vietnamese,Chinese,Spanish,English": 0.35333333333333333, - "Vietnamese,Chinese,Malay,Indonesian": 0.31333333333333335, - "Vietnamese,Chinese,Malay,English": 0.31333333333333335, - "Vietnamese,Chinese,Indonesian,English": 0.3333333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.32, - "Vietnamese,Spanish,Malay,English": 0.34, - "Vietnamese,Spanish,Indonesian,English": 0.34, - "Vietnamese,Malay,Indonesian,English": 0.3466666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, - "Chinese,Spanish,Malay,English": 0.3333333333333333, - "Chinese,Spanish,Indonesian,English": 0.32, - "Chinese,Malay,Indonesian,English": 0.32666666666666666, - "Spanish,Malay,Indonesian,English": 0.34 + "Filipino,Vietnamese,Chinese,Spanish": 0.38666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,English": 0.3933333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.38, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.3933333333333333, + "Filipino,Vietnamese,Malay,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Malay,English": 0.37333333333333335, + "Filipino,Vietnamese,Indonesian,English": 0.3933333333333333, + "Filipino,Chinese,Spanish,Malay": 0.38666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Chinese,Spanish,English": 0.38, + "Filipino,Chinese,Malay,Indonesian": 0.3933333333333333, + "Filipino,Chinese,Malay,English": 0.36, + "Filipino,Chinese,Indonesian,English": 0.4, + "Filipino,Spanish,Malay,Indonesian": 0.37333333333333335, + "Filipino,Spanish,Malay,English": 0.36666666666666664, + "Filipino,Spanish,Indonesian,English": 0.38, + "Filipino,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay": 0.44, + "Vietnamese,Chinese,Spanish,Indonesian": 0.4533333333333333, + "Vietnamese,Chinese,Spanish,English": 0.4533333333333333, + "Vietnamese,Chinese,Malay,Indonesian": 0.44, + "Vietnamese,Chinese,Malay,English": 0.41333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.46, + "Vietnamese,Spanish,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish,Malay,English": 0.41333333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.44, + "Vietnamese,Malay,Indonesian,English": 0.44666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Chinese,Spanish,Malay,English": 0.41333333333333333, + "Chinese,Spanish,Indonesian,English": 0.43333333333333335, + "Chinese,Malay,Indonesian,English": 0.4066666666666667, + "Spanish,Malay,Indonesian,English": 0.4266666666666667 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Malay,English": 0.24, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.23333333333333334, - "Filipino,Vietnamese,Spanish,Malay,English": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.24666666666666667, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.24666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, - "Filipino,Chinese,Spanish,Malay,English": 0.24666666666666667, - "Filipino,Chinese,Spanish,Indonesian,English": 0.24, - "Filipino,Chinese,Malay,Indonesian,English": 0.24, - "Filipino,Spanish,Malay,Indonesian,English": 0.22, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.26666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.28, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.29333333333333333, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.28, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.2866666666666667, - "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.34, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.34, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.36, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Spanish,Malay,English": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.34, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.3466666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.34, + "Filipino,Chinese,Malay,Indonesian,English": 0.3333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.4, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.38666666666666666, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.38, + "Chinese,Spanish,Malay,Indonesian,English": 0.36666666666666664 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22666666666666666, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.21333333333333335, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.20666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.32, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.3, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 } }, - "AC3_2": 0.5641587658185775, - "AC3_3": 0.4602024210673534, - "AC3_4": 0.3949531666319877, - "AC3_5": 0.35149633835528205, - "AC3_6": 0.3205584879557959, - "AC3_7": 0.2970588234911873 + "AC3_2": 0.5937602801816463, + "AC3_3": 0.5204585152341265, + "AC3_4": 0.47115196486952593, + "AC3_5": 0.434376762781048, + "AC3_6": 0.40491564768648614, + "AC3_7": 0.37986547080730276 }, "prompt_3": { - "overall_acc": 0.5466666666666666, + "overall_acc": 0.5771428571428572, "language_acc": { - "Filipino": 0.5066666666666667, - "Vietnamese": 0.5466666666666666, - "Chinese": 0.56, - "Spanish": 0.52, - "Malay": 0.49333333333333335, - "Indonesian": 0.5466666666666666, - "English": 0.6533333333333333 + "Filipino": 0.52, + "Vietnamese": 0.58, + "Chinese": 0.5733333333333334, + "Spanish": 0.5933333333333334, + "Malay": 0.5, + "Indonesian": 0.56, + "English": 0.7133333333333334 }, - "consistency_score_2": 0.5396825396825398, - "consistency_score_3": 0.3613333333333334, - "consistency_score_4": 0.26647619047619053, - "consistency_score_5": 0.20952380952380953, - "consistency_score_6": 0.17238095238095238, - "consistency_score_7": 0.14666666666666667, + "consistency_score_2": 0.6301587301587301, + "consistency_score_3": 0.4872380952380952, + "consistency_score_4": 0.40628571428571425, + "consistency_score_5": 0.35238095238095235, + "consistency_score_6": 0.31238095238095237, + "consistency_score_7": 0.28, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5133333333333333, - "Filipino,Chinese": 0.5133333333333333, - "Filipino,Spanish": 0.5, - "Filipino,Malay": 0.48, - "Filipino,Indonesian": 0.5533333333333333, - "Filipino,English": 0.52, - "Vietnamese,Chinese": 0.6066666666666667, - "Vietnamese,Spanish": 0.54, - "Vietnamese,Malay": 0.5733333333333334, - "Vietnamese,Indonesian": 0.54, - "Vietnamese,English": 0.58, - "Chinese,Spanish": 0.5733333333333334, - "Chinese,Malay": 0.48, - "Chinese,Indonesian": 0.44, - "Chinese,English": 0.5533333333333333, - "Spanish,Malay": 0.5733333333333334, - "Spanish,Indonesian": 0.5133333333333333, - "Spanish,English": 0.5933333333333334, - "Malay,Indonesian": 0.5533333333333333, - "Malay,English": 0.5466666666666666, - "Indonesian,English": 0.5866666666666667 + "Filipino,Vietnamese": 0.6133333333333333, + "Filipino,Chinese": 0.5733333333333334, + "Filipino,Spanish": 0.5866666666666667, + "Filipino,Malay": 0.5666666666666667, + "Filipino,Indonesian": 0.5866666666666667, + "Filipino,English": 0.5866666666666667, + "Vietnamese,Chinese": 0.6666666666666666, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,Malay": 0.6066666666666667, + "Vietnamese,Indonesian": 0.68, + "Vietnamese,English": 0.6666666666666666, + "Chinese,Spanish": 0.68, + "Chinese,Malay": 0.6066666666666667, + "Chinese,Indonesian": 0.6266666666666667, + "Chinese,English": 0.6466666666666666, + "Spanish,Malay": 0.6, + "Spanish,Indonesian": 0.6266666666666667, + "Spanish,English": 0.7333333333333333, + "Malay,Indonesian": 0.6933333333333334, + "Malay,English": 0.5866666666666667, + "Indonesian,English": 0.6333333333333333 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.38, - "Filipino,Vietnamese,Spanish": 0.34, - "Filipino,Vietnamese,Malay": 0.3466666666666667, - "Filipino,Vietnamese,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,English": 0.36, - "Filipino,Chinese,Spanish": 0.35333333333333333, - "Filipino,Chinese,Malay": 0.3, - "Filipino,Chinese,Indonesian": 0.30666666666666664, - "Filipino,Chinese,English": 0.3466666666666667, - "Filipino,Spanish,Malay": 0.3333333333333333, - "Filipino,Spanish,Indonesian": 0.3466666666666667, - "Filipino,Spanish,English": 0.35333333333333333, - "Filipino,Malay,Indonesian": 0.3466666666666667, - "Filipino,Malay,English": 0.3333333333333333, - "Filipino,Indonesian,English": 0.36, - "Vietnamese,Chinese,Spanish": 0.41333333333333333, - "Vietnamese,Chinese,Malay": 0.37333333333333335, - "Vietnamese,Chinese,Indonesian": 0.35333333333333333, - "Vietnamese,Chinese,English": 0.4066666666666667, - "Vietnamese,Spanish,Malay": 0.4, - "Vietnamese,Spanish,Indonesian": 0.35333333333333333, - "Vietnamese,Spanish,English": 0.38666666666666666, - "Vietnamese,Malay,Indonesian": 0.38666666666666666, - "Vietnamese,Malay,English": 0.4, - "Vietnamese,Indonesian,English": 0.41333333333333333, - "Chinese,Spanish,Malay": 0.37333333333333335, - "Chinese,Spanish,Indonesian": 0.32, - "Chinese,Spanish,English": 0.38666666666666666, - "Chinese,Malay,Indonesian": 0.3, - "Chinese,Malay,English": 0.3333333333333333, - "Chinese,Indonesian,English": 0.3466666666666667, - "Spanish,Malay,Indonesian": 0.37333333333333335, - "Spanish,Malay,English": 0.38666666666666666, - "Spanish,Indonesian,English": 0.38666666666666666, - "Malay,Indonesian,English": 0.3933333333333333 - }, + "Filipino,Vietnamese,Chinese": 0.4666666666666667, + "Filipino,Vietnamese,Spanish": 0.47333333333333333, + "Filipino,Vietnamese,Malay": 0.44, + "Filipino,Vietnamese,Indonesian": 0.46, + "Filipino,Vietnamese,English": 0.4533333333333333, + "Filipino,Chinese,Spanish": 0.44666666666666666, + "Filipino,Chinese,Malay": 0.44666666666666666, + "Filipino,Chinese,Indonesian": 0.4533333333333333, + "Filipino,Chinese,English": 0.44, + "Filipino,Spanish,Malay": 0.44666666666666666, + "Filipino,Spanish,Indonesian": 0.4533333333333333, + "Filipino,Spanish,English": 0.4866666666666667, + "Filipino,Malay,Indonesian": 0.4533333333333333, + "Filipino,Malay,English": 0.43333333333333335, + "Filipino,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.5466666666666666, + "Vietnamese,Chinese,Malay": 0.48, + "Vietnamese,Chinese,Indonesian": 0.5266666666666666, + "Vietnamese,Chinese,English": 0.5333333333333333, + "Vietnamese,Spanish,Malay": 0.4866666666666667, + "Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,English": 0.5533333333333333, + "Vietnamese,Malay,Indonesian": 0.5133333333333333, + "Vietnamese,Malay,English": 0.47333333333333333, + "Vietnamese,Indonesian,English": 0.5333333333333333, + "Chinese,Spanish,Malay": 0.5066666666666667, + "Chinese,Spanish,Indonesian": 0.5266666666666666, + "Chinese,Spanish,English": 0.5533333333333333, + "Chinese,Malay,Indonesian": 0.5133333333333333, + "Chinese,Malay,English": 0.4666666666666667, + "Chinese,Indonesian,English": 0.5066666666666667, + "Spanish,Malay,Indonesian": 0.5, + "Spanish,Malay,English": 0.49333333333333335, + "Spanish,Indonesian,English": 0.52, + "Malay,Indonesian,English": 0.5066666666666667 + }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.26, - "Filipino,Vietnamese,Chinese,English": 0.2733333333333333, - "Filipino,Vietnamese,Spanish,Malay": 0.26, - "Filipino,Vietnamese,Spanish,Indonesian": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.26, - "Filipino,Vietnamese,Malay,Indonesian": 0.2733333333333333, - "Filipino,Vietnamese,Malay,English": 0.2733333333333333, - "Filipino,Vietnamese,Indonesian,English": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay": 0.25333333333333335, - "Filipino,Chinese,Spanish,Indonesian": 0.24666666666666667, - "Filipino,Chinese,Spanish,English": 0.26666666666666666, - "Filipino,Chinese,Malay,Indonesian": 0.22, - "Filipino,Chinese,Malay,English": 0.23333333333333334, - "Filipino,Chinese,Indonesian,English": 0.22666666666666666, - "Filipino,Spanish,Malay,Indonesian": 0.26, - "Filipino,Spanish,Malay,English": 0.24, - "Filipino,Spanish,Indonesian,English": 0.26, - "Filipino,Malay,Indonesian,English": 0.26, - "Vietnamese,Chinese,Spanish,Malay": 0.31333333333333335, - "Vietnamese,Chinese,Spanish,Indonesian": 0.2733333333333333, - "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, - "Vietnamese,Chinese,Malay,Indonesian": 0.26, - "Vietnamese,Chinese,Malay,English": 0.2866666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.28, - "Vietnamese,Spanish,Malay,English": 0.3, - "Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, - "Vietnamese,Malay,Indonesian,English": 0.3, - "Chinese,Spanish,Malay,Indonesian": 0.25333333333333335, - "Chinese,Spanish,Malay,English": 0.26, - "Chinese,Spanish,Indonesian,English": 0.26, - "Chinese,Malay,Indonesian,English": 0.24, - "Spanish,Malay,Indonesian,English": 0.2866666666666667 + "Filipino,Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.38, + "Filipino,Vietnamese,Chinese,Indonesian": 0.38, + "Filipino,Vietnamese,Chinese,English": 0.38, + "Filipino,Vietnamese,Spanish,Malay": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38, + "Filipino,Vietnamese,Spanish,English": 0.4066666666666667, + "Filipino,Vietnamese,Malay,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,Malay,English": 0.35333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.38, + "Filipino,Chinese,Spanish,Malay": 0.3933333333333333, + "Filipino,Chinese,Spanish,Indonesian": 0.3933333333333333, + "Filipino,Chinese,Spanish,English": 0.3933333333333333, + "Filipino,Chinese,Malay,Indonesian": 0.3933333333333333, + "Filipino,Chinese,Malay,English": 0.36666666666666664, + "Filipino,Chinese,Indonesian,English": 0.38666666666666666, + "Filipino,Spanish,Malay,Indonesian": 0.38, + "Filipino,Spanish,Malay,English": 0.38, + "Filipino,Spanish,Indonesian,English": 0.3933333333333333, + "Filipino,Malay,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.4666666666666667, + "Vietnamese,Chinese,Spanish,English": 0.4666666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,Malay,English": 0.4, + "Vietnamese,Chinese,Indonesian,English": 0.4533333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish,Malay,English": 0.41333333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.44666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.43333333333333335, + "Chinese,Spanish,Malay,Indonesian": 0.44666666666666666, + "Chinese,Spanish,Malay,English": 0.4266666666666667, + "Chinese,Spanish,Indonesian,English": 0.4533333333333333, + "Chinese,Malay,Indonesian,English": 0.4266666666666667, + "Spanish,Malay,Indonesian,English": 0.43333333333333335 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.22, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.21333333333333335, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.22, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2, - "Filipino,Vietnamese,Chinese,Malay,English": 0.20666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Malay,English": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.20666666666666667, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.22, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2, - "Filipino,Chinese,Spanish,Malay,English": 0.19333333333333333, - "Filipino,Chinese,Spanish,Indonesian,English": 0.19333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.17333333333333334, - "Filipino,Spanish,Malay,Indonesian,English": 0.19333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.24, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.22666666666666666, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.22, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.22666666666666666, - "Chinese,Spanish,Malay,Indonesian,English": 0.2 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.32, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.34, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.34, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.34, + "Filipino,Chinese,Spanish,Indonesian,English": 0.35333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian,English": 0.34, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.4066666666666667, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.38, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.38, + "Chinese,Spanish,Malay,Indonesian,English": 0.38666666666666666 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.18, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.15333333333333332, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.18666666666666668 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.3, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.3, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.14666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.28 } }, - "AC3_2": 0.5431521526871927, - "AC3_3": 0.4350856582976534, - "AC3_4": 0.3582978058433421, - "AC3_5": 0.3029387069288725, - "AC3_6": 0.2621103752394857, - "AC3_7": 0.23128205124869328 + "AC3_2": 0.6024867585241545, + "AC3_3": 0.5283934962178687, + "AC3_4": 0.47687225030429414, + "AC3_5": 0.43758782196697493, + "AC3_6": 0.4053594370911686, + "AC3_7": 0.3770666666226756 }, "prompt_4": { - "overall_acc": 0.602857142857143, + "overall_acc": 0.5866666666666667, "language_acc": { - "Filipino": 0.5666666666666667, - "Vietnamese": 0.5866666666666667, - "Chinese": 0.64, - "Spanish": 0.6066666666666667, - "Malay": 0.5866666666666667, - "Indonesian": 0.5466666666666666, - "English": 0.6866666666666666 - }, - "consistency_score_2": 0.5930158730158731, - "consistency_score_3": 0.43466666666666665, - "consistency_score_4": 0.3512380952380953, - "consistency_score_5": 0.299047619047619, - "consistency_score_6": 0.2619047619047619, - "consistency_score_7": 0.23333333333333334, + "Filipino": 0.52, + "Vietnamese": 0.6133333333333333, + "Chinese": 0.62, + "Spanish": 0.62, + "Malay": 0.49333333333333335, + "Indonesian": 0.56, + "English": 0.68 + }, + "consistency_score_2": 0.6234920634920633, + "consistency_score_3": 0.47085714285714286, + "consistency_score_4": 0.3826666666666666, + "consistency_score_5": 0.32253968253968257, + "consistency_score_6": 0.27714285714285714, + "consistency_score_7": 0.24, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5466666666666666, - "Filipino,Chinese": 0.6066666666666667, - "Filipino,Spanish": 0.5733333333333334, - "Filipino,Malay": 0.5733333333333334, - "Filipino,Indonesian": 0.58, - "Filipino,English": 0.58, - "Vietnamese,Chinese": 0.6133333333333333, - "Vietnamese,Spanish": 0.6333333333333333, - "Vietnamese,Malay": 0.5666666666666667, - "Vietnamese,Indonesian": 0.56, - "Vietnamese,English": 0.5933333333333334, - "Chinese,Spanish": 0.5866666666666667, - "Chinese,Malay": 0.5933333333333334, - "Chinese,Indonesian": 0.5733333333333334, - "Chinese,English": 0.6133333333333333, - "Spanish,Malay": 0.5933333333333334, - "Spanish,Indonesian": 0.56, - "Spanish,English": 0.66, - "Malay,Indonesian": 0.6666666666666666, - "Malay,English": 0.6133333333333333, - "Indonesian,English": 0.5666666666666667 + "Filipino,Vietnamese": 0.5733333333333334, + "Filipino,Chinese": 0.5866666666666667, + "Filipino,Spanish": 0.5933333333333334, + "Filipino,Malay": 0.56, + "Filipino,Indonesian": 0.6, + "Filipino,English": 0.5733333333333334, + "Vietnamese,Chinese": 0.6733333333333333, + "Vietnamese,Spanish": 0.6466666666666666, + "Vietnamese,Malay": 0.6266666666666667, + "Vietnamese,Indonesian": 0.66, + "Vietnamese,English": 0.62, + "Chinese,Spanish": 0.68, + "Chinese,Malay": 0.6133333333333333, + "Chinese,Indonesian": 0.62, + "Chinese,English": 0.6333333333333333, + "Spanish,Malay": 0.6466666666666666, + "Spanish,Indonesian": 0.6533333333333333, + "Spanish,English": 0.7, + "Malay,Indonesian": 0.6533333333333333, + "Malay,English": 0.58, + "Indonesian,English": 0.6 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.4266666666666667, + "Filipino,Vietnamese,Chinese": 0.44666666666666666, "Filipino,Vietnamese,Spanish": 0.43333333333333335, - "Filipino,Vietnamese,Malay": 0.4066666666666667, - "Filipino,Vietnamese,Indonesian": 0.4, - "Filipino,Vietnamese,English": 0.4066666666666667, - "Filipino,Chinese,Spanish": 0.43333333333333335, - "Filipino,Chinese,Malay": 0.4266666666666667, - "Filipino,Chinese,Indonesian": 0.41333333333333333, + "Filipino,Vietnamese,Malay": 0.4266666666666667, + "Filipino,Vietnamese,Indonesian": 0.4533333333333333, + "Filipino,Vietnamese,English": 0.41333333333333333, + "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Malay": 0.42, + "Filipino,Chinese,Indonesian": 0.44666666666666666, "Filipino,Chinese,English": 0.43333333333333335, - "Filipino,Spanish,Malay": 0.44, - "Filipino,Spanish,Indonesian": 0.41333333333333333, - "Filipino,Spanish,English": 0.44666666666666666, - "Filipino,Malay,Indonesian": 0.44, - "Filipino,Malay,English": 0.43333333333333335, - "Filipino,Indonesian,English": 0.3933333333333333, - "Vietnamese,Chinese,Spanish": 0.4533333333333333, - "Vietnamese,Chinese,Malay": 0.43333333333333335, - "Vietnamese,Chinese,Indonesian": 0.42, - "Vietnamese,Chinese,English": 0.44, - "Vietnamese,Spanish,Malay": 0.44666666666666666, - "Vietnamese,Spanish,Indonesian": 0.43333333333333335, - "Vietnamese,Spanish,English": 0.4866666666666667, - "Vietnamese,Malay,Indonesian": 0.4533333333333333, - "Vietnamese,Malay,English": 0.43333333333333335, - "Vietnamese,Indonesian,English": 0.42, - "Chinese,Spanish,Malay": 0.4266666666666667, - "Chinese,Spanish,Indonesian": 0.41333333333333333, - "Chinese,Spanish,English": 0.4666666666666667, - "Chinese,Malay,Indonesian": 0.4533333333333333, - "Chinese,Malay,English": 0.44666666666666666, - "Chinese,Indonesian,English": 0.41333333333333333, - "Spanish,Malay,Indonesian": 0.4533333333333333, - "Spanish,Malay,English": 0.47333333333333333, - "Spanish,Indonesian,English": 0.4266666666666667, - "Malay,Indonesian,English": 0.47333333333333333 + "Filipino,Spanish,Malay": 0.43333333333333335, + "Filipino,Spanish,Indonesian": 0.44, + "Filipino,Spanish,English": 0.46, + "Filipino,Malay,Indonesian": 0.44666666666666666, + "Filipino,Malay,English": 0.3933333333333333, + "Filipino,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.5266666666666666, + "Vietnamese,Chinese,Malay": 0.48, + "Vietnamese,Chinese,Indonesian": 0.52, + "Vietnamese,Chinese,English": 0.5, + "Vietnamese,Spanish,Malay": 0.5, + "Vietnamese,Spanish,Indonesian": 0.52, + "Vietnamese,Spanish,English": 0.5, + "Vietnamese,Malay,Indonesian": 0.5133333333333333, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.49333333333333335, + "Chinese,Spanish,Malay": 0.5066666666666667, + "Chinese,Spanish,Indonesian": 0.5266666666666666, + "Chinese,Spanish,English": 0.5333333333333333, + "Chinese,Malay,Indonesian": 0.48, + "Chinese,Malay,English": 0.4533333333333333, + "Chinese,Indonesian,English": 0.47333333333333333, + "Spanish,Malay,Indonesian": 0.5133333333333333, + "Spanish,Malay,English": 0.4866666666666667, + "Spanish,Indonesian,English": 0.5133333333333333, + "Malay,Indonesian,English": 0.46 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.36, - "Filipino,Vietnamese,Chinese,Malay": 0.3333333333333333, - "Filipino,Vietnamese,Chinese,Indonesian": 0.3333333333333333, - "Filipino,Vietnamese,Chinese,English": 0.32666666666666666, - "Filipino,Vietnamese,Spanish,Malay": 0.37333333333333335, - "Filipino,Vietnamese,Spanish,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,Spanish,English": 0.37333333333333335, - "Filipino,Vietnamese,Malay,Indonesian": 0.34, - "Filipino,Vietnamese,Malay,English": 0.34, - "Filipino,Vietnamese,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Malay": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Chinese,English": 0.35333333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.36, + "Filipino,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,English": 0.35333333333333333, + "Filipino,Vietnamese,Malay,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,Malay,English": 0.32, + "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, "Filipino,Chinese,Spanish,Malay": 0.36, - "Filipino,Chinese,Spanish,Indonesian": 0.34, - "Filipino,Chinese,Spanish,English": 0.35333333333333333, - "Filipino,Chinese,Malay,Indonesian": 0.34, - "Filipino,Chinese,Malay,English": 0.3466666666666667, - "Filipino,Chinese,Indonesian,English": 0.32, - "Filipino,Spanish,Malay,Indonesian": 0.35333333333333333, - "Filipino,Spanish,Malay,English": 0.38, - "Filipino,Spanish,Indonesian,English": 0.3333333333333333, - "Filipino,Malay,Indonesian,English": 0.34, - "Vietnamese,Chinese,Spanish,Malay": 0.3466666666666667, - "Vietnamese,Chinese,Spanish,Indonesian": 0.35333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.38, - "Vietnamese,Chinese,Malay,Indonesian": 0.35333333333333333, - "Vietnamese,Chinese,Malay,English": 0.3466666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.3466666666666667, - "Vietnamese,Spanish,Malay,Indonesian": 0.36666666666666664, - "Vietnamese,Spanish,Malay,English": 0.37333333333333335, - "Vietnamese,Spanish,Indonesian,English": 0.35333333333333333, - "Vietnamese,Malay,Indonesian,English": 0.36, - "Chinese,Spanish,Malay,Indonesian": 0.34, - "Chinese,Spanish,Malay,English": 0.36666666666666664, - "Chinese,Spanish,Indonesian,English": 0.34, - "Chinese,Malay,Indonesian,English": 0.35333333333333333, - "Spanish,Malay,Indonesian,English": 0.38666666666666666 + "Filipino,Chinese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Chinese,Malay,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Malay,English": 0.32666666666666666, + "Filipino,Chinese,Indonesian,English": 0.36, + "Filipino,Spanish,Malay,Indonesian": 0.36666666666666664, + "Filipino,Spanish,Malay,English": 0.34, + "Filipino,Spanish,Indonesian,English": 0.36666666666666664, + "Filipino,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.42, + "Vietnamese,Chinese,Spanish,Indonesian": 0.4533333333333333, + "Vietnamese,Chinese,Spanish,English": 0.44, + "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,Malay,English": 0.38, + "Vietnamese,Chinese,Indonesian,English": 0.41333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.44, + "Vietnamese,Spanish,Malay,English": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.42, + "Vietnamese,Malay,Indonesian,English": 0.4, + "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Chinese,Spanish,Malay,English": 0.41333333333333333, + "Chinese,Spanish,Indonesian,English": 0.4266666666666667, + "Chinese,Malay,Indonesian,English": 0.37333333333333335, + "Spanish,Malay,Indonesian,English": 0.41333333333333333 }, "5_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32, "Filipino,Vietnamese,Chinese,Malay,English": 0.28, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.28, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Spanish,Malay,English": 0.32666666666666666, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.3, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.2866666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, - "Filipino,Chinese,Spanish,Malay,English": 0.30666666666666664, - "Filipino,Chinese,Spanish,Indonesian,English": 0.28, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.32, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Spanish,Malay,English": 0.28, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.3, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.32, + "Filipino,Chinese,Spanish,Malay,English": 0.3, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32, "Filipino,Chinese,Malay,Indonesian,English": 0.28, - "Filipino,Spanish,Malay,Indonesian,English": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3, - "Vietnamese,Chinese,Spanish,Malay,English": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.30666666666666664, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.29333333333333333, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.31333333333333335, - "Chinese,Spanish,Malay,Indonesian,English": 0.3 + "Filipino,Spanish,Malay,Indonesian,English": 0.3, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.34, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.36, + "Chinese,Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.26666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.26, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.26, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.32 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24 } }, - "AC3_2": 0.5978960142091715, - "AC3_3": 0.5051294289948243, - "AC3_4": 0.4438684652873053, - "AC3_5": 0.3997827726212339, - "AC3_6": 0.3651667715122094, - "AC3_7": 0.33644646920805593 + "AC3_2": 0.6045190625593502, + "AC3_3": 0.5224207492301385, + "AC3_4": 0.4632003667564327, + "AC3_5": 0.41623836122051383, + "AC3_6": 0.3764498345760449, + "AC3_7": 0.34064516124911554 }, "prompt_5": { - "overall_acc": 0.5390476190476191, + "overall_acc": 0.560952380952381, "language_acc": { - "Filipino": 0.4266666666666667, - "Vietnamese": 0.49333333333333335, - "Chinese": 0.56, - "Spanish": 0.6, - "Malay": 0.5333333333333333, - "Indonesian": 0.4866666666666667, - "English": 0.6733333333333333 + "Filipino": 0.5133333333333333, + "Vietnamese": 0.5666666666666667, + "Chinese": 0.5666666666666667, + "Spanish": 0.5866666666666667, + "Malay": 0.52, + "Indonesian": 0.56, + "English": 0.6133333333333333 }, - "consistency_score_2": 0.5266666666666666, - "consistency_score_3": 0.3462857142857143, - "consistency_score_4": 0.24952380952380956, - "consistency_score_5": 0.1892063492063492, - "consistency_score_6": 0.14857142857142858, - "consistency_score_7": 0.12, + "consistency_score_2": 0.6244444444444445, + "consistency_score_3": 0.4790476190476191, + "consistency_score_4": 0.3958095238095237, + "consistency_score_5": 0.33873015873015866, + "consistency_score_6": 0.29523809523809524, + "consistency_score_7": 0.26, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.4533333333333333, - "Filipino,Chinese": 0.4666666666666667, - "Filipino,Spanish": 0.47333333333333333, - "Filipino,Malay": 0.44666666666666666, - "Filipino,Indonesian": 0.43333333333333335, - "Filipino,English": 0.47333333333333333, - "Vietnamese,Chinese": 0.54, - "Vietnamese,Spanish": 0.5333333333333333, - "Vietnamese,Malay": 0.6133333333333333, - "Vietnamese,Indonesian": 0.5333333333333333, - "Vietnamese,English": 0.4866666666666667, - "Chinese,Spanish": 0.5333333333333333, - "Chinese,Malay": 0.5866666666666667, - "Chinese,Indonesian": 0.5266666666666666, - "Chinese,English": 0.5866666666666667, - "Spanish,Malay": 0.5933333333333334, - "Spanish,Indonesian": 0.5533333333333333, - "Spanish,English": 0.5933333333333334, - "Malay,Indonesian": 0.6133333333333333, - "Malay,English": 0.5, - "Indonesian,English": 0.52 + "Filipino,Vietnamese": 0.5533333333333333, + "Filipino,Chinese": 0.5933333333333334, + "Filipino,Spanish": 0.5666666666666667, + "Filipino,Malay": 0.5866666666666667, + "Filipino,Indonesian": 0.58, + "Filipino,English": 0.58, + "Vietnamese,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.6466666666666666, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,English": 0.6266666666666667, + "Chinese,Spanish": 0.6533333333333333, + "Chinese,Malay": 0.64, + "Chinese,Indonesian": 0.6533333333333333, + "Chinese,English": 0.6066666666666667, + "Spanish,Malay": 0.6466666666666666, + "Spanish,Indonesian": 0.6466666666666666, + "Spanish,English": 0.6933333333333334, + "Malay,Indonesian": 0.74, + "Malay,English": 0.6333333333333333, + "Indonesian,English": 0.6466666666666666 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.30666666666666664, - "Filipino,Vietnamese,Spanish": 0.3, - "Filipino,Vietnamese,Malay": 0.32, - "Filipino,Vietnamese,Indonesian": 0.26666666666666666, - "Filipino,Vietnamese,English": 0.2733333333333333, - "Filipino,Chinese,Spanish": 0.30666666666666664, - "Filipino,Chinese,Malay": 0.32, - "Filipino,Chinese,Indonesian": 0.28, - "Filipino,Chinese,English": 0.31333333333333335, - "Filipino,Spanish,Malay": 0.32, - "Filipino,Spanish,Indonesian": 0.3, - "Filipino,Spanish,English": 0.32666666666666666, - "Filipino,Malay,Indonesian": 0.2866666666666667, - "Filipino,Malay,English": 0.2733333333333333, - "Filipino,Indonesian,English": 0.26666666666666666, - "Vietnamese,Chinese,Spanish": 0.36666666666666664, - "Vietnamese,Chinese,Malay": 0.4066666666666667, - "Vietnamese,Chinese,Indonesian": 0.35333333333333333, - "Vietnamese,Chinese,English": 0.36, - "Vietnamese,Spanish,Malay": 0.44, - "Vietnamese,Spanish,Indonesian": 0.38, - "Vietnamese,Spanish,English": 0.34, - "Vietnamese,Malay,Indonesian": 0.44, - "Vietnamese,Malay,English": 0.36666666666666664, - "Vietnamese,Indonesian,English": 0.35333333333333333, - "Chinese,Spanish,Malay": 0.4066666666666667, - "Chinese,Spanish,Indonesian": 0.36, - "Chinese,Spanish,English": 0.38666666666666666, - "Chinese,Malay,Indonesian": 0.41333333333333333, - "Chinese,Malay,English": 0.38, - "Chinese,Indonesian,English": 0.36666666666666664, - "Spanish,Malay,Indonesian": 0.42, - "Spanish,Malay,English": 0.38, - "Spanish,Indonesian,English": 0.37333333333333335, - "Malay,Indonesian,English": 0.36666666666666664 + "Filipino,Vietnamese,Chinese": 0.43333333333333335, + "Filipino,Vietnamese,Spanish": 0.43333333333333335, + "Filipino,Vietnamese,Malay": 0.4066666666666667, + "Filipino,Vietnamese,Indonesian": 0.42, + "Filipino,Vietnamese,English": 0.41333333333333333, + "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Malay": 0.4533333333333333, + "Filipino,Chinese,Indonesian": 0.48, + "Filipino,Chinese,English": 0.4266666666666667, + "Filipino,Spanish,Malay": 0.44666666666666666, + "Filipino,Spanish,Indonesian": 0.4533333333333333, + "Filipino,Spanish,English": 0.4533333333333333, + "Filipino,Malay,Indonesian": 0.49333333333333335, + "Filipino,Malay,English": 0.44, + "Filipino,Indonesian,English": 0.4533333333333333, + "Vietnamese,Chinese,Spanish": 0.5133333333333333, + "Vietnamese,Chinese,Malay": 0.48, + "Vietnamese,Chinese,Indonesian": 0.4866666666666667, + "Vietnamese,Chinese,English": 0.47333333333333333, + "Vietnamese,Spanish,Malay": 0.4866666666666667, + "Vietnamese,Spanish,Indonesian": 0.4866666666666667, + "Vietnamese,Spanish,English": 0.5133333333333333, + "Vietnamese,Malay,Indonesian": 0.4866666666666667, + "Vietnamese,Malay,English": 0.46, + "Vietnamese,Indonesian,English": 0.47333333333333333, + "Chinese,Spanish,Malay": 0.5266666666666666, + "Chinese,Spanish,Indonesian": 0.52, + "Chinese,Spanish,English": 0.5133333333333333, + "Chinese,Malay,Indonesian": 0.5533333333333333, + "Chinese,Malay,English": 0.49333333333333335, + "Chinese,Indonesian,English": 0.5, + "Spanish,Malay,Indonesian": 0.56, + "Spanish,Malay,English": 0.5266666666666666, + "Spanish,Indonesian,English": 0.52, + "Malay,Indonesian,English": 0.5333333333333333 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.22666666666666666, - "Filipino,Vietnamese,Chinese,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.21333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,Malay": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Indonesian": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,English": 0.21333333333333335, - "Filipino,Vietnamese,Malay,Indonesian": 0.22666666666666666, - "Filipino,Vietnamese,Malay,English": 0.22, - "Filipino,Vietnamese,Indonesian,English": 0.19333333333333333, - "Filipino,Chinese,Spanish,Malay": 0.24, - "Filipino,Chinese,Spanish,Indonesian": 0.2, - "Filipino,Chinese,Spanish,English": 0.23333333333333334, - "Filipino,Chinese,Malay,Indonesian": 0.22666666666666666, - "Filipino,Chinese,Malay,English": 0.21333333333333335, - "Filipino,Chinese,Indonesian,English": 0.20666666666666667, - "Filipino,Spanish,Malay,Indonesian": 0.22, - "Filipino,Spanish,Malay,English": 0.23333333333333334, - "Filipino,Spanish,Indonesian,English": 0.21333333333333335, - "Filipino,Malay,Indonesian,English": 0.2, - "Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, - "Vietnamese,Chinese,Spanish,Indonesian": 0.26666666666666666, - "Vietnamese,Chinese,Spanish,English": 0.26666666666666666, - "Vietnamese,Chinese,Malay,Indonesian": 0.32, - "Vietnamese,Chinese,Malay,English": 0.29333333333333333, - "Vietnamese,Chinese,Indonesian,English": 0.26, - "Vietnamese,Spanish,Malay,Indonesian": 0.3333333333333333, - "Vietnamese,Spanish,Malay,English": 0.2866666666666667, - "Vietnamese,Spanish,Indonesian,English": 0.26, - "Vietnamese,Malay,Indonesian,English": 0.2866666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, - "Chinese,Spanish,Malay,English": 0.2866666666666667, - "Chinese,Spanish,Indonesian,English": 0.26, - "Chinese,Malay,Indonesian,English": 0.2733333333333333, - "Spanish,Malay,Indonesian,English": 0.29333333333333333 + "Filipino,Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Filipino,Vietnamese,Chinese,Malay": 0.36, + "Filipino,Vietnamese,Chinese,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.34, + "Filipino,Vietnamese,Spanish,Malay": 0.36, + "Filipino,Vietnamese,Spanish,Indonesian": 0.36, + "Filipino,Vietnamese,Spanish,English": 0.35333333333333333, + "Filipino,Vietnamese,Malay,Indonesian": 0.36, + "Filipino,Vietnamese,Malay,English": 0.3333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.3466666666666667, + "Filipino,Chinese,Spanish,Malay": 0.38666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.4, + "Filipino,Chinese,Spanish,English": 0.36666666666666664, + "Filipino,Chinese,Malay,Indonesian": 0.42, + "Filipino,Chinese,Malay,English": 0.36, + "Filipino,Chinese,Indonesian,English": 0.37333333333333335, + "Filipino,Spanish,Malay,Indonesian": 0.4066666666666667, + "Filipino,Spanish,Malay,English": 0.37333333333333335, + "Filipino,Spanish,Indonesian,English": 0.38666666666666666, + "Filipino,Malay,Indonesian,English": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.43333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,Spanish,English": 0.4266666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,Malay,English": 0.4, + "Vietnamese,Chinese,Indonesian,English": 0.3933333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish,Malay,English": 0.4, + "Vietnamese,Spanish,Indonesian,English": 0.41333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.4066666666666667, + "Chinese,Spanish,Malay,Indonesian": 0.4666666666666667, + "Chinese,Spanish,Malay,English": 0.44666666666666666, + "Chinese,Spanish,Indonesian,English": 0.44, + "Chinese,Malay,Indonesian,English": 0.44, + "Spanish,Malay,Indonesian,English": 0.47333333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.20666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.18, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.19333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.17333333333333334, - "Filipino,Chinese,Spanish,Malay,English": 0.18, - "Filipino,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Malay,Indonesian,English": 0.16, - "Filipino,Spanish,Malay,Indonesian,English": 0.17333333333333334, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.25333333333333335, - "Vietnamese,Chinese,Spanish,Malay,English": 0.23333333333333334, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.19333333333333333, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.22666666666666666, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.22666666666666666, - "Chinese,Spanish,Malay,Indonesian,English": 0.22 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.3, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.30666666666666664, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.36, + "Filipino,Chinese,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.3333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.36666666666666664, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.36, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.4066666666666667 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.16, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.14666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.14, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.18 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.28, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.28, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.34 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.12 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 } }, - "AC3_2": 0.532785224853194, - "AC3_3": 0.42168182931061965, - "AC3_4": 0.3411364158757646, - "AC3_5": 0.2800979781240327, - "AC3_6": 0.23294024531634128, - "AC3_7": 0.1963005780048966 + "AC3_2": 0.5909980360151319, + "AC3_3": 0.5167748124394084, + "AC3_4": 0.4641286273132446, + "AC3_5": 0.42239674693695334, + "AC3_6": 0.3868637109564578, + "AC3_7": 0.355313225014724 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.4935064935064935, + "overall_acc": 0.5081168831168831, "language_acc": { - "Indonesian": 0.4715909090909091, - "English": 0.5681818181818182, - "Filipino": 0.4318181818181818, - "Spanish": 0.5340909090909091, - "Chinese": 0.5568181818181818, - "Malay": 0.42045454545454547, - "Vietnamese": 0.4715909090909091 + "Indonesian": 0.4772727272727273, + "English": 0.5852272727272727, + "Filipino": 0.4147727272727273, + "Spanish": 0.5454545454545454, + "Chinese": 0.5795454545454546, + "Malay": 0.4659090909090909, + "Vietnamese": 0.48863636363636365 }, - "consistency_score_2": 0.6071428571428572, - "consistency_score_3": 0.45, - "consistency_score_4": 0.3626623376623376, - "consistency_score_5": 0.3057359307359307, - "consistency_score_6": 0.26461038961038963, - "consistency_score_7": 0.23295454545454544, + "consistency_score_2": 0.6263528138528139, + "consistency_score_3": 0.4767857142857143, + "consistency_score_4": 0.3964285714285714, + "consistency_score_5": 0.3444264069264069, + "consistency_score_6": 0.30681818181818177, + "consistency_score_7": 0.2784090909090909, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5909090909090909, - "Indonesian,Filipino": 0.5965909090909091, + "Indonesian,English": 0.5852272727272727, + "Indonesian,Filipino": 0.5909090909090909, "Indonesian,Spanish": 0.6477272727272727, - "Indonesian,Chinese": 0.6022727272727273, - "Indonesian,Malay": 0.6647727272727273, - "Indonesian,Vietnamese": 0.6477272727272727, - "English,Filipino": 0.5454545454545454, - "English,Spanish": 0.7840909090909091, - "English,Chinese": 0.5965909090909091, - "English,Malay": 0.5738636363636364, - "English,Vietnamese": 0.7272727272727273, + "Indonesian,Chinese": 0.5625, + "Indonesian,Malay": 0.7329545454545454, + "Indonesian,Vietnamese": 0.6420454545454546, + "English,Filipino": 0.5511363636363636, + "English,Spanish": 0.75, + "English,Chinese": 0.6647727272727273, + "English,Malay": 0.5625, + "English,Vietnamese": 0.6931818181818182, "Filipino,Spanish": 0.5738636363636364, - "Filipino,Chinese": 0.4943181818181818, - "Filipino,Malay": 0.6363636363636364, - "Filipino,Vietnamese": 0.5284090909090909, - "Spanish,Chinese": 0.5625, - "Spanish,Malay": 0.6079545454545454, - "Spanish,Vietnamese": 0.6477272727272727, - "Chinese,Malay": 0.5397727272727273, - "Chinese,Vietnamese": 0.6022727272727273, - "Malay,Vietnamese": 0.5795454545454546 + "Filipino,Chinese": 0.5170454545454546, + "Filipino,Malay": 0.6875, + "Filipino,Vietnamese": 0.5681818181818182, + "Spanish,Chinese": 0.6647727272727273, + "Spanish,Malay": 0.6477272727272727, + "Spanish,Vietnamese": 0.6875, + "Chinese,Malay": 0.5795454545454546, + "Chinese,Vietnamese": 0.6079545454545454, + "Malay,Vietnamese": 0.6363636363636364 }, "3_combine": { "Indonesian,English,Filipino": 0.42045454545454547, - "Indonesian,English,Spanish": 0.5340909090909091, + "Indonesian,English,Spanish": 0.5170454545454546, "Indonesian,English,Chinese": 0.4431818181818182, - "Indonesian,English,Malay": 0.45454545454545453, - "Indonesian,English,Vietnamese": 0.5113636363636364, - "Indonesian,Filipino,Spanish": 0.4602272727272727, + "Indonesian,English,Malay": 0.48863636363636365, + "Indonesian,English,Vietnamese": 0.48863636363636365, + "Indonesian,Filipino,Spanish": 0.4431818181818182, "Indonesian,Filipino,Chinese": 0.375, - "Indonesian,Filipino,Malay": 0.5, - "Indonesian,Filipino,Vietnamese": 0.4318181818181818, - "Indonesian,Spanish,Chinese": 0.4431818181818182, - "Indonesian,Spanish,Malay": 0.4943181818181818, - "Indonesian,Spanish,Vietnamese": 0.5, - "Indonesian,Chinese,Malay": 0.4431818181818182, - "Indonesian,Chinese,Vietnamese": 0.4659090909090909, - "Indonesian,Malay,Vietnamese": 0.48295454545454547, + "Indonesian,Filipino,Malay": 0.5340909090909091, + "Indonesian,Filipino,Vietnamese": 0.44886363636363635, + "Indonesian,Spanish,Chinese": 0.4715909090909091, + "Indonesian,Spanish,Malay": 0.5397727272727273, + "Indonesian,Spanish,Vietnamese": 0.5170454545454546, + "Indonesian,Chinese,Malay": 0.4715909090909091, + "Indonesian,Chinese,Vietnamese": 0.44886363636363635, + "Indonesian,Malay,Vietnamese": 0.5454545454545454, "English,Filipino,Spanish": 0.4772727272727273, - "English,Filipino,Chinese": 0.3806818181818182, - "English,Filipino,Malay": 0.42045454545454547, - "English,Filipino,Vietnamese": 0.4431818181818182, - "English,Spanish,Chinese": 0.4943181818181818, - "English,Spanish,Malay": 0.5056818181818182, - "English,Spanish,Vietnamese": 0.6022727272727273, - "English,Chinese,Malay": 0.38636363636363635, - "English,Chinese,Vietnamese": 0.48863636363636365, - "English,Malay,Vietnamese": 0.4772727272727273, - "Filipino,Spanish,Chinese": 0.3693181818181818, - "Filipino,Spanish,Malay": 0.45454545454545453, - "Filipino,Spanish,Vietnamese": 0.42045454545454547, - "Filipino,Chinese,Malay": 0.375, - "Filipino,Chinese,Vietnamese": 0.3693181818181818, - "Filipino,Malay,Vietnamese": 0.4147727272727273, - "Spanish,Chinese,Malay": 0.39204545454545453, - "Spanish,Chinese,Vietnamese": 0.4431818181818182, - "Spanish,Malay,Vietnamese": 0.45454545454545453, - "Chinese,Malay,Vietnamese": 0.42045454545454547 + "English,Filipino,Chinese": 0.42045454545454547, + "English,Filipino,Malay": 0.44886363636363635, + "English,Filipino,Vietnamese": 0.4602272727272727, + "English,Spanish,Chinese": 0.5511363636363636, + "English,Spanish,Malay": 0.5, + "English,Spanish,Vietnamese": 0.5852272727272727, + "English,Chinese,Malay": 0.4431818181818182, + "English,Chinese,Vietnamese": 0.5, + "English,Malay,Vietnamese": 0.48863636363636365, + "Filipino,Spanish,Chinese": 0.42613636363636365, + "Filipino,Spanish,Malay": 0.4943181818181818, + "Filipino,Spanish,Vietnamese": 0.4602272727272727, + "Filipino,Chinese,Malay": 0.4375, + "Filipino,Chinese,Vietnamese": 0.4034090909090909, + "Filipino,Malay,Vietnamese": 0.48295454545454547, + "Spanish,Chinese,Malay": 0.4772727272727273, + "Spanish,Chinese,Vietnamese": 0.5056818181818182, + "Spanish,Malay,Vietnamese": 0.5170454545454546, + "Chinese,Malay,Vietnamese": 0.45454545454545453 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.38636363636363635, - "Indonesian,English,Filipino,Chinese": 0.3181818181818182, - "Indonesian,English,Filipino,Malay": 0.36363636363636365, - "Indonesian,English,Filipino,Vietnamese": 0.3693181818181818, - "Indonesian,English,Spanish,Chinese": 0.3977272727272727, - "Indonesian,English,Spanish,Malay": 0.4147727272727273, - "Indonesian,English,Spanish,Vietnamese": 0.4715909090909091, - "Indonesian,English,Chinese,Malay": 0.3409090909090909, + "Indonesian,English,Filipino,Spanish": 0.375, + "Indonesian,English,Filipino,Chinese": 0.3409090909090909, + "Indonesian,English,Filipino,Malay": 0.4034090909090909, + "Indonesian,English,Filipino,Vietnamese": 0.375, + "Indonesian,English,Spanish,Chinese": 0.4090909090909091, + "Indonesian,English,Spanish,Malay": 0.4375, + "Indonesian,English,Spanish,Vietnamese": 0.4431818181818182, + "Indonesian,English,Chinese,Malay": 0.39204545454545453, "Indonesian,English,Chinese,Vietnamese": 0.3977272727272727, - "Indonesian,English,Malay,Vietnamese": 0.4090909090909091, - "Indonesian,Filipino,Spanish,Chinese": 0.3125, - "Indonesian,Filipino,Spanish,Malay": 0.3977272727272727, - "Indonesian,Filipino,Spanish,Vietnamese": 0.35795454545454547, - "Indonesian,Filipino,Chinese,Malay": 0.32386363636363635, - "Indonesian,Filipino,Chinese,Vietnamese": 0.32386363636363635, - "Indonesian,Filipino,Malay,Vietnamese": 0.3693181818181818, - "Indonesian,Spanish,Chinese,Malay": 0.3465909090909091, - "Indonesian,Spanish,Chinese,Vietnamese": 0.39204545454545453, - "Indonesian,Spanish,Malay,Vietnamese": 0.39204545454545453, - "Indonesian,Chinese,Malay,Vietnamese": 0.3693181818181818, - "English,Filipino,Spanish,Chinese": 0.3352272727272727, - "English,Filipino,Spanish,Malay": 0.3806818181818182, - "English,Filipino,Spanish,Vietnamese": 0.3977272727272727, - "English,Filipino,Chinese,Malay": 0.30113636363636365, - "English,Filipino,Chinese,Vietnamese": 0.32386363636363635, - "English,Filipino,Malay,Vietnamese": 0.36363636363636365, - "English,Spanish,Chinese,Malay": 0.3522727272727273, - "English,Spanish,Chinese,Vietnamese": 0.4147727272727273, - "English,Spanish,Malay,Vietnamese": 0.42613636363636365, - "English,Chinese,Malay,Vietnamese": 0.3465909090909091, - "Filipino,Spanish,Chinese,Malay": 0.29545454545454547, - "Filipino,Spanish,Chinese,Vietnamese": 0.3125, - "Filipino,Spanish,Malay,Vietnamese": 0.3465909090909091, - "Filipino,Chinese,Malay,Vietnamese": 0.3068181818181818, - "Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727 + "Indonesian,English,Malay,Vietnamese": 0.4318181818181818, + "Indonesian,Filipino,Spanish,Chinese": 0.3352272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.4318181818181818, + "Indonesian,Filipino,Spanish,Vietnamese": 0.38636363636363635, + "Indonesian,Filipino,Chinese,Malay": 0.36363636363636365, + "Indonesian,Filipino,Chinese,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Malay,Vietnamese": 0.42613636363636365, + "Indonesian,Spanish,Chinese,Malay": 0.4034090909090909, + "Indonesian,Spanish,Chinese,Vietnamese": 0.4090909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.45454545454545453, + "Indonesian,Chinese,Malay,Vietnamese": 0.38636363636363635, + "English,Filipino,Spanish,Chinese": 0.38636363636363635, + "English,Filipino,Spanish,Malay": 0.4034090909090909, + "English,Filipino,Spanish,Vietnamese": 0.4034090909090909, + "English,Filipino,Chinese,Malay": 0.36363636363636365, + "English,Filipino,Chinese,Vietnamese": 0.3693181818181818, + "English,Filipino,Malay,Vietnamese": 0.3977272727272727, + "English,Spanish,Chinese,Malay": 0.4090909090909091, + "English,Spanish,Chinese,Vietnamese": 0.45454545454545453, + "English,Spanish,Malay,Vietnamese": 0.4431818181818182, + "English,Chinese,Malay,Vietnamese": 0.39204545454545453, + "Filipino,Spanish,Chinese,Malay": 0.3693181818181818, + "Filipino,Spanish,Chinese,Vietnamese": 0.3693181818181818, + "Filipino,Spanish,Malay,Vietnamese": 0.4147727272727273, + "Filipino,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Spanish,Chinese,Malay,Vietnamese": 0.4034090909090909 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.2897727272727273, - "Indonesian,English,Filipino,Spanish,Malay": 0.3352272727272727, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3409090909090909, - "Indonesian,English,Filipino,Chinese,Malay": 0.2727272727272727, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2897727272727273, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.32386363636363635, - "Indonesian,English,Spanish,Chinese,Malay": 0.3125, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3693181818181818, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.375, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.3181818181818182, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.26704545454545453, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.2784090909090909, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3125, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.2784090909090909, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818, - "English,Filipino,Spanish,Chinese,Malay": 0.2727272727272727, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.29545454545454547, - "English,Filipino,Spanish,Malay,Vietnamese": 0.32954545454545453, - "English,Filipino,Chinese,Malay,Vietnamese": 0.2727272727272727, - "English,Spanish,Chinese,Malay,Vietnamese": 0.3181818181818182, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.26136363636363635 + "Indonesian,English,Filipino,Spanish,Chinese": 0.3125, + "Indonesian,English,Filipino,Spanish,Malay": 0.36363636363636365, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3352272727272727, + "Indonesian,English,Filipino,Chinese,Malay": 0.32954545454545453, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3125, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.35795454545454547, + "Indonesian,English,Spanish,Chinese,Malay": 0.35795454545454547, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.375, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.39204545454545453, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.375, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.3181818181818182, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.3352272727272727, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.3465909090909091, + "English,Filipino,Spanish,Malay,Vietnamese": 0.35795454545454547, + "English,Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, + "English,Spanish,Chinese,Malay,Vietnamese": 0.3693181818181818, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.25, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.26704545454545453, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.30113636363636365, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.25, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.25 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.30113636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 } }, - "AC3_2": 0.5444584913116792, - "AC3_3": 0.4707501720079177, - "AC3_4": 0.4180862746927325, - "AC3_5": 0.3775642096090479, - "AC3_6": 0.34450346223103956, - "AC3_7": 0.31650584048461866 + "AC3_2": 0.5610734959751482, + "AC3_3": 0.49195295386938, + "AC3_4": 0.44537739714079405, + "AC3_5": 0.41055715146424704, + "AC3_6": 0.38260593982990687, + "AC3_7": 0.35971948583474134 }, "prompt_2": { - "overall_acc": 0.48701298701298706, + "overall_acc": 0.5032467532467533, "language_acc": { - "Indonesian": 0.44886363636363635, - "English": 0.5511363636363636, - "Filipino": 0.4431818181818182, - "Spanish": 0.5227272727272727, - "Chinese": 0.5397727272727273, - "Malay": 0.4772727272727273, - "Vietnamese": 0.42613636363636365 + "Indonesian": 0.5056818181818182, + "English": 0.5738636363636364, + "Filipino": 0.4147727272727273, + "Spanish": 0.5397727272727273, + "Chinese": 0.5511363636363636, + "Malay": 0.5, + "Vietnamese": 0.4375 }, - "consistency_score_2": 0.5933441558441558, - "consistency_score_3": 0.436525974025974, - "consistency_score_4": 0.34902597402597413, - "consistency_score_5": 0.2919372294372295, - "consistency_score_6": 0.25162337662337664, - "consistency_score_7": 0.2215909090909091, + "consistency_score_2": 0.6236471861471862, + "consistency_score_3": 0.47727272727272724, + "consistency_score_4": 0.3987012987012987, + "consistency_score_5": 0.3492965367965367, + "consistency_score_6": 0.31655844155844154, + "consistency_score_7": 0.29545454545454547, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5852272727272727, - "Indonesian,Filipino": 0.5454545454545454, - "Indonesian,Spanish": 0.6193181818181818, - "Indonesian,Chinese": 0.5568181818181818, - "Indonesian,Malay": 0.6590909090909091, - "Indonesian,Vietnamese": 0.6079545454545454, - "English,Filipino": 0.5284090909090909, + "Indonesian,English": 0.6079545454545454, + "Indonesian,Filipino": 0.625, + "Indonesian,Spanish": 0.6590909090909091, + "Indonesian,Chinese": 0.5852272727272727, + "Indonesian,Malay": 0.7386363636363636, + "Indonesian,Vietnamese": 0.6477272727272727, + "English,Filipino": 0.5454545454545454, "English,Spanish": 0.75, - "English,Chinese": 0.5909090909090909, - "English,Malay": 0.6306818181818182, - "English,Vietnamese": 0.6079545454545454, + "English,Chinese": 0.6761363636363636, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.6306818181818182, "Filipino,Spanish": 0.5681818181818182, - "Filipino,Chinese": 0.5284090909090909, - "Filipino,Malay": 0.5738636363636364, - "Filipino,Vietnamese": 0.5397727272727273, - "Spanish,Chinese": 0.5681818181818182, - "Spanish,Malay": 0.6647727272727273, - "Spanish,Vietnamese": 0.6306818181818182, - "Chinese,Malay": 0.5454545454545454, - "Chinese,Vietnamese": 0.5909090909090909, - "Malay,Vietnamese": 0.5681818181818182 + "Filipino,Chinese": 0.5170454545454546, + "Filipino,Malay": 0.6590909090909091, + "Filipino,Vietnamese": 0.5625, + "Spanish,Chinese": 0.6420454545454546, + "Spanish,Malay": 0.625, + "Spanish,Vietnamese": 0.6704545454545454, + "Chinese,Malay": 0.5965909090909091, + "Chinese,Vietnamese": 0.5852272727272727, + "Malay,Vietnamese": 0.6363636363636364 }, "3_combine": { - "Indonesian,English,Filipino": 0.3977272727272727, - "Indonesian,English,Spanish": 0.5056818181818182, - "Indonesian,English,Chinese": 0.42045454545454547, - "Indonesian,English,Malay": 0.4943181818181818, - "Indonesian,English,Vietnamese": 0.4375, - "Indonesian,Filipino,Spanish": 0.4090909090909091, - "Indonesian,Filipino,Chinese": 0.36363636363636365, - "Indonesian,Filipino,Malay": 0.44886363636363635, - "Indonesian,Filipino,Vietnamese": 0.39204545454545453, - "Indonesian,Spanish,Chinese": 0.4147727272727273, - "Indonesian,Spanish,Malay": 0.5170454545454546, - "Indonesian,Spanish,Vietnamese": 0.45454545454545453, - "Indonesian,Chinese,Malay": 0.4375, - "Indonesian,Chinese,Vietnamese": 0.42045454545454547, - "Indonesian,Malay,Vietnamese": 0.4715909090909091, - "English,Filipino,Spanish": 0.4659090909090909, - "English,Filipino,Chinese": 0.39204545454545453, - "English,Filipino,Malay": 0.42613636363636365, - "English,Filipino,Vietnamese": 0.38636363636363635, - "English,Spanish,Chinese": 0.48863636363636365, - "English,Spanish,Malay": 0.5454545454545454, - "English,Spanish,Vietnamese": 0.5113636363636364, - "English,Chinese,Malay": 0.4318181818181818, - "English,Chinese,Vietnamese": 0.4318181818181818, - "English,Malay,Vietnamese": 0.4431818181818182, - "Filipino,Spanish,Chinese": 0.38636363636363635, - "Filipino,Spanish,Malay": 0.44886363636363635, - "Filipino,Spanish,Vietnamese": 0.4034090909090909, - "Filipino,Chinese,Malay": 0.39204545454545453, - "Filipino,Chinese,Vietnamese": 0.39204545454545453, - "Filipino,Malay,Vietnamese": 0.39204545454545453, - "Spanish,Chinese,Malay": 0.4375, - "Spanish,Chinese,Vietnamese": 0.4318181818181818, - "Spanish,Malay,Vietnamese": 0.4772727272727273, - "Chinese,Malay,Vietnamese": 0.4090909090909091 + "Indonesian,English,Filipino": 0.4431818181818182, + "Indonesian,English,Spanish": 0.5284090909090909, + "Indonesian,English,Chinese": 0.4659090909090909, + "Indonesian,English,Malay": 0.5, + "Indonesian,English,Vietnamese": 0.48295454545454547, + "Indonesian,Filipino,Spanish": 0.4659090909090909, + "Indonesian,Filipino,Chinese": 0.3977272727272727, + "Indonesian,Filipino,Malay": 0.5511363636363636, + "Indonesian,Filipino,Vietnamese": 0.4715909090909091, + "Indonesian,Spanish,Chinese": 0.4772727272727273, + "Indonesian,Spanish,Malay": 0.5454545454545454, + "Indonesian,Spanish,Vietnamese": 0.5284090909090909, + "Indonesian,Chinese,Malay": 0.4943181818181818, + "Indonesian,Chinese,Vietnamese": 0.4659090909090909, + "Indonesian,Malay,Vietnamese": 0.5511363636363636, + "English,Filipino,Spanish": 0.4602272727272727, + "English,Filipino,Chinese": 0.42045454545454547, + "English,Filipino,Malay": 0.4431818181818182, + "English,Filipino,Vietnamese": 0.4147727272727273, + "English,Spanish,Chinese": 0.5511363636363636, + "English,Spanish,Malay": 0.48295454545454547, + "English,Spanish,Vietnamese": 0.5454545454545454, + "English,Chinese,Malay": 0.4659090909090909, + "English,Chinese,Vietnamese": 0.4943181818181818, + "English,Malay,Vietnamese": 0.4659090909090909, + "Filipino,Spanish,Chinese": 0.4147727272727273, + "Filipino,Spanish,Malay": 0.4772727272727273, + "Filipino,Spanish,Vietnamese": 0.44886363636363635, + "Filipino,Chinese,Malay": 0.42613636363636365, + "Filipino,Chinese,Vietnamese": 0.3977272727272727, + "Filipino,Malay,Vietnamese": 0.48295454545454547, + "Spanish,Chinese,Malay": 0.4772727272727273, + "Spanish,Chinese,Vietnamese": 0.48863636363636365, + "Spanish,Malay,Vietnamese": 0.5056818181818182, + "Chinese,Malay,Vietnamese": 0.4715909090909091 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.3465909090909091, - "Indonesian,English,Filipino,Chinese": 0.3125, - "Indonesian,English,Filipino,Malay": 0.36363636363636365, - "Indonesian,English,Filipino,Vietnamese": 0.3181818181818182, - "Indonesian,English,Spanish,Chinese": 0.3693181818181818, + "Indonesian,English,Filipino,Spanish": 0.39204545454545453, + "Indonesian,English,Filipino,Chinese": 0.3522727272727273, + "Indonesian,English,Filipino,Malay": 0.4034090909090909, + "Indonesian,English,Filipino,Vietnamese": 0.3693181818181818, + "Indonesian,English,Spanish,Chinese": 0.42613636363636365, "Indonesian,English,Spanish,Malay": 0.4375, - "Indonesian,English,Spanish,Vietnamese": 0.39204545454545453, - "Indonesian,English,Chinese,Malay": 0.36363636363636365, - "Indonesian,English,Chinese,Vietnamese": 0.3409090909090909, - "Indonesian,English,Malay,Vietnamese": 0.38636363636363635, - "Indonesian,Filipino,Spanish,Chinese": 0.2897727272727273, - "Indonesian,Filipino,Spanish,Malay": 0.36363636363636365, - "Indonesian,Filipino,Spanish,Vietnamese": 0.3181818181818182, - "Indonesian,Filipino,Chinese,Malay": 0.3181818181818182, - "Indonesian,Filipino,Chinese,Vietnamese": 0.3125, - "Indonesian,Filipino,Malay,Vietnamese": 0.3352272727272727, - "Indonesian,Spanish,Chinese,Malay": 0.35795454545454547, - "Indonesian,Spanish,Chinese,Vietnamese": 0.3409090909090909, - "Indonesian,Spanish,Malay,Vietnamese": 0.3977272727272727, - "Indonesian,Chinese,Malay,Vietnamese": 0.3409090909090909, - "English,Filipino,Spanish,Chinese": 0.3465909090909091, - "English,Filipino,Spanish,Malay": 0.375, - "English,Filipino,Spanish,Vietnamese": 0.3465909090909091, - "English,Filipino,Chinese,Malay": 0.3352272727272727, - "English,Filipino,Chinese,Vietnamese": 0.3181818181818182, - "English,Filipino,Malay,Vietnamese": 0.32386363636363635, - "English,Spanish,Chinese,Malay": 0.38636363636363635, - "English,Spanish,Chinese,Vietnamese": 0.375, - "English,Spanish,Malay,Vietnamese": 0.3977272727272727, - "English,Chinese,Malay,Vietnamese": 0.3409090909090909, - "Filipino,Spanish,Chinese,Malay": 0.32386363636363635, - "Filipino,Spanish,Chinese,Vietnamese": 0.32386363636363635, - "Filipino,Spanish,Malay,Vietnamese": 0.3409090909090909, - "Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, - "Spanish,Chinese,Malay,Vietnamese": 0.3465909090909091 + "Indonesian,English,Spanish,Vietnamese": 0.4375, + "Indonesian,English,Chinese,Malay": 0.4147727272727273, + "Indonesian,English,Chinese,Vietnamese": 0.4090909090909091, + "Indonesian,English,Malay,Vietnamese": 0.42045454545454547, + "Indonesian,Filipino,Spanish,Chinese": 0.3522727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.4375, + "Indonesian,Filipino,Spanish,Vietnamese": 0.4034090909090909, + "Indonesian,Filipino,Chinese,Malay": 0.375, + "Indonesian,Filipino,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,Filipino,Malay,Vietnamese": 0.4375, + "Indonesian,Spanish,Chinese,Malay": 0.42045454545454547, + "Indonesian,Spanish,Chinese,Vietnamese": 0.4090909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.4602272727272727, + "Indonesian,Chinese,Malay,Vietnamese": 0.4147727272727273, + "English,Filipino,Spanish,Chinese": 0.38636363636363635, + "English,Filipino,Spanish,Malay": 0.39204545454545453, + "English,Filipino,Spanish,Vietnamese": 0.375, + "English,Filipino,Chinese,Malay": 0.375, + "English,Filipino,Chinese,Vietnamese": 0.35795454545454547, + "English,Filipino,Malay,Vietnamese": 0.375, + "English,Spanish,Chinese,Malay": 0.4090909090909091, + "English,Spanish,Chinese,Vietnamese": 0.44886363636363635, + "English,Spanish,Malay,Vietnamese": 0.4147727272727273, + "English,Chinese,Malay,Vietnamese": 0.4034090909090909, + "Filipino,Spanish,Chinese,Malay": 0.3693181818181818, + "Filipino,Spanish,Chinese,Vietnamese": 0.3522727272727273, + "Filipino,Spanish,Malay,Vietnamese": 0.4090909090909091, + "Filipino,Chinese,Malay,Vietnamese": 0.3693181818181818, + "Spanish,Chinese,Malay,Vietnamese": 0.4034090909090909 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.2727272727272727, - "Indonesian,English,Filipino,Spanish,Malay": 0.3125, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.2840909090909091, - "Indonesian,English,Filipino,Chinese,Malay": 0.2840909090909091, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2727272727272727, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.2897727272727273, - "Indonesian,English,Spanish,Chinese,Malay": 0.32386363636363635, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3068181818181818, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.3465909090909091, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.30113636363636365, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.26136363636363635, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.26136363636363635, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.2897727272727273, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.2784090909090909, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, - "English,Filipino,Spanish,Chinese,Malay": 0.29545454545454547, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.2897727272727273, - "English,Filipino,Spanish,Malay,Vietnamese": 0.2897727272727273, - "English,Filipino,Chinese,Malay,Vietnamese": 0.2840909090909091, - "English,Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2840909090909091 + "Indonesian,English,Filipino,Spanish,Chinese": 0.3352272727272727, + "Indonesian,English,Filipino,Spanish,Malay": 0.36363636363636365, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3409090909090909, + "Indonesian,English,Filipino,Chinese,Malay": 0.3409090909090909, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3181818181818182, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,English,Spanish,Chinese,Malay": 0.375, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3806818181818182, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.3806818181818182, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3409090909090909, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3181818181818182, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.38636363636363635, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365, + "English,Filipino,Spanish,Chinese,Malay": 0.3465909090909091, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3409090909090909, + "English,Filipino,Chinese,Malay,Vietnamese": 0.3352272727272727, + "English,Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.24431818181818182, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.24431818181818182, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.25, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.32386363636363635, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.3068181818181818, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3125 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 } }, - "AC3_2": 0.5349458955274844, - "AC3_3": 0.4603894961983203, - "AC3_4": 0.4066322027000697, - "AC3_5": 0.3650482896280321, - "AC3_6": 0.33181104605183825, - "AC3_7": 0.3045923148586084 + "AC3_2": 0.5570150137482976, + "AC3_3": 0.48991571337568224, + "AC3_4": 0.4449150560968294, + "AC3_5": 0.4123716651001917, + "AC3_6": 0.3886460074104815, + "AC3_7": 0.37232076861561636 }, "prompt_3": { - "overall_acc": 0.4715909090909091, + "overall_acc": 0.5064935064935064, "language_acc": { - "Indonesian": 0.4431818181818182, - "English": 0.5681818181818182, - "Filipino": 0.39204545454545453, - "Spanish": 0.5340909090909091, - "Chinese": 0.4772727272727273, - "Malay": 0.4659090909090909, - "Vietnamese": 0.42045454545454547 + "Indonesian": 0.4715909090909091, + "English": 0.6193181818181818, + "Filipino": 0.4090909090909091, + "Spanish": 0.5454545454545454, + "Chinese": 0.5795454545454546, + "Malay": 0.45454545454545453, + "Vietnamese": 0.4659090909090909 }, - "consistency_score_2": 0.554112554112554, - "consistency_score_3": 0.38165584415584414, - "consistency_score_4": 0.2931818181818182, - "consistency_score_5": 0.23971861471861478, - "consistency_score_6": 0.20454545454545456, - "consistency_score_7": 0.18181818181818182, + "consistency_score_2": 0.6122835497835497, + "consistency_score_3": 0.46201298701298693, + "consistency_score_4": 0.38279220779220785, + "consistency_score_5": 0.3333333333333333, + "consistency_score_6": 0.30032467532467527, + "consistency_score_7": 0.2784090909090909, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5227272727272727, - "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,English": 0.5852272727272727, + "Indonesian,Filipino": 0.5965909090909091, "Indonesian,Spanish": 0.5965909090909091, - "Indonesian,Chinese": 0.5113636363636364, - "Indonesian,Malay": 0.6704545454545454, - "Indonesian,Vietnamese": 0.5397727272727273, - "English,Filipino": 0.4431818181818182, - "English,Spanish": 0.6818181818181818, - "English,Chinese": 0.5284090909090909, - "English,Malay": 0.5340909090909091, - "English,Vietnamese": 0.5681818181818182, - "Filipino,Spanish": 0.5340909090909091, - "Filipino,Chinese": 0.4602272727272727, - "Filipino,Malay": 0.6193181818181818, - "Filipino,Vietnamese": 0.5227272727272727, - "Spanish,Chinese": 0.48295454545454547, + "Indonesian,Chinese": 0.5681818181818182, + "Indonesian,Malay": 0.6875, + "Indonesian,Vietnamese": 0.625, + "English,Filipino": 0.5340909090909091, + "English,Spanish": 0.7727272727272727, + "English,Chinese": 0.6534090909090909, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.6875, + "Filipino,Spanish": 0.5568181818181818, + "Filipino,Chinese": 0.5113636363636364, + "Filipino,Malay": 0.6534090909090909, + "Filipino,Vietnamese": 0.5625, + "Spanish,Chinese": 0.5965909090909091, "Spanish,Malay": 0.6306818181818182, - "Spanish,Vietnamese": 0.6022727272727273, - "Chinese,Malay": 0.5, - "Chinese,Vietnamese": 0.5625, - "Malay,Vietnamese": 0.5681818181818182 + "Spanish,Vietnamese": 0.6704545454545454, + "Chinese,Malay": 0.5568181818181818, + "Chinese,Vietnamese": 0.6193181818181818, + "Malay,Vietnamese": 0.625 }, "3_combine": { - "Indonesian,English,Filipino": 0.3352272727272727, - "Indonesian,English,Spanish": 0.4318181818181818, - "Indonesian,English,Chinese": 0.3352272727272727, - "Indonesian,English,Malay": 0.4090909090909091, - "Indonesian,English,Vietnamese": 0.36363636363636365, - "Indonesian,Filipino,Spanish": 0.4034090909090909, - "Indonesian,Filipino,Chinese": 0.3125, - "Indonesian,Filipino,Malay": 0.4772727272727273, - "Indonesian,Filipino,Vietnamese": 0.36363636363636365, - "Indonesian,Spanish,Chinese": 0.3465909090909091, - "Indonesian,Spanish,Malay": 0.48863636363636365, - "Indonesian,Spanish,Vietnamese": 0.4147727272727273, - "Indonesian,Chinese,Malay": 0.3806818181818182, - "Indonesian,Chinese,Vietnamese": 0.36363636363636365, - "Indonesian,Malay,Vietnamese": 0.4431818181818182, - "English,Filipino,Spanish": 0.3806818181818182, - "English,Filipino,Chinese": 0.2897727272727273, - "English,Filipino,Malay": 0.35795454545454547, - "English,Filipino,Vietnamese": 0.32386363636363635, - "English,Spanish,Chinese": 0.36363636363636365, - "English,Spanish,Malay": 0.4602272727272727, - "English,Spanish,Vietnamese": 0.4431818181818182, - "English,Chinese,Malay": 0.3352272727272727, - "English,Chinese,Vietnamese": 0.3693181818181818, - "English,Malay,Vietnamese": 0.3693181818181818, - "Filipino,Spanish,Chinese": 0.3125, - "Filipino,Spanish,Malay": 0.42045454545454547, - "Filipino,Spanish,Vietnamese": 0.38636363636363635, - "Filipino,Chinese,Malay": 0.3693181818181818, - "Filipino,Chinese,Vietnamese": 0.3465909090909091, - "Filipino,Malay,Vietnamese": 0.4090909090909091, - "Spanish,Chinese,Malay": 0.3693181818181818, - "Spanish,Chinese,Vietnamese": 0.36363636363636365, - "Spanish,Malay,Vietnamese": 0.4431818181818182, - "Chinese,Malay,Vietnamese": 0.375 + "Indonesian,English,Filipino": 0.42045454545454547, + "Indonesian,English,Spanish": 0.4943181818181818, + "Indonesian,English,Chinese": 0.4431818181818182, + "Indonesian,English,Malay": 0.4659090909090909, + "Indonesian,English,Vietnamese": 0.48295454545454547, + "Indonesian,Filipino,Spanish": 0.42045454545454547, + "Indonesian,Filipino,Chinese": 0.3806818181818182, + "Indonesian,Filipino,Malay": 0.5170454545454546, + "Indonesian,Filipino,Vietnamese": 0.4431818181818182, + "Indonesian,Spanish,Chinese": 0.42045454545454547, + "Indonesian,Spanish,Malay": 0.4943181818181818, + "Indonesian,Spanish,Vietnamese": 0.48295454545454547, + "Indonesian,Chinese,Malay": 0.44886363636363635, + "Indonesian,Chinese,Vietnamese": 0.45454545454545453, + "Indonesian,Malay,Vietnamese": 0.5170454545454546, + "English,Filipino,Spanish": 0.4602272727272727, + "English,Filipino,Chinese": 0.4090909090909091, + "English,Filipino,Malay": 0.4431818181818182, + "English,Filipino,Vietnamese": 0.4375, + "English,Spanish,Chinese": 0.5284090909090909, + "English,Spanish,Malay": 0.5056818181818182, + "English,Spanish,Vietnamese": 0.5852272727272727, + "English,Chinese,Malay": 0.42613636363636365, + "English,Chinese,Vietnamese": 0.5113636363636364, + "English,Malay,Vietnamese": 0.48863636363636365, + "Filipino,Spanish,Chinese": 0.3977272727272727, + "Filipino,Spanish,Malay": 0.4715909090909091, + "Filipino,Spanish,Vietnamese": 0.4375, + "Filipino,Chinese,Malay": 0.4090909090909091, + "Filipino,Chinese,Vietnamese": 0.42045454545454547, + "Filipino,Malay,Vietnamese": 0.4715909090909091, + "Spanish,Chinese,Malay": 0.4318181818181818, + "Spanish,Chinese,Vietnamese": 0.48863636363636365, + "Spanish,Malay,Vietnamese": 0.5056818181818182, + "Chinese,Malay,Vietnamese": 0.45454545454545453 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.29545454545454547, - "Indonesian,English,Filipino,Chinese": 0.23863636363636365, - "Indonesian,English,Filipino,Malay": 0.3181818181818182, - "Indonesian,English,Filipino,Vietnamese": 0.26704545454545453, - "Indonesian,English,Spanish,Chinese": 0.2784090909090909, - "Indonesian,English,Spanish,Malay": 0.36363636363636365, - "Indonesian,English,Spanish,Vietnamese": 0.32386363636363635, - "Indonesian,English,Chinese,Malay": 0.2784090909090909, - "Indonesian,English,Chinese,Vietnamese": 0.2727272727272727, - "Indonesian,English,Malay,Vietnamese": 0.3068181818181818, - "Indonesian,Filipino,Spanish,Chinese": 0.23863636363636365, - "Indonesian,Filipino,Spanish,Malay": 0.36363636363636365, - "Indonesian,Filipino,Spanish,Vietnamese": 0.30113636363636365, - "Indonesian,Filipino,Chinese,Malay": 0.30113636363636365, - "Indonesian,Filipino,Chinese,Vietnamese": 0.26136363636363635, - "Indonesian,Filipino,Malay,Vietnamese": 0.3409090909090909, - "Indonesian,Spanish,Chinese,Malay": 0.30113636363636365, - "Indonesian,Spanish,Chinese,Vietnamese": 0.2784090909090909, - "Indonesian,Spanish,Malay,Vietnamese": 0.36363636363636365, - "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, - "English,Filipino,Spanish,Chinese": 0.24431818181818182, - "English,Filipino,Spanish,Malay": 0.3125, - "English,Filipino,Spanish,Vietnamese": 0.2840909090909091, - "English,Filipino,Chinese,Malay": 0.2556818181818182, - "English,Filipino,Chinese,Vietnamese": 0.25, - "English,Filipino,Malay,Vietnamese": 0.2784090909090909, - "English,Spanish,Chinese,Malay": 0.2840909090909091, - "English,Spanish,Chinese,Vietnamese": 0.2897727272727273, - "English,Spanish,Malay,Vietnamese": 0.32386363636363635, - "English,Chinese,Malay,Vietnamese": 0.2727272727272727, - "Filipino,Spanish,Chinese,Malay": 0.2727272727272727, - "Filipino,Spanish,Chinese,Vietnamese": 0.26704545454545453, - "Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, - "Filipino,Chinese,Malay,Vietnamese": 0.3068181818181818, - "Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 + "Indonesian,English,Filipino,Spanish": 0.35795454545454547, + "Indonesian,English,Filipino,Chinese": 0.3352272727272727, + "Indonesian,English,Filipino,Malay": 0.39204545454545453, + "Indonesian,English,Filipino,Vietnamese": 0.3693181818181818, + "Indonesian,English,Spanish,Chinese": 0.39204545454545453, + "Indonesian,English,Spanish,Malay": 0.4090909090909091, + "Indonesian,English,Spanish,Vietnamese": 0.4147727272727273, + "Indonesian,English,Chinese,Malay": 0.375, + "Indonesian,English,Chinese,Vietnamese": 0.3977272727272727, + "Indonesian,English,Malay,Vietnamese": 0.4090909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.3125, + "Indonesian,Filipino,Spanish,Malay": 0.4034090909090909, + "Indonesian,Filipino,Spanish,Vietnamese": 0.35795454545454547, + "Indonesian,Filipino,Chinese,Malay": 0.35795454545454547, + "Indonesian,Filipino,Chinese,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Malay,Vietnamese": 0.4147727272727273, + "Indonesian,Spanish,Chinese,Malay": 0.3522727272727273, + "Indonesian,Spanish,Chinese,Vietnamese": 0.3806818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.42045454545454547, + "Indonesian,Chinese,Malay,Vietnamese": 0.3977272727272727, + "English,Filipino,Spanish,Chinese": 0.36363636363636365, + "English,Filipino,Spanish,Malay": 0.3977272727272727, + "English,Filipino,Spanish,Vietnamese": 0.3806818181818182, + "English,Filipino,Chinese,Malay": 0.35795454545454547, + "English,Filipino,Chinese,Vietnamese": 0.3693181818181818, + "English,Filipino,Malay,Vietnamese": 0.39204545454545453, + "English,Spanish,Chinese,Malay": 0.3806818181818182, + "English,Spanish,Chinese,Vietnamese": 0.44886363636363635, + "English,Spanish,Malay,Vietnamese": 0.4375, + "English,Chinese,Malay,Vietnamese": 0.3977272727272727, + "Filipino,Spanish,Chinese,Malay": 0.3465909090909091, + "Filipino,Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Filipino,Spanish,Malay,Vietnamese": 0.4034090909090909, + "Filipino,Chinese,Malay,Vietnamese": 0.375, + "Spanish,Chinese,Malay,Vietnamese": 0.39204545454545453 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, - "Indonesian,English,Filipino,Spanish,Malay": 0.2784090909090909, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.23863636363636365, - "Indonesian,English,Filipino,Chinese,Malay": 0.23295454545454544, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2159090909090909, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,English,Spanish,Chinese,Malay": 0.24431818181818182, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.23863636363636365, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.2784090909090909, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.23863636363636365, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.23295454545454544, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.25, - "English,Filipino,Spanish,Chinese,Malay": 0.2159090909090909, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.2159090909090909, - "English,Filipino,Spanish,Malay,Vietnamese": 0.24431818181818182, - "English,Filipino,Chinese,Malay,Vietnamese": 0.22727272727272727, - "English,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365 + "Indonesian,English,Filipino,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Filipino,Spanish,Malay": 0.3465909090909091, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3125, + "Indonesian,English,Filipino,Chinese,Malay": 0.32386363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3181818181818182, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,English,Spanish,Chinese,Malay": 0.32954545454545453, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3522727272727273, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727, + "English,Filipino,Spanish,Chinese,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.32954545454545453, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Chinese,Malay,Vietnamese": 0.3409090909090909, + "English,Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.1875, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.22727272727272727, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.19318181818181818 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.2897727272727273, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.3125, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.3125, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.3125, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2897727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 } }, - "AC3_2": 0.5095321454624216, - "AC3_3": 0.421883648075642, - "AC3_4": 0.36157638790355184, - "AC3_5": 0.3178619592208479, - "AC3_6": 0.2853323147018791, - "AC3_7": 0.26245059284520916 + "AC3_2": 0.5543868464775596, + "AC3_3": 0.48323182009107746, + "AC3_4": 0.4360393166036467, + "AC3_5": 0.4020618556222287, + "AC3_6": 0.3770676003609055, + "AC3_7": 0.35931183599821936 }, "prompt_4": { - "overall_acc": 0.4813311688311689, + "overall_acc": 0.49107142857142855, "language_acc": { - "Indonesian": 0.4431818181818182, - "English": 0.5681818181818182, - "Filipino": 0.4090909090909091, - "Spanish": 0.5397727272727273, - "Chinese": 0.5056818181818182, - "Malay": 0.4318181818181818, - "Vietnamese": 0.4715909090909091 + "Indonesian": 0.4659090909090909, + "English": 0.5397727272727273, + "Filipino": 0.4034090909090909, + "Spanish": 0.5284090909090909, + "Chinese": 0.5738636363636364, + "Malay": 0.44886363636363635, + "Vietnamese": 0.4772727272727273 }, - "consistency_score_2": 0.5827922077922079, - "consistency_score_3": 0.4183441558441558, - "consistency_score_4": 0.32987012987012987, - "consistency_score_5": 0.2740800865800866, - "consistency_score_6": 0.2362012987012987, - "consistency_score_7": 0.21022727272727273, + "consistency_score_2": 0.6501623376623377, + "consistency_score_3": 0.5081168831168831, + "consistency_score_4": 0.4254870129870131, + "consistency_score_5": 0.36931818181818177, + "consistency_score_6": 0.3279220779220779, + "consistency_score_7": 0.29545454545454547, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5511363636363636, - "Indonesian,Filipino": 0.5397727272727273, - "Indonesian,Spanish": 0.6477272727272727, - "Indonesian,Chinese": 0.5340909090909091, - "Indonesian,Malay": 0.6761363636363636, - "Indonesian,Vietnamese": 0.5852272727272727, - "English,Filipino": 0.48863636363636365, - "English,Spanish": 0.7556818181818182, - "English,Chinese": 0.5965909090909091, - "English,Malay": 0.5738636363636364, - "English,Vietnamese": 0.6363636363636364, - "Filipino,Spanish": 0.5284090909090909, - "Filipino,Chinese": 0.4772727272727273, - "Filipino,Malay": 0.6136363636363636, - "Filipino,Vietnamese": 0.5227272727272727, - "Spanish,Chinese": 0.5738636363636364, + "Indonesian,English": 0.6363636363636364, + "Indonesian,Filipino": 0.6079545454545454, + "Indonesian,Spanish": 0.6704545454545454, + "Indonesian,Chinese": 0.6193181818181818, + "Indonesian,Malay": 0.7386363636363636, + "Indonesian,Vietnamese": 0.6875, + "English,Filipino": 0.6193181818181818, + "English,Spanish": 0.75, + "English,Chinese": 0.7045454545454546, + "English,Malay": 0.6477272727272727, + "English,Vietnamese": 0.6647727272727273, + "Filipino,Spanish": 0.5795454545454546, + "Filipino,Chinese": 0.5170454545454546, + "Filipino,Malay": 0.6818181818181818, + "Filipino,Vietnamese": 0.5738636363636364, + "Spanish,Chinese": 0.6761363636363636, "Spanish,Malay": 0.6534090909090909, - "Spanish,Vietnamese": 0.625, - "Chinese,Malay": 0.5, - "Chinese,Vietnamese": 0.6079545454545454, - "Malay,Vietnamese": 0.5511363636363636 + "Spanish,Vietnamese": 0.6875, + "Chinese,Malay": 0.6193181818181818, + "Chinese,Vietnamese": 0.6420454545454546, + "Malay,Vietnamese": 0.6761363636363636 }, "3_combine": { - "Indonesian,English,Filipino": 0.36363636363636365, - "Indonesian,English,Spanish": 0.5113636363636364, - "Indonesian,English,Chinese": 0.38636363636363635, - "Indonesian,English,Malay": 0.44886363636363635, - "Indonesian,English,Vietnamese": 0.4375, - "Indonesian,Filipino,Spanish": 0.4090909090909091, - "Indonesian,Filipino,Chinese": 0.32386363636363635, - "Indonesian,Filipino,Malay": 0.45454545454545453, - "Indonesian,Filipino,Vietnamese": 0.375, - "Indonesian,Spanish,Chinese": 0.42045454545454547, - "Indonesian,Spanish,Malay": 0.5284090909090909, - "Indonesian,Spanish,Vietnamese": 0.4659090909090909, - "Indonesian,Chinese,Malay": 0.39204545454545453, - "Indonesian,Chinese,Vietnamese": 0.4034090909090909, - "Indonesian,Malay,Vietnamese": 0.4375, - "English,Filipino,Spanish": 0.4318181818181818, - "English,Filipino,Chinese": 0.3465909090909091, - "English,Filipino,Malay": 0.38636363636363635, - "English,Filipino,Vietnamese": 0.3806818181818182, - "English,Spanish,Chinese": 0.4943181818181818, - "English,Spanish,Malay": 0.5227272727272727, - "English,Spanish,Vietnamese": 0.5340909090909091, - "English,Chinese,Malay": 0.375, - "English,Chinese,Vietnamese": 0.45454545454545453, - "English,Malay,Vietnamese": 0.42613636363636365, - "Filipino,Spanish,Chinese": 0.3465909090909091, - "Filipino,Spanish,Malay": 0.4318181818181818, - "Filipino,Spanish,Vietnamese": 0.3806818181818182, - "Filipino,Chinese,Malay": 0.3465909090909091, - "Filipino,Chinese,Vietnamese": 0.3522727272727273, - "Filipino,Malay,Vietnamese": 0.39204545454545453, - "Spanish,Chinese,Malay": 0.4090909090909091, - "Spanish,Chinese,Vietnamese": 0.4375, - "Spanish,Malay,Vietnamese": 0.45454545454545453, - "Chinese,Malay,Vietnamese": 0.3806818181818182 + "Indonesian,English,Filipino": 0.48295454545454547, + "Indonesian,English,Spanish": 0.5511363636363636, + "Indonesian,English,Chinese": 0.5, + "Indonesian,English,Malay": 0.5397727272727273, + "Indonesian,English,Vietnamese": 0.5227272727272727, + "Indonesian,Filipino,Spanish": 0.4715909090909091, + "Indonesian,Filipino,Chinese": 0.4147727272727273, + "Indonesian,Filipino,Malay": 0.5454545454545454, + "Indonesian,Filipino,Vietnamese": 0.4943181818181818, + "Indonesian,Spanish,Chinese": 0.5056818181818182, + "Indonesian,Spanish,Malay": 0.5681818181818182, + "Indonesian,Spanish,Vietnamese": 0.5454545454545454, + "Indonesian,Chinese,Malay": 0.5227272727272727, + "Indonesian,Chinese,Vietnamese": 0.5056818181818182, + "Indonesian,Malay,Vietnamese": 0.5909090909090909, + "English,Filipino,Spanish": 0.5056818181818182, + "English,Filipino,Chinese": 0.4431818181818182, + "English,Filipino,Malay": 0.5113636363636364, + "English,Filipino,Vietnamese": 0.4772727272727273, + "English,Spanish,Chinese": 0.5738636363636364, + "English,Spanish,Malay": 0.5397727272727273, + "English,Spanish,Vietnamese": 0.5681818181818182, + "English,Chinese,Malay": 0.5056818181818182, + "English,Chinese,Vietnamese": 0.5284090909090909, + "English,Malay,Vietnamese": 0.5284090909090909, + "Filipino,Spanish,Chinese": 0.4318181818181818, + "Filipino,Spanish,Malay": 0.48863636363636365, + "Filipino,Spanish,Vietnamese": 0.4602272727272727, + "Filipino,Chinese,Malay": 0.45454545454545453, + "Filipino,Chinese,Vietnamese": 0.4147727272727273, + "Filipino,Malay,Vietnamese": 0.5113636363636364, + "Spanish,Chinese,Malay": 0.5056818181818182, + "Spanish,Chinese,Vietnamese": 0.5284090909090909, + "Spanish,Malay,Vietnamese": 0.5397727272727273, + "Chinese,Malay,Vietnamese": 0.5056818181818182 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.3465909090909091, - "Indonesian,English,Filipino,Chinese": 0.26704545454545453, - "Indonesian,English,Filipino,Malay": 0.32386363636363635, - "Indonesian,English,Filipino,Vietnamese": 0.30113636363636365, - "Indonesian,English,Spanish,Chinese": 0.36363636363636365, - "Indonesian,English,Spanish,Malay": 0.42613636363636365, - "Indonesian,English,Spanish,Vietnamese": 0.4147727272727273, - "Indonesian,English,Chinese,Malay": 0.3125, - "Indonesian,English,Chinese,Vietnamese": 0.32954545454545453, - "Indonesian,English,Malay,Vietnamese": 0.35795454545454547, - "Indonesian,Filipino,Spanish,Chinese": 0.2727272727272727, - "Indonesian,Filipino,Spanish,Malay": 0.375, - "Indonesian,Filipino,Spanish,Vietnamese": 0.3125, - "Indonesian,Filipino,Chinese,Malay": 0.2784090909090909, - "Indonesian,Filipino,Chinese,Vietnamese": 0.2784090909090909, - "Indonesian,Filipino,Malay,Vietnamese": 0.3181818181818182, - "Indonesian,Spanish,Chinese,Malay": 0.3465909090909091, - "Indonesian,Spanish,Chinese,Vietnamese": 0.3522727272727273, - "Indonesian,Spanish,Malay,Vietnamese": 0.38636363636363635, - "Indonesian,Chinese,Malay,Vietnamese": 0.3181818181818182, - "English,Filipino,Spanish,Chinese": 0.3125, - "English,Filipino,Spanish,Malay": 0.35795454545454547, - "English,Filipino,Spanish,Vietnamese": 0.3352272727272727, - "English,Filipino,Chinese,Malay": 0.2784090909090909, - "English,Filipino,Chinese,Vietnamese": 0.2840909090909091, - "English,Filipino,Malay,Vietnamese": 0.3125, - "English,Spanish,Chinese,Malay": 0.35795454545454547, - "English,Spanish,Chinese,Vietnamese": 0.3977272727272727, - "English,Spanish,Malay,Vietnamese": 0.39204545454545453, - "English,Chinese,Malay,Vietnamese": 0.3181818181818182, - "Filipino,Spanish,Chinese,Malay": 0.2840909090909091, - "Filipino,Spanish,Chinese,Vietnamese": 0.2840909090909091, - "Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, - "Filipino,Chinese,Malay,Vietnamese": 0.2897727272727273, - "Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727 + "Indonesian,English,Filipino,Spanish": 0.4147727272727273, + "Indonesian,English,Filipino,Chinese": 0.375, + "Indonesian,English,Filipino,Malay": 0.44886363636363635, + "Indonesian,English,Filipino,Vietnamese": 0.42613636363636365, + "Indonesian,English,Spanish,Chinese": 0.4431818181818182, + "Indonesian,English,Spanish,Malay": 0.4715909090909091, + "Indonesian,English,Spanish,Vietnamese": 0.4602272727272727, + "Indonesian,English,Chinese,Malay": 0.4431818181818182, + "Indonesian,English,Chinese,Vietnamese": 0.42613636363636365, + "Indonesian,English,Malay,Vietnamese": 0.4659090909090909, + "Indonesian,Filipino,Spanish,Chinese": 0.36363636363636365, + "Indonesian,Filipino,Spanish,Malay": 0.4431818181818182, + "Indonesian,Filipino,Spanish,Vietnamese": 0.42045454545454547, + "Indonesian,Filipino,Chinese,Malay": 0.3977272727272727, + "Indonesian,Filipino,Chinese,Vietnamese": 0.3693181818181818, + "Indonesian,Filipino,Malay,Vietnamese": 0.4659090909090909, + "Indonesian,Spanish,Chinese,Malay": 0.4431818181818182, + "Indonesian,Spanish,Chinese,Vietnamese": 0.4375, + "Indonesian,Spanish,Malay,Vietnamese": 0.48295454545454547, + "Indonesian,Chinese,Malay,Vietnamese": 0.44886363636363635, + "English,Filipino,Spanish,Chinese": 0.39204545454545453, + "English,Filipino,Spanish,Malay": 0.42613636363636365, + "English,Filipino,Spanish,Vietnamese": 0.4034090909090909, + "English,Filipino,Chinese,Malay": 0.3977272727272727, + "English,Filipino,Chinese,Vietnamese": 0.375, + "English,Filipino,Malay,Vietnamese": 0.42613636363636365, + "English,Spanish,Chinese,Malay": 0.44886363636363635, + "English,Spanish,Chinese,Vietnamese": 0.4659090909090909, + "English,Spanish,Malay,Vietnamese": 0.4659090909090909, + "English,Chinese,Malay,Vietnamese": 0.4375, + "Filipino,Spanish,Chinese,Malay": 0.38636363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.3693181818181818, + "Filipino,Spanish,Malay,Vietnamese": 0.42613636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.39204545454545453, + "Spanish,Chinese,Malay,Vietnamese": 0.4318181818181818 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.2556818181818182, - "Indonesian,English,Filipino,Spanish,Malay": 0.3125, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.2840909090909091, - "Indonesian,English,Filipino,Chinese,Malay": 0.23863636363636365, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.23863636363636365, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.26704545454545453, - "Indonesian,English,Spanish,Chinese,Malay": 0.3068181818181818, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3181818181818182, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.3409090909090909, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.2727272727272727, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.25, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.24431818181818182, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.24431818181818182, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547, - "English,Filipino,Spanish,Chinese,Malay": 0.26136363636363635, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.26136363636363635, - "English,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, - "English,Filipino,Chinese,Malay,Vietnamese": 0.24431818181818182, - "English,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.25 + "Indonesian,English,Filipino,Spanish,Chinese": 0.3352272727272727, + "Indonesian,English,Filipino,Spanish,Malay": 0.38636363636363635, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,English,Filipino,Chinese,Malay": 0.35795454545454547, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.3977272727272727, + "Indonesian,English,Spanish,Chinese,Malay": 0.3977272727272727, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.38636363636363635, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.4147727272727273, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.39204545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3465909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3977272727272727, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.35795454545454547, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.39204545454545453, + "English,Filipino,Spanish,Chinese,Malay": 0.3522727272727273, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3693181818181818, + "English,Filipino,Chinese,Malay,Vietnamese": 0.3522727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.39204545454545453, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3465909090909091 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.23295454545454544, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.22727272727272727, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.2159090909090909, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.26704545454545453, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.3181818181818182, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32386363636363635, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3125 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 } }, - "AC3_2": 0.5272246822094617, - "AC3_3": 0.447632775871545, - "AC3_4": 0.39146085048803886, - "AC3_5": 0.34927541114715466, - "AC3_6": 0.31689450251210277, - "AC3_7": 0.29264031152405184 + "AC3_2": 0.5595280430316463, + "AC3_3": 0.49944876402103694, + "AC3_4": 0.45593277157112544, + "AC3_5": 0.42158018863024654, + "AC3_6": 0.39324649577528864, + "AC3_7": 0.36893704845670483 }, "prompt_5": { - "overall_acc": 0.45454545454545453, + "overall_acc": 0.43262987012987014, "language_acc": { - "Indonesian": 0.4375, - "English": 0.5056818181818182, - "Filipino": 0.4147727272727273, - "Spanish": 0.48863636363636365, - "Chinese": 0.42613636363636365, - "Malay": 0.44886363636363635, - "Vietnamese": 0.4602272727272727 + "Indonesian": 0.42045454545454547, + "English": 0.48863636363636365, + "Filipino": 0.4034090909090909, + "Spanish": 0.4602272727272727, + "Chinese": 0.45454545454545453, + "Malay": 0.42613636363636365, + "Vietnamese": 0.375 }, - "consistency_score_2": 0.5454545454545455, - "consistency_score_3": 0.3748376623376623, - "consistency_score_4": 0.2870129870129871, - "consistency_score_5": 0.23376623376623382, - "consistency_score_6": 0.19724025974025977, - "consistency_score_7": 0.17045454545454544, + "consistency_score_2": 0.6176948051948054, + "consistency_score_3": 0.465422077922078, + "consistency_score_4": 0.3821428571428571, + "consistency_score_5": 0.3273809523809524, + "consistency_score_6": 0.2873376623376624, + "consistency_score_7": 0.2556818181818182, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5113636363636364, - "Indonesian,Filipino": 0.5625, - "Indonesian,Spanish": 0.5852272727272727, - "Indonesian,Chinese": 0.5113636363636364, - "Indonesian,Malay": 0.6420454545454546, - "Indonesian,Vietnamese": 0.5681818181818182, - "English,Filipino": 0.4375, - "English,Spanish": 0.6534090909090909, - "English,Chinese": 0.5, - "English,Malay": 0.5113636363636364, - "English,Vietnamese": 0.5170454545454546, - "Filipino,Spanish": 0.5113636363636364, - "Filipino,Chinese": 0.4602272727272727, - "Filipino,Malay": 0.6079545454545454, - "Filipino,Vietnamese": 0.5227272727272727, - "Spanish,Chinese": 0.5227272727272727, - "Spanish,Malay": 0.6136363636363636, - "Spanish,Vietnamese": 0.6022727272727273, - "Chinese,Malay": 0.5284090909090909, - "Chinese,Vietnamese": 0.5, - "Malay,Vietnamese": 0.5852272727272727 + "Indonesian,English": 0.6306818181818182, + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,Spanish": 0.6306818181818182, + "Indonesian,Chinese": 0.6079545454545454, + "Indonesian,Malay": 0.6818181818181818, + "Indonesian,Vietnamese": 0.6761363636363636, + "English,Filipino": 0.5568181818181818, + "English,Spanish": 0.6875, + "English,Chinese": 0.6590909090909091, + "English,Malay": 0.5965909090909091, + "English,Vietnamese": 0.6363636363636364, + "Filipino,Spanish": 0.5397727272727273, + "Filipino,Chinese": 0.5, + "Filipino,Malay": 0.6136363636363636, + "Filipino,Vietnamese": 0.5511363636363636, + "Spanish,Chinese": 0.6534090909090909, + "Spanish,Malay": 0.5795454545454546, + "Spanish,Vietnamese": 0.6761363636363636, + "Chinese,Malay": 0.5909090909090909, + "Chinese,Vietnamese": 0.6477272727272727, + "Malay,Vietnamese": 0.6704545454545454 }, "3_combine": { - "Indonesian,English,Filipino": 0.3352272727272727, - "Indonesian,English,Spanish": 0.4090909090909091, - "Indonesian,English,Chinese": 0.32386363636363635, - "Indonesian,English,Malay": 0.39204545454545453, - "Indonesian,English,Vietnamese": 0.3522727272727273, - "Indonesian,Filipino,Spanish": 0.3977272727272727, - "Indonesian,Filipino,Chinese": 0.32386363636363635, - "Indonesian,Filipino,Malay": 0.4602272727272727, - "Indonesian,Filipino,Vietnamese": 0.38636363636363635, - "Indonesian,Spanish,Chinese": 0.35795454545454547, - "Indonesian,Spanish,Malay": 0.4772727272727273, - "Indonesian,Spanish,Vietnamese": 0.42045454545454547, - "Indonesian,Chinese,Malay": 0.3977272727272727, - "Indonesian,Chinese,Vietnamese": 0.3465909090909091, - "Indonesian,Malay,Vietnamese": 0.4602272727272727, - "English,Filipino,Spanish": 0.35795454545454547, - "English,Filipino,Chinese": 0.29545454545454547, - "English,Filipino,Malay": 0.32954545454545453, - "English,Filipino,Vietnamese": 0.3125, - "English,Spanish,Chinese": 0.375, - "English,Spanish,Malay": 0.4318181818181818, - "English,Spanish,Vietnamese": 0.42613636363636365, - "English,Chinese,Malay": 0.32386363636363635, - "English,Chinese,Vietnamese": 0.3068181818181818, - "English,Malay,Vietnamese": 0.36363636363636365, - "Filipino,Spanish,Chinese": 0.32954545454545453, - "Filipino,Spanish,Malay": 0.42045454545454547, - "Filipino,Spanish,Vietnamese": 0.375, - "Filipino,Chinese,Malay": 0.3522727272727273, - "Filipino,Chinese,Vietnamese": 0.3181818181818182, - "Filipino,Malay,Vietnamese": 0.4090909090909091, - "Spanish,Chinese,Malay": 0.3806818181818182, - "Spanish,Chinese,Vietnamese": 0.36363636363636365, - "Spanish,Malay,Vietnamese": 0.4431818181818182, - "Chinese,Malay,Vietnamese": 0.36363636363636365 - }, - "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.2897727272727273, - "Indonesian,English,Filipino,Chinese": 0.23863636363636365, - "Indonesian,English,Filipino,Malay": 0.30113636363636365, - "Indonesian,English,Filipino,Vietnamese": 0.2556818181818182, - "Indonesian,English,Spanish,Chinese": 0.2727272727272727, - "Indonesian,English,Spanish,Malay": 0.3522727272727273, - "Indonesian,English,Spanish,Vietnamese": 0.3181818181818182, - "Indonesian,English,Chinese,Malay": 0.2727272727272727, - "Indonesian,English,Chinese,Vietnamese": 0.23295454545454544, - "Indonesian,English,Malay,Vietnamese": 0.3181818181818182, - "Indonesian,Filipino,Spanish,Chinese": 0.25, - "Indonesian,Filipino,Spanish,Malay": 0.36363636363636365, - "Indonesian,Filipino,Spanish,Vietnamese": 0.3125, - "Indonesian,Filipino,Chinese,Malay": 0.2897727272727273, - "Indonesian,Filipino,Chinese,Vietnamese": 0.25, - "Indonesian,Filipino,Malay,Vietnamese": 0.35795454545454547, - "Indonesian,Spanish,Chinese,Malay": 0.30113636363636365, - "Indonesian,Spanish,Chinese,Vietnamese": 0.26704545454545453, - "Indonesian,Spanish,Malay,Vietnamese": 0.3806818181818182, - "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, - "English,Filipino,Spanish,Chinese": 0.24431818181818182, - "English,Filipino,Spanish,Malay": 0.29545454545454547, - "English,Filipino,Spanish,Vietnamese": 0.2727272727272727, - "English,Filipino,Chinese,Malay": 0.22727272727272727, - "English,Filipino,Chinese,Vietnamese": 0.2215909090909091, - "English,Filipino,Malay,Vietnamese": 0.2727272727272727, - "English,Spanish,Chinese,Malay": 0.2897727272727273, - "English,Spanish,Chinese,Vietnamese": 0.26136363636363635, - "English,Spanish,Malay,Vietnamese": 0.32954545454545453, - "English,Chinese,Malay,Vietnamese": 0.2556818181818182, - "Filipino,Spanish,Chinese,Malay": 0.2727272727272727, - "Filipino,Spanish,Chinese,Vietnamese": 0.2556818181818182, - "Filipino,Spanish,Malay,Vietnamese": 0.3409090909090909, - "Filipino,Chinese,Malay,Vietnamese": 0.2727272727272727, - "Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365 + "Indonesian,English,Filipino": 0.4147727272727273, + "Indonesian,English,Spanish": 0.5056818181818182, + "Indonesian,English,Chinese": 0.5, + "Indonesian,English,Malay": 0.5, + "Indonesian,English,Vietnamese": 0.5056818181818182, + "Indonesian,Filipino,Spanish": 0.42045454545454547, + "Indonesian,Filipino,Chinese": 0.3977272727272727, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Filipino,Vietnamese": 0.4431818181818182, + "Indonesian,Spanish,Chinese": 0.4772727272727273, + "Indonesian,Spanish,Malay": 0.48863636363636365, + "Indonesian,Spanish,Vietnamese": 0.5284090909090909, + "Indonesian,Chinese,Malay": 0.48295454545454547, + "Indonesian,Chinese,Vietnamese": 0.4943181818181818, + "Indonesian,Malay,Vietnamese": 0.5454545454545454, + "English,Filipino,Spanish": 0.42613636363636365, + "English,Filipino,Chinese": 0.4090909090909091, + "English,Filipino,Malay": 0.4375, + "English,Filipino,Vietnamese": 0.4090909090909091, + "English,Spanish,Chinese": 0.5397727272727273, + "English,Spanish,Malay": 0.4659090909090909, + "English,Spanish,Vietnamese": 0.5170454545454546, + "English,Chinese,Malay": 0.4659090909090909, + "English,Chinese,Vietnamese": 0.5056818181818182, + "English,Malay,Vietnamese": 0.48863636363636365, + "Filipino,Spanish,Chinese": 0.3977272727272727, + "Filipino,Spanish,Malay": 0.4034090909090909, + "Filipino,Spanish,Vietnamese": 0.4147727272727273, + "Filipino,Chinese,Malay": 0.4090909090909091, + "Filipino,Chinese,Vietnamese": 0.3977272727272727, + "Filipino,Malay,Vietnamese": 0.45454545454545453, + "Spanish,Chinese,Malay": 0.4602272727272727, + "Spanish,Chinese,Vietnamese": 0.5227272727272727, + "Spanish,Malay,Vietnamese": 0.5056818181818182, + "Chinese,Malay,Vietnamese": 0.48863636363636365 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.3409090909090909, + "Indonesian,English,Filipino,Chinese": 0.3522727272727273, + "Indonesian,English,Filipino,Malay": 0.3693181818181818, + "Indonesian,English,Filipino,Vietnamese": 0.3409090909090909, + "Indonesian,English,Spanish,Chinese": 0.4147727272727273, + "Indonesian,English,Spanish,Malay": 0.4147727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.4375, + "Indonesian,English,Chinese,Malay": 0.4147727272727273, + "Indonesian,English,Chinese,Vietnamese": 0.42613636363636365, + "Indonesian,English,Malay,Vietnamese": 0.42045454545454547, + "Indonesian,Filipino,Spanish,Chinese": 0.3409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.3522727272727273, + "Indonesian,Filipino,Spanish,Vietnamese": 0.36363636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.35795454545454547, + "Indonesian,Filipino,Chinese,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Malay,Vietnamese": 0.38636363636363635, + "Indonesian,Spanish,Chinese,Malay": 0.4034090909090909, + "Indonesian,Spanish,Chinese,Vietnamese": 0.42613636363636365, + "Indonesian,Spanish,Malay,Vietnamese": 0.4431818181818182, + "Indonesian,Chinese,Malay,Vietnamese": 0.42045454545454547, + "English,Filipino,Spanish,Chinese": 0.3522727272727273, + "English,Filipino,Spanish,Malay": 0.3465909090909091, + "English,Filipino,Spanish,Vietnamese": 0.3409090909090909, + "English,Filipino,Chinese,Malay": 0.3522727272727273, + "English,Filipino,Chinese,Vietnamese": 0.3409090909090909, + "English,Filipino,Malay,Vietnamese": 0.3693181818181818, + "English,Spanish,Chinese,Malay": 0.3977272727272727, + "English,Spanish,Chinese,Vietnamese": 0.4431818181818182, + "English,Spanish,Malay,Vietnamese": 0.42613636363636365, + "English,Chinese,Malay,Vietnamese": 0.4090909090909091, + "Filipino,Spanish,Chinese,Malay": 0.3409090909090909, + "Filipino,Spanish,Chinese,Vietnamese": 0.3409090909090909, + "Filipino,Spanish,Malay,Vietnamese": 0.36363636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.35795454545454547, + "Spanish,Chinese,Malay,Vietnamese": 0.42045454545454547 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, - "Indonesian,English,Filipino,Spanish,Malay": 0.2727272727272727, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.23863636363636365, - "Indonesian,English,Filipino,Chinese,Malay": 0.2159090909090909, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1875, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,English,Spanish,Chinese,Malay": 0.24431818181818182, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.21022727272727273, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.29545454545454547, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.22727272727272727, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.23295454545454544, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3068181818181818, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.24431818181818182, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.25, - "English,Filipino,Spanish,Chinese,Malay": 0.20454545454545456, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, - "English,Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, - "English,Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, - "English,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544 + "Indonesian,English,Filipino,Spanish,Chinese": 0.30113636363636365, + "Indonesian,English,Filipino,Spanish,Malay": 0.3068181818181818, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,English,Filipino,Chinese,Malay": 0.3181818181818182, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.3181818181818182, + "Indonesian,English,Spanish,Chinese,Malay": 0.35795454545454547, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.375, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.3806818181818182, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.3181818181818182, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.375, + "English,Filipino,Spanish,Chinese,Malay": 0.30113636363636365, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.30113636363636365, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3181818181818182, + "English,Filipino,Chinese,Malay,Vietnamese": 0.3125, + "English,Spanish,Chinese,Malay,Vietnamese": 0.3693181818181818, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3125 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.23863636363636365, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1875, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.2727272727272727, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2840909090909091, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182 } }, - "AC3_2": 0.4958677685454546, - "AC3_3": 0.41086140317827724, - "AC3_4": 0.35185480014360443, - "AC3_5": 0.30874785587281145, - "AC3_6": 0.2751047208906576, - "AC3_7": 0.2479338842578512 + "AC3_2": 0.508858317059483, + "AC3_3": 0.44842727314686265, + "AC3_4": 0.4058221614507755, + "AC3_5": 0.3727177947940652, + "AC3_6": 0.3453235039564873, + "AC3_7": 0.3214113421488447 } }, "sg_eval": { "prompt_1": { - "accuracy": 0.6407766990291263 + "accuracy": 0.6699029126213593 }, "prompt_2": { - "accuracy": 0.6213592233009708 + "accuracy": 0.6601941747572816 }, "prompt_3": { - "accuracy": 0.6019417475728155 + "accuracy": 0.6213592233009708 }, "prompt_4": { - "accuracy": 0.6019417475728155 + "accuracy": 0.6310679611650486 }, "prompt_5": { - "accuracy": 0.6407766990291263 + "accuracy": 0.6601941747572816 } }, "cn_eval": { "prompt_1": { - "accuracy": 0.41904761904761906 + "accuracy": 0.49523809523809526 }, "prompt_2": { - "accuracy": 0.4857142857142857 + "accuracy": 0.4380952380952381 }, "prompt_3": { - "accuracy": 0.5238095238095238 + "accuracy": 0.5047619047619047 }, "prompt_4": { "accuracy": 0.4666666666666667 }, "prompt_5": { - "accuracy": 0.4857142857142857 + "accuracy": 0.45714285714285713 } }, "us_eval": { "prompt_1": { - "accuracy": 0.7009345794392523 + "accuracy": 0.6728971962616822 }, "prompt_2": { - "accuracy": 0.7009345794392523 + "accuracy": 0.7102803738317757 }, "prompt_3": { - "accuracy": 0.6915887850467289 + "accuracy": 0.7383177570093458 }, "prompt_4": { - "accuracy": 0.7009345794392523 + "accuracy": 0.7383177570093458 }, "prompt_5": { - "accuracy": 0.7009345794392523 + "accuracy": 0.7289719626168224 } }, "ph_eval": { "prompt_1": { - "accuracy": 0.52, - "category_acc": { - "brand": 0.5, - "demographics": 0.6, - "biology": 0.5, - "history": 0.4, - "literature": 0.2, - "politics": 0.7, - "culture": 0.8, - "film": 0.5, - "law": 0.5, - "geography": 0.6 - } - }, - "prompt_2": { - "accuracy": 0.52, + "accuracy": 0.56, "category_acc": { "brand": 0.6, "demographics": 0.4, - "biology": 0.6, - "history": 0.4, + "biology": 0.5, + "history": 0.6, "literature": 0.2, - "politics": 0.6, + "politics": 0.8, "culture": 0.7, "film": 0.6, "law": 0.6, "geography": 0.5 } }, - "prompt_3": { - "accuracy": 0.52, + "prompt_2": { + "accuracy": 0.55, "category_acc": { "brand": 0.5, "demographics": 0.2, "biology": 0.5, - "history": 0.5333333333333333, - "literature": 0.3, - "politics": 0.7, - "culture": 0.8, - "film": 0.4, + "history": 0.6, + "literature": 0.4, + "politics": 0.9, + "culture": 0.6, + "film": 0.5, "law": 0.6, "geography": 0.5 } }, - "prompt_4": { - "accuracy": 0.58, + "prompt_3": { + "accuracy": 0.59, "category_acc": { - "brand": 0.6, + "brand": 0.5, "demographics": 0.4, - "biology": 0.6, - "history": 0.4666666666666667, + "biology": 0.7, + "history": 0.6, "literature": 0.3, - "politics": 0.7, + "politics": 0.8, "culture": 0.8, - "film": 0.5, + "film": 0.6, "law": 0.7, - "geography": 0.7 + "geography": 0.4 } }, - "prompt_5": { - "accuracy": 0.59, + "prompt_4": { + "accuracy": 0.61, "category_acc": { - "brand": 0.5, + "brand": 0.6, "demographics": 0.4, - "biology": 0.6, - "history": 0.6, + "biology": 0.7, + "history": 0.6666666666666666, "literature": 0.2, "politics": 0.9, + "culture": 0.7, + "film": 0.7, + "law": 0.6, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.62, + "category_acc": { + "brand": 0.6, + "demographics": 0.4, + "biology": 0.7, + "history": 0.6, + "literature": 0.4, + "politics": 0.8, "culture": 0.8, - "film": 0.5, + "film": 0.7, "law": 0.7, - "geography": 0.6 + "geography": 0.4 } } }, "sing2eng": { "prompt_1": { - "bleu_score": 0.06094069384356101 + "bleu_score": 0.21137532751755803 }, "prompt_2": { - "bleu_score": 0.06207526753612465 + "bleu_score": 0.22676833396433665 }, "prompt_3": { - "bleu_score": 0.06192642106229149 + "bleu_score": 0.226457620703362 }, "prompt_4": { - "bleu_score": 0.060092982025264 + "bleu_score": 0.17506554466534743 }, "prompt_5": { - "bleu_score": 0.05469202971301356 + "bleu_score": 0.18042896589720248 } }, "indommlu": { @@ -32277,87 +32277,87 @@ }, "flores_ind2eng": { "prompt_1": { - "bleu_score": 0.3087387231733152 + "bleu_score": 0.3446133677838909 }, "prompt_2": { - "bleu_score": 0.3094226547039261 + "bleu_score": 0.3457399898887441 }, "prompt_3": { - "bleu_score": 0.3061124934874166 + "bleu_score": 0.3417313571410895 }, "prompt_4": { - "bleu_score": 0.30135340693301044 + "bleu_score": 0.3364574211899931 }, "prompt_5": { - "bleu_score": 0.30791510943643785 + "bleu_score": 0.3445716997029711 } }, "flores_vie2eng": { "prompt_1": { - "bleu_score": 0.24226557595813872 + "bleu_score": 0.26896447803580786 }, "prompt_2": { - "bleu_score": 0.24374681205197152 + "bleu_score": 0.27080144084485186 }, "prompt_3": { - "bleu_score": 0.23865746431889961 + "bleu_score": 0.2653227001747312 }, "prompt_4": { - "bleu_score": 0.24343786296993222 + "bleu_score": 0.2707313441233083 }, "prompt_5": { - "bleu_score": 0.2496790676198905 + "bleu_score": 0.2776319262090024 } }, "flores_zho2eng": { "prompt_1": { - "bleu_score": 0.18741482916807534 + "bleu_score": 0.20706040289452007 }, "prompt_2": { - "bleu_score": 0.18861522471729936 + "bleu_score": 0.20768858624328185 }, "prompt_3": { - "bleu_score": 0.1828941675772202 + "bleu_score": 0.20116841535523658 }, "prompt_4": { - "bleu_score": 0.18500544495397628 + "bleu_score": 0.20400921233542527 }, "prompt_5": { - "bleu_score": 0.19088057936700595 + "bleu_score": 0.21068726751387765 } }, "flores_zsm2eng": { "prompt_1": { - "bleu_score": 0.31040973391193794 + "bleu_score": 0.34602202013314215 }, "prompt_2": { - "bleu_score": 0.31410450445911836 + "bleu_score": 0.3495179491087512 }, "prompt_3": { - "bleu_score": 0.30742063457580054 + "bleu_score": 0.3417109996866184 }, "prompt_4": { - "bleu_score": 0.2954984182513215 + "bleu_score": 0.32902747117701614 }, "prompt_5": { - "bleu_score": 0.3059634141807576 + "bleu_score": 0.3406278063632438 } }, "mmlu": { "prompt_1": { - "accuracy": 0.6254375729288215 + "accuracy": 0.574095682613769 }, "prompt_2": { - "accuracy": 0.646441073512252 + "accuracy": 0.5997666277712952 }, "prompt_3": { - "accuracy": 0.6277712952158693 + "accuracy": 0.5997666277712952 }, "prompt_4": { - "accuracy": 0.6394399066511085 + "accuracy": 0.6161026837806302 }, "prompt_5": { - "accuracy": 0.588098016336056 + "accuracy": 0.5355892648774796 } }, "mmlu_full": { @@ -32674,323 +32674,323 @@ }, "c_eval": { "prompt_1": { - "accuracy": 0.4903417533432392 + "accuracy": 0.4851411589895988 }, "prompt_2": { - "accuracy": 0.5044576523031203 + "accuracy": 0.4160475482912333 }, "prompt_3": { - "accuracy": 0.49257057949479943 + "accuracy": 0.4658246656760773 }, "prompt_4": { - "accuracy": 0.4390787518573551 + "accuracy": 0.4606240713224368 }, "prompt_5": { - "accuracy": 0.4576523031203566 + "accuracy": 0.3848439821693908 } }, "c_eval_full": { "prompt_1": { - "accuracy": 0.4919053549190536, + "accuracy": 0.4900373599003736, "category_acc": { - "computer_network": 0.7916666666666666, - "operating_system": 0.6666666666666666, - "computer_architecture": 0.5, - "college_programming": 0.6428571428571429, - "college_physics": 0.4583333333333333, - "college_chemistry": 0.3793103448275862, + "computer_network": 0.4583333333333333, + "operating_system": 0.625, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.5714285714285714, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.4482758620689655, "advanced_mathematics": 0.2916666666666667, - "probability_and_statistics": 0.391304347826087, - "discrete_mathematics": 0.3333333333333333, - "electrical_engineer": 0.30952380952380953, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.17391304347826086, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.13043478260869565, "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.20833333333333334, - "high_school_biology": 0.3333333333333333, + "high_school_chemistry": 0.5833333333333334, + "high_school_biology": 0.375, "middle_school_mathematics": 0.3333333333333333, - "middle_school_biology": 0.7692307692307693, - "middle_school_physics": 0.4166666666666667, - "middle_school_chemistry": 0.56, - "veterinary_medicine": 0.5, - "college_economics": 0.4, - "business_administration": 0.5, - "marxism": 0.5, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.4642857142857143, + "college_economics": 0.43333333333333335, + "business_administration": 0.39473684210526316, + "marxism": 0.5833333333333334, "mao_zedong_thought": 0.6551724137931034, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.7142857142857143, - "high_school_politics": 0.625, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7755102040816326, + "high_school_politics": 0.5833333333333334, "high_school_geography": 0.5416666666666666, - "middle_school_politics": 0.7307692307692307, - "middle_school_geography": 0.7058823529411765, - "modern_chinese_history": 0.6071428571428571, - "ideological_and_moral_cultivation": 0.6666666666666666, - "logic": 0.4444444444444444, - "law": 0.3448275862068966, - "chinese_language_and_literature": 0.42857142857142855, - "art_studies": 0.631578947368421, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.5185185185185185, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.5789473684210527, "professional_tour_guide": 0.5588235294117647, "legal_professional": 0.42857142857142855, - "high_school_chinese": 0.3333333333333333, - "high_school_history": 0.52, - "middle_school_history": 0.7407407407407407, + "high_school_chinese": 0.25, + "high_school_history": 0.36, + "middle_school_history": 0.7037037037037037, "civil_servant": 0.4423076923076923, - "sports_science": 0.4166666666666667, - "plant_protection": 0.7407407407407407, - "basic_medicine": 0.5, - "clinical_medicine": 0.48148148148148145, + "sports_science": 0.4583333333333333, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.625, + "clinical_medicine": 0.5185185185185185, "urban_and_rural_planner": 0.49019607843137253, - "accountant": 0.3888888888888889, - "fire_engineer": 0.3888888888888889, - "environmental_impact_assessment_engineer": 0.4166666666666667, - "tax_accountant": 0.3148148148148148, - "physician": 0.6296296296296297 + "accountant": 0.4074074074074074, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3333333333333333, + "physician": 0.5555555555555556 } }, "prompt_2": { - "accuracy": 0.48443337484433374, + "accuracy": 0.4215442092154421, "category_acc": { - "computer_network": 0.7083333333333334, - "operating_system": 0.7083333333333334, - "computer_architecture": 0.6153846153846154, - "college_programming": 0.47619047619047616, - "college_physics": 0.4166666666666667, - "college_chemistry": 0.41379310344827586, - "advanced_mathematics": 0.375, - "probability_and_statistics": 0.391304347826087, + "computer_network": 0.5833333333333334, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.625, + "probability_and_statistics": 0.43478260869565216, "discrete_mathematics": 0.2857142857142857, - "electrical_engineer": 0.30952380952380953, + "electrical_engineer": 0.2857142857142857, "metrology_engineer": 0.3793103448275862, - "high_school_mathematics": 0.13043478260869565, - "high_school_physics": 0.4166666666666667, - "high_school_chemistry": 0.3333333333333333, - "high_school_biology": 0.4166666666666667, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.7307692307692307, - "middle_school_physics": 0.4583333333333333, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.5, - "college_economics": 0.4666666666666667, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.36666666666666664, "business_administration": 0.4473684210526316, - "marxism": 0.5833333333333334, - "mao_zedong_thought": 0.6206896551724138, - "education_science": 0.6176470588235294, - "teacher_qualification": 0.7142857142857143, - "high_school_politics": 0.625, - "high_school_geography": 0.5416666666666666, - "middle_school_politics": 0.7307692307692307, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.5357142857142857, - "ideological_and_moral_cultivation": 0.6666666666666666, - "logic": 0.48148148148148145, - "law": 0.4482758620689655, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.5, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, "chinese_language_and_literature": 0.4642857142857143, "art_studies": 0.6052631578947368, "professional_tour_guide": 0.5294117647058824, - "legal_professional": 0.32142857142857145, - "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.4, - "middle_school_history": 0.6296296296296297, - "civil_servant": 0.4423076923076923, - "sports_science": 0.5416666666666666, - "plant_protection": 0.7037037037037037, - "basic_medicine": 0.5, - "clinical_medicine": 0.4074074074074074, - "urban_and_rural_planner": 0.5686274509803921, - "accountant": 0.42592592592592593, - "fire_engineer": 0.4166666666666667, - "environmental_impact_assessment_engineer": 0.3611111111111111, - "tax_accountant": 0.3148148148148148, - "physician": 0.5740740740740741 + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.3269230769230769, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2222222222222222, + "physician": 0.5555555555555556 } }, "prompt_3": { - "accuracy": 0.4919053549190536, + "accuracy": 0.4738480697384807, "category_acc": { - "computer_network": 0.5, + "computer_network": 0.375, "operating_system": 0.625, - "computer_architecture": 0.5, - "college_programming": 0.6666666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5714285714285714, "college_physics": 0.4166666666666667, - "college_chemistry": 0.27586206896551724, - "advanced_mathematics": 0.3333333333333333, - "probability_and_statistics": 0.391304347826087, - "discrete_mathematics": 0.42857142857142855, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, "electrical_engineer": 0.38095238095238093, - "metrology_engineer": 0.5862068965517241, - "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.3333333333333333, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.5, "high_school_biology": 0.375, - "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.7692307692307693, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.6538461538461539, "middle_school_physics": 0.5, - "middle_school_chemistry": 0.52, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.45, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.4166666666666667, "business_administration": 0.3684210526315789, "marxism": 0.5833333333333334, "mao_zedong_thought": 0.5862068965517241, - "education_science": 0.6470588235294118, - "teacher_qualification": 0.7346938775510204, - "high_school_politics": 0.5833333333333334, - "high_school_geography": 0.5833333333333334, - "middle_school_politics": 0.7307692307692307, - "middle_school_geography": 0.5294117647058824, - "modern_chinese_history": 0.5357142857142857, - "ideological_and_moral_cultivation": 0.75, - "logic": 0.48148148148148145, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.42857142857142855, - "art_studies": 0.5526315789473685, - "professional_tour_guide": 0.5882352941176471, - "legal_professional": 0.5, - "high_school_chinese": 0.25, - "high_school_history": 0.56, - "middle_school_history": 0.6666666666666666, - "civil_servant": 0.3076923076923077, - "sports_science": 0.5, - "plant_protection": 0.7407407407407407, - "basic_medicine": 0.5, - "clinical_medicine": 0.48148148148148145, - "urban_and_rural_planner": 0.5882352941176471, - "accountant": 0.3888888888888889, - "fire_engineer": 0.4722222222222222, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.7551020408163265, + "high_school_politics": 0.625, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.5, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.4, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.4423076923076923, + "sports_science": 0.4583333333333333, + "plant_protection": 0.6666666666666666, + "basic_medicine": 0.625, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.35185185185185186, + "fire_engineer": 0.4166666666666667, "environmental_impact_assessment_engineer": 0.3888888888888889, - "tax_accountant": 0.25925925925925924, - "physician": 0.6111111111111112 + "tax_accountant": 0.3333333333333333, + "physician": 0.5740740740740741 } }, "prompt_4": { - "accuracy": 0.41594022415940224, + "accuracy": 0.4726027397260274, "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.3333333333333333, - "computer_architecture": 0.5384615384615384, - "college_programming": 0.35714285714285715, + "computer_network": 0.4166666666666667, + "operating_system": 0.625, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.5952380952380952, "college_physics": 0.3333333333333333, - "college_chemistry": 0.2413793103448276, - "advanced_mathematics": 0.2916666666666667, - "probability_and_statistics": 0.30434782608695654, - "discrete_mathematics": 0.14285714285714285, - "electrical_engineer": 0.38095238095238093, - "metrology_engineer": 0.3793103448275862, - "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.3333333333333333, - "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.5769230769230769, - "middle_school_physics": 0.2916666666666667, - "middle_school_chemistry": 0.32, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.36666666666666664, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.38333333333333336, "business_administration": 0.42105263157894735, - "marxism": 0.5416666666666666, - "mao_zedong_thought": 0.4482758620689655, - "education_science": 0.5, - "teacher_qualification": 0.5918367346938775, - "high_school_politics": 0.5833333333333334, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.6153846153846154, - "middle_school_geography": 0.5294117647058824, - "modern_chinese_history": 0.4642857142857143, - "ideological_and_moral_cultivation": 0.5, - "logic": 0.5185185185185185, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.673469387755102, + "high_school_politics": 0.625, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.4444444444444444, "law": 0.41379310344827586, "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.6052631578947368, - "professional_tour_guide": 0.47058823529411764, - "legal_professional": 0.35714285714285715, - "high_school_chinese": 0.2916666666666667, - "high_school_history": 0.64, - "middle_school_history": 0.37037037037037035, - "civil_servant": 0.4230769230769231, - "sports_science": 0.375, - "plant_protection": 0.5555555555555556, - "basic_medicine": 0.2916666666666667, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.49019607843137253, - "accountant": 0.3333333333333333, - "fire_engineer": 0.3333333333333333, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.5, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.4, + "middle_school_history": 0.6296296296296297, + "civil_servant": 0.4230769230769231, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.625, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.5686274509803921, + "accountant": 0.48148148148148145, + "fire_engineer": 0.3888888888888889, "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.25925925925925924, - "physician": 0.5370370370370371 + "tax_accountant": 0.37037037037037035, + "physician": 0.5185185185185185 } }, "prompt_5": { - "accuracy": 0.4701120797011208, + "accuracy": 0.39352428393524286, "category_acc": { - "computer_network": 0.5833333333333334, - "operating_system": 0.4583333333333333, + "computer_network": 0.4166666666666667, + "operating_system": 0.5, "computer_architecture": 0.46153846153846156, - "college_programming": 0.35714285714285715, - "college_physics": 0.4166666666666667, - "college_chemistry": 0.3103448275862069, + "college_programming": 0.47619047619047616, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, "advanced_mathematics": 0.3333333333333333, - "probability_and_statistics": 0.30434782608695654, - "discrete_mathematics": 0.3333333333333333, - "electrical_engineer": 0.42857142857142855, - "metrology_engineer": 0.4827586206896552, + "probability_and_statistics": 0.4782608695652174, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.4482758620689655, "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.375, - "high_school_chemistry": 0.2916666666666667, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.4166666666666667, "high_school_biology": 0.375, "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.7307692307692307, - "middle_school_physics": 0.4166666666666667, - "middle_school_chemistry": 0.52, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.56, "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.36666666666666664, - "business_administration": 0.47368421052631576, - "marxism": 0.5416666666666666, - "mao_zedong_thought": 0.5862068965517241, - "education_science": 0.6470588235294118, - "teacher_qualification": 0.673469387755102, - "high_school_politics": 0.6666666666666666, - "high_school_geography": 0.5, - "middle_school_politics": 0.7307692307692307, + "college_economics": 0.25, + "business_administration": 0.39473684210526316, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.625, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.46153846153846156, "middle_school_geography": 0.5882352941176471, - "modern_chinese_history": 0.5, - "ideological_and_moral_cultivation": 0.6666666666666666, - "logic": 0.5185185185185185, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4074074074074074, "law": 0.3793103448275862, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.5526315789473685, - "professional_tour_guide": 0.5294117647058824, - "legal_professional": 0.4642857142857143, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.4, - "middle_school_history": 0.5925925925925926, - "civil_servant": 0.36538461538461536, - "sports_science": 0.5, - "plant_protection": 0.6296296296296297, - "basic_medicine": 0.5833333333333334, - "clinical_medicine": 0.5185185185185185, - "urban_and_rural_planner": 0.5490196078431373, - "accountant": 0.48148148148148145, - "fire_engineer": 0.3888888888888889, - "environmental_impact_assessment_engineer": 0.4444444444444444, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.5, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.48, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.3269230769230769, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2777777777777778, "tax_accountant": 0.25925925925925924, - "physician": 0.6296296296296297 + "physician": 0.46296296296296297 } } }, "cmmlu": { "prompt_1": { - "accuracy": 0.5806451612903226 + "accuracy": 0.5053763440860215 }, "prompt_2": { - "accuracy": 0.5448028673835126 + "accuracy": 0.3978494623655914 }, "prompt_3": { - "accuracy": 0.5232974910394266 + "accuracy": 0.5017921146953405 }, "prompt_4": { - "accuracy": 0.4731182795698925 + "accuracy": 0.4874551971326165 }, "prompt_5": { - "accuracy": 0.5161290322580645 + "accuracy": 0.4014336917562724 } }, "cmmlu_full": { @@ -33357,16 +33357,16 @@ }, "zbench": { "prompt_1": { - "accuracy": 0.30303030303030304 + "accuracy": 0.36363636363636365 }, "prompt_2": { - "accuracy": 0.2727272727272727 + "accuracy": 0.30303030303030304 }, "prompt_3": { - "accuracy": 0.42424242424242425 + "accuracy": 0.45454545454545453 }, "prompt_4": { - "accuracy": 0.2727272727272727 + "accuracy": 0.3939393939393939 }, "prompt_5": { "accuracy": 0.3939393939393939 @@ -33377,10 +33377,10 @@ "accuracy": 0.6863636363636364 }, "prompt_2": { - "accuracy": 0.6477272727272727 + "accuracy": 0.6409090909090909 }, "prompt_3": { - "accuracy": 0.6795454545454546 + "accuracy": 0.6772727272727272 }, "prompt_4": { "accuracy": 0.6590909090909091 @@ -33391,173 +33391,173 @@ }, "ocnli": { "prompt_1": { - "accuracy": 0.49627118644067797 + "accuracy": 0.4654237288135593 }, "prompt_2": { - "accuracy": 0.4505084745762712 + "accuracy": 0.4230508474576271 }, "prompt_3": { - "accuracy": 0.5345762711864407 + "accuracy": 0.48745762711864404 }, "prompt_4": { - "accuracy": 0.48033898305084743 + "accuracy": 0.47627118644067795 }, "prompt_5": { - "accuracy": 0.42338983050847456 + "accuracy": 0.4477966101694915 } }, "c3": { "prompt_1": { - "accuracy": 0.8844427823485415 + "accuracy": 0.8066566940912491 }, "prompt_2": { - "accuracy": 0.8825729244577412 + "accuracy": 0.8515332834704562 }, "prompt_3": { - "accuracy": 0.8739715781600599 + "accuracy": 0.8081525804038893 }, "prompt_4": { - "accuracy": 0.8754674644727001 + "accuracy": 0.8421839940164547 }, "prompt_5": { - "accuracy": 0.8694839192221391 + "accuracy": 0.837696335078534 } }, "dream": { "prompt_1": { - "accuracy": 0.8931896129348359 + "accuracy": 0.6751592356687898 }, "prompt_2": { - "accuracy": 0.8990690837824595 + "accuracy": 0.6712395884370407 }, "prompt_3": { - "accuracy": 0.9118079372856442 + "accuracy": 0.8118569328760411 }, "prompt_4": { - "accuracy": 0.9024987751102401 + "accuracy": 0.7951984321411073 }, "prompt_5": { - "accuracy": 0.9064184223419892 + "accuracy": 0.6320431161195492 } }, "samsum": { "prompt_1": { - "rouge1": 0.22783552386864417, - "rouge2": 0.0839403119084876, - "rougeL": 0.17007006250391818, - "avg_rouge": 0.16061529942701666 + "rouge1": 0.394536257047064, + "rouge2": 0.15938958970092068, + "rougeL": 0.3080033448345455, + "avg_rouge": 0.2873097305275101 }, "prompt_2": { - "rouge1": 0.21906277397329468, - "rouge2": 0.07981495779273613, - "rougeL": 0.1611019992992204, - "avg_rouge": 0.15332657702175043 + "rouge1": 0.40809723645003265, + "rouge2": 0.1694273969016742, + "rougeL": 0.3186125456760373, + "avg_rouge": 0.29871239300924807 }, "prompt_3": { - "rouge1": 0.22051601719774955, - "rouge2": 0.07733715420256924, - "rougeL": 0.16309052127918097, - "avg_rouge": 0.15364789755983324 + "rouge1": 0.3820070872728476, + "rouge2": 0.14500406750910239, + "rougeL": 0.29121498132165047, + "avg_rouge": 0.27274204536786684 }, "prompt_4": { - "rouge1": 0.21865452524714854, - "rouge2": 0.07795006249760242, - "rougeL": 0.16086961877960224, - "avg_rouge": 0.1524914021747844 + "rouge1": 0.40397831488868796, + "rouge2": 0.1619497822621371, + "rougeL": 0.31255037171504924, + "avg_rouge": 0.29282615628862474 }, "prompt_5": { - "rouge1": 0.22042637368342233, - "rouge2": 0.08018824424560042, - "rougeL": 0.1712332946359059, - "avg_rouge": 0.15728263752164287 + "rouge1": 0.40348660886167875, + "rouge2": 0.14601168775164333, + "rougeL": 0.31568160624996433, + "avg_rouge": 0.2883933009544288 } }, "dialogsum": { "prompt_1": { - "rouge1": 0.12705281041724492, - "rouge2": 0.034748531923445534, - "rougeL": 0.09916061070001848, - "avg_rouge": 0.08698731768023632 + "rouge1": 0.3483807693282565, + "rouge2": 0.12523698197172764, + "rougeL": 0.2668352693206316, + "avg_rouge": 0.24681767354020526 }, "prompt_2": { - "rouge1": 0.13768897790409756, - "rouge2": 0.037895101154647844, - "rougeL": 0.10580890122208217, - "avg_rouge": 0.0937976600936092 + "rouge1": 0.33371476031593517, + "rouge2": 0.12035832084627907, + "rougeL": 0.2562992507417343, + "avg_rouge": 0.23679077730131617 }, "prompt_3": { - "rouge1": 0.15013721929677626, - "rouge2": 0.043554485551585684, - "rougeL": 0.11271506493749443, - "avg_rouge": 0.1021355899286188 + "rouge1": 0.3403892309431726, + "rouge2": 0.11952916764727636, + "rougeL": 0.25999286877914174, + "avg_rouge": 0.23997042245653022 }, "prompt_4": { - "rouge1": 0.16301602801162368, - "rouge2": 0.04606017775287866, - "rougeL": 0.12228466252459767, - "avg_rouge": 0.11045362276303333 + "rouge1": 0.33636015533984887, + "rouge2": 0.11925700460871659, + "rougeL": 0.2566002433796598, + "avg_rouge": 0.23740580110940843 }, "prompt_5": { - "rouge1": 0.19426426170381578, - "rouge2": 0.05026998096295629, - "rougeL": 0.1427788616737507, - "avg_rouge": 0.1291043681135076 + "rouge1": 0.37610779507943576, + "rouge2": 0.13156228827155866, + "rougeL": 0.2964961001253403, + "avg_rouge": 0.2680553944921116 } }, "sst2": { "prompt_1": { - "accuracy": 0.9025229357798165 + "accuracy": 0.8910550458715596 }, "prompt_2": { - "accuracy": 0.911697247706422 + "accuracy": 0.9151376146788991 }, "prompt_3": { - "accuracy": 0.8692660550458715 + "accuracy": 0.9025229357798165 }, "prompt_4": { - "accuracy": 0.9128440366972477 + "accuracy": 0.8899082568807339 }, "prompt_5": { - "accuracy": 0.8990825688073395 + "accuracy": 0.8830275229357798 } }, "cola": { "prompt_1": { - "accuracy": 0.6663470757430489 + "accuracy": 0.6193672099712368 }, "prompt_2": { - "accuracy": 0.6452540747842761 + "accuracy": 0.6212847555129435 }, "prompt_3": { - "accuracy": 0.6903163950143816 + "accuracy": 0.6749760306807286 }, "prompt_4": { - "accuracy": 0.7948226270373921 + "accuracy": 0.6116970278044104 }, "prompt_5": { - "accuracy": 0.6586768935762224 + "accuracy": 0.6596356663470757 } }, "qqp": { "prompt_1": { - "accuracy": 0.639 + "accuracy": 0.6045 }, "prompt_2": { - "accuracy": 0.7385 + "accuracy": 0.692 }, "prompt_3": { - "accuracy": 0.6695 + "accuracy": 0.5995 }, "prompt_4": { - "accuracy": 0.5695 + "accuracy": 0.522 }, "prompt_5": { - "accuracy": 0.75 + "accuracy": 0.727 } }, "mnli": { "prompt_1": { - "accuracy": 0.7 + "accuracy": 0.4 }, "prompt_2": { "accuracy": 0.7 @@ -33566,7 +33566,7 @@ "accuracy": 0.7 }, "prompt_4": { - "accuracy": 0.6 + "accuracy": 0.7 }, "prompt_5": { "accuracy": 0.6 @@ -33574,19 +33574,19 @@ }, "qnli": { "prompt_1": { - "accuracy": 1.0 + "accuracy": 0.7 }, "prompt_2": { - "accuracy": 1.0 + "accuracy": 0.5 }, "prompt_3": { - "accuracy": 1.0 + "accuracy": 0.9 }, "prompt_4": { - "accuracy": 1.0 + "accuracy": 0.9 }, "prompt_5": { - "accuracy": 1.0 + "accuracy": 0.8 } }, "wnli": { @@ -33597,13 +33597,13 @@ "accuracy": 0.6 }, "prompt_3": { - "accuracy": 0.5 + "accuracy": 0.4 }, "prompt_4": { - "accuracy": 0.3 + "accuracy": 0.4 }, "prompt_5": { - "accuracy": 0.4 + "accuracy": 0.5 } }, "rte": { @@ -33611,16 +33611,16 @@ "accuracy": 0.8 }, "prompt_2": { - "accuracy": 0.6 + "accuracy": 0.7 }, "prompt_3": { "accuracy": 0.8 }, "prompt_4": { - "accuracy": 0.8 + "accuracy": 0.5 }, "prompt_5": { - "accuracy": 0.7 + "accuracy": 0.8 } }, "mrpc": { @@ -33628,7 +33628,7 @@ "accuracy": 0.9 }, "prompt_2": { - "accuracy": 0.9 + "accuracy": 1.0 }, "prompt_3": { "accuracy": 0.9 @@ -93873,1823 +93873,1823 @@ "zero_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.8915966386554622, + "overall_acc": 0.9121848739495798, "language_acc": { - "Spanish": 0.9025210084033614, - "English": 0.9319327731092437, - "Chinese": 0.8621848739495799, - "Vietnamese": 0.8697478991596639 - }, - "consistency_score_2": 0.8551820728291317, - "consistency_score_3": 0.7907563025210084, - "consistency_score_4": 0.7445378151260504, + "Spanish": 0.9193277310924369, + "English": 0.9411764705882353, + "Chinese": 0.8915966386554622, + "Vietnamese": 0.8966386554621849 + }, + "consistency_score_2": 0.888795518207283, + "consistency_score_3": 0.8378151260504202, + "consistency_score_4": 0.8033613445378152, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9042016806722689, - "Spanish,Chinese": 0.8319327731092437, - "Spanish,Vietnamese": 0.8470588235294118, - "English,Chinese": 0.8579831932773109, - "English,Vietnamese": 0.8722689075630252, - "Chinese,Vietnamese": 0.8176470588235294 + "Spanish,English": 0.9210084033613445, + "Spanish,Chinese": 0.8672268907563025, + "Spanish,Vietnamese": 0.8915966386554622, + "English,Chinese": 0.888235294117647, + "English,Vietnamese": 0.8983193277310925, + "Chinese,Vietnamese": 0.8663865546218488 }, "3_combine": { - "Spanish,English,Chinese": 0.8033613445378152, - "Spanish,English,Vietnamese": 0.8184873949579832, - "Spanish,Chinese,Vietnamese": 0.7588235294117647, - "English,Chinese,Vietnamese": 0.7823529411764706 + "Spanish,English,Chinese": 0.8428571428571429, + "Spanish,English,Vietnamese": 0.8596638655462185, + "Spanish,Chinese,Vietnamese": 0.8184873949579832, + "English,Chinese,Vietnamese": 0.8302521008403362 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7445378151260504 + "Spanish,English,Chinese,Vietnamese": 0.8033613445378152 } }, - "AC3_2": 0.8730097939895106, - "AC3_3": 0.8381542826422775, - "AC3_4": 0.8114582658453598 + "AC3_2": 0.9003383170950483, + "AC3_3": 0.8734197544064948, + "AC3_4": 0.8543215669311448 }, "prompt_2": { - "overall_acc": 0.8907563025210085, + "overall_acc": 0.9174369747899159, "language_acc": { - "Spanish": 0.892436974789916, - "English": 0.9327731092436975, - "Chinese": 0.8630252100840337, - "Vietnamese": 0.8747899159663866 + "Spanish": 0.9252100840336135, + "English": 0.9378151260504202, + "Chinese": 0.9042016806722689, + "Vietnamese": 0.9025210084033614 }, - "consistency_score_2": 0.8521008403361345, - "consistency_score_3": 0.7852941176470588, - "consistency_score_4": 0.7336134453781512, + "consistency_score_2": 0.8983193277310925, + "consistency_score_3": 0.8525210084033614, + "consistency_score_4": 0.8201680672268907, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8915966386554622, - "Spanish,Chinese": 0.8294117647058824, - "Spanish,Vietnamese": 0.8394957983193277, - "English,Chinese": 0.8638655462184874, - "English,Vietnamese": 0.8722689075630252, - "Chinese,Vietnamese": 0.8159663865546218 + "Spanish,English": 0.9226890756302522, + "Spanish,Chinese": 0.8840336134453781, + "Spanish,Vietnamese": 0.8983193277310925, + "English,Chinese": 0.8966386554621849, + "English,Vietnamese": 0.907563025210084, + "Chinese,Vietnamese": 0.880672268907563 }, "3_combine": { - "Spanish,English,Chinese": 0.8008403361344538, - "Spanish,English,Vietnamese": 0.8067226890756303, - "Spanish,Chinese,Vietnamese": 0.7521008403361344, - "English,Chinese,Vietnamese": 0.7815126050420168 + "Spanish,English,Chinese": 0.8546218487394958, + "Spanish,English,Vietnamese": 0.8689075630252101, + "Spanish,Chinese,Vietnamese": 0.838655462184874, + "English,Chinese,Vietnamese": 0.8478991596638655 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7336134453781512 + "Spanish,English,Chinese,Vietnamese": 0.8201680672268907 } }, - "AC3_2": 0.8709998946041255, - "AC3_3": 0.8347072094822936, - "AC3_4": 0.8045838096771522 + "AC3_2": 0.9077775087320675, + "AC3_3": 0.8837885444479853, + "AC3_4": 0.8660800264475165 }, "prompt_3": { - "overall_acc": 0.8863445378151261, + "overall_acc": 0.9119747899159664, "language_acc": { - "Spanish": 0.8890756302521008, - "English": 0.9235294117647059, - "Chinese": 0.8596638655462185, - "Vietnamese": 0.873109243697479 + "Spanish": 0.9218487394957983, + "English": 0.9378151260504202, + "Chinese": 0.892436974789916, + "Vietnamese": 0.8957983193277311 }, - "consistency_score_2": 0.8471988795518207, - "consistency_score_3": 0.778781512605042, - "consistency_score_4": 0.7294117647058823, + "consistency_score_2": 0.8915966386554622, + "consistency_score_3": 0.8422268907563025, + "consistency_score_4": 0.8084033613445378, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8865546218487395, - "Spanish,Chinese": 0.8260504201680672, - "Spanish,Vietnamese": 0.8403361344537815, - "English,Chinese": 0.853781512605042, - "English,Vietnamese": 0.865546218487395, - "Chinese,Vietnamese": 0.8109243697478992 + "Spanish,English": 0.9260504201680673, + "Spanish,Chinese": 0.8747899159663866, + "Spanish,Vietnamese": 0.8899159663865546, + "English,Chinese": 0.8857142857142857, + "English,Vietnamese": 0.9042016806722689, + "Chinese,Vietnamese": 0.8689075630252101 }, "3_combine": { - "Spanish,English,Chinese": 0.7907563025210084, - "Spanish,English,Vietnamese": 0.8008403361344538, - "Spanish,Chinese,Vietnamese": 0.7495798319327731, - "English,Chinese,Vietnamese": 0.7739495798319328 + "Spanish,English,Chinese": 0.8470588235294118, + "Spanish,English,Vietnamese": 0.8638655462184874, + "Spanish,Chinese,Vietnamese": 0.8235294117647058, + "English,Chinese,Vietnamese": 0.8344537815126051 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7294117647058823 + "Spanish,English,Chinese,Vietnamese": 0.8084033613445378 } }, - "AC3_2": 0.8663297287714672, - "AC3_3": 0.8290888724409158, - "AC3_4": 0.800256984814369 + "AC3_2": 0.9016705901429852, + "AC3_3": 0.8757142353448357, + "AC3_4": 0.8570714351917585 }, "prompt_4": { - "overall_acc": 0.8890756302521009, + "overall_acc": 0.9128151260504201, "language_acc": { - "Spanish": 0.9092436974789916, - "English": 0.926890756302521, - "Chinese": 0.8613445378151261, - "Vietnamese": 0.8588235294117647 + "Spanish": 0.9235294117647059, + "English": 0.9403361344537815, + "Chinese": 0.892436974789916, + "Vietnamese": 0.8949579831932774 }, - "consistency_score_2": 0.8490196078431373, - "consistency_score_3": 0.7819327731092437, - "consistency_score_4": 0.7327731092436974, + "consistency_score_2": 0.8897759103641457, + "consistency_score_3": 0.8394957983193277, + "consistency_score_4": 0.8042016806722689, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9084033613445378, - "Spanish,Chinese": 0.8235294117647058, - "Spanish,Vietnamese": 0.8369747899159664, - "English,Chinese": 0.8529411764705882, - "English,Vietnamese": 0.8613445378151261, - "Chinese,Vietnamese": 0.8109243697478992 + "Spanish,English": 0.9226890756302522, + "Spanish,Chinese": 0.8722689075630252, + "Spanish,Vietnamese": 0.892436974789916, + "English,Chinese": 0.888235294117647, + "English,Vietnamese": 0.8941176470588236, + "Chinese,Vietnamese": 0.8689075630252101 }, "3_combine": { - "Spanish,English,Chinese": 0.7991596638655463, - "Spanish,English,Vietnamese": 0.8100840336134454, - "Spanish,Chinese,Vietnamese": 0.746218487394958, - "English,Chinese,Vietnamese": 0.7722689075630252 + "Spanish,English,Chinese": 0.846218487394958, + "Spanish,English,Vietnamese": 0.8579831932773109, + "Spanish,Chinese,Vietnamese": 0.8235294117647058, + "English,Chinese,Vietnamese": 0.8302521008403362 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7327731092436974 + "Spanish,English,Chinese,Vietnamese": 0.8042016806722689 } }, - "AC3_2": 0.8685860548393474, - "AC3_3": 0.8320692722153098, - "AC3_4": 0.8033926938155752 + "AC3_2": 0.9011482841345106, + "AC3_3": 0.8746215665961681, + "AC3_4": 0.8550731193729354 }, "prompt_5": { - "overall_acc": 0.8676470588235294, + "overall_acc": 0.9115546218487395, "language_acc": { - "Spanish": 0.880672268907563, - "English": 0.9226890756302522, - "Chinese": 0.8210084033613445, - "Vietnamese": 0.846218487394958 - }, - "consistency_score_2": 0.8089635854341736, - "consistency_score_3": 0.7279411764705883, - "consistency_score_4": 0.66890756302521, + "Spanish": 0.9210084033613445, + "English": 0.9403361344537815, + "Chinese": 0.8932773109243698, + "Vietnamese": 0.8915966386554622 + }, + "consistency_score_2": 0.8893557422969188, + "consistency_score_3": 0.8384453781512605, + "consistency_score_4": 0.8033613445378152, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8621848739495799, - "Spanish,Chinese": 0.7764705882352941, - "Spanish,Vietnamese": 0.8134453781512605, - "English,Chinese": 0.8092436974789916, - "English,Vietnamese": 0.8344537815126051, - "Chinese,Vietnamese": 0.7579831932773109 + "Spanish,English": 0.9193277310924369, + "Spanish,Chinese": 0.8680672268907563, + "Spanish,Vietnamese": 0.8941176470588236, + "English,Chinese": 0.8907563025210085, + "English,Vietnamese": 0.8957983193277311, + "Chinese,Vietnamese": 0.8680672268907563 }, "3_combine": { - "Spanish,English,Chinese": 0.7361344537815127, - "Spanish,English,Vietnamese": 0.7672268907563026, - "Spanish,Chinese,Vietnamese": 0.6941176470588235, - "English,Chinese,Vietnamese": 0.7142857142857143 + "Spanish,English,Chinese": 0.8436974789915966, + "Spanish,English,Vietnamese": 0.8579831932773109, + "Spanish,Chinese,Vietnamese": 0.8210084033613445, + "English,Chinese,Vietnamese": 0.83109243697479 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.66890756302521 + "Spanish,English,Chinese,Vietnamese": 0.8033613445378152 } }, - "AC3_2": 0.837278324528577, - "AC3_3": 0.7916779614575669, - "AC3_4": 0.7554247293175627 + "AC3_2": 0.9003183650376143, + "AC3_3": 0.8734728680889394, + "AC3_4": 0.8540450505319899 } }, "cross_mmlu": { "prompt_1": { - "overall_acc": 0.5180952380952382, + "overall_acc": 0.5361904761904762, "language_acc": { - "Filipino": 0.5333333333333333, - "Vietnamese": 0.4533333333333333, - "Chinese": 0.56, - "Spanish": 0.4866666666666667, - "Malay": 0.43333333333333335, - "Indonesian": 0.4866666666666667, - "English": 0.6733333333333333 + "Filipino": 0.5066666666666667, + "Vietnamese": 0.48, + "Chinese": 0.5466666666666666, + "Spanish": 0.56, + "Malay": 0.5, + "Indonesian": 0.5, + "English": 0.66 }, - "consistency_score_2": 0.45841269841269844, - "consistency_score_3": 0.2664761904761905, - "consistency_score_4": 0.17619047619047623, - "consistency_score_5": 0.12793650793650796, - "consistency_score_6": 0.09904761904761905, - "consistency_score_7": 0.08, + "consistency_score_2": 0.5752380952380952, + "consistency_score_3": 0.40971428571428575, + "consistency_score_4": 0.3234285714285714, + "consistency_score_5": 0.27142857142857146, + "consistency_score_6": 0.23714285714285713, + "consistency_score_7": 0.21333333333333335, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.4533333333333333, - "Filipino,Chinese": 0.44666666666666666, - "Filipino,Spanish": 0.4666666666666667, - "Filipino,Malay": 0.44, - "Filipino,Indonesian": 0.4666666666666667, - "Filipino,English": 0.47333333333333333, - "Vietnamese,Chinese": 0.42, - "Vietnamese,Spanish": 0.44, - "Vietnamese,Malay": 0.4533333333333333, - "Vietnamese,Indonesian": 0.4266666666666667, - "Vietnamese,English": 0.4533333333333333, - "Chinese,Spanish": 0.44666666666666666, - "Chinese,Malay": 0.4066666666666667, - "Chinese,Indonesian": 0.49333333333333335, - "Chinese,English": 0.48, - "Spanish,Malay": 0.44, - "Spanish,Indonesian": 0.49333333333333335, - "Spanish,English": 0.5, - "Malay,Indonesian": 0.49333333333333335, - "Malay,English": 0.44, - "Indonesian,English": 0.49333333333333335 + "Filipino,Vietnamese": 0.5266666666666666, + "Filipino,Chinese": 0.5933333333333334, + "Filipino,Spanish": 0.58, + "Filipino,Malay": 0.5266666666666666, + "Filipino,Indonesian": 0.5666666666666667, + "Filipino,English": 0.5666666666666667, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Indonesian": 0.58, + "Vietnamese,English": 0.5533333333333333, + "Chinese,Spanish": 0.5733333333333334, + "Chinese,Malay": 0.5, + "Chinese,Indonesian": 0.54, + "Chinese,English": 0.5733333333333334, + "Spanish,Malay": 0.58, + "Spanish,Indonesian": 0.6, + "Spanish,English": 0.66, + "Malay,Indonesian": 0.6666666666666666, + "Malay,English": 0.5333333333333333, + "Indonesian,English": 0.6066666666666667 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.24666666666666667, - "Filipino,Vietnamese,Spanish": 0.25333333333333335, - "Filipino,Vietnamese,Malay": 0.22666666666666666, - "Filipino,Vietnamese,Indonesian": 0.26, - "Filipino,Vietnamese,English": 0.2733333333333333, - "Filipino,Chinese,Spanish": 0.25333333333333335, - "Filipino,Chinese,Malay": 0.23333333333333334, - "Filipino,Chinese,Indonesian": 0.28, - "Filipino,Chinese,English": 0.2866666666666667, - "Filipino,Spanish,Malay": 0.24, - "Filipino,Spanish,Indonesian": 0.28, - "Filipino,Spanish,English": 0.31333333333333335, - "Filipino,Malay,Indonesian": 0.2866666666666667, - "Filipino,Malay,English": 0.26666666666666666, - "Filipino,Indonesian,English": 0.32666666666666666, - "Vietnamese,Chinese,Spanish": 0.23333333333333334, - "Vietnamese,Chinese,Malay": 0.21333333333333335, - "Vietnamese,Chinese,Indonesian": 0.24, - "Vietnamese,Chinese,English": 0.25333333333333335, - "Vietnamese,Spanish,Malay": 0.24666666666666667, - "Vietnamese,Spanish,Indonesian": 0.24666666666666667, - "Vietnamese,Spanish,English": 0.25333333333333335, - "Vietnamese,Malay,Indonesian": 0.2733333333333333, - "Vietnamese,Malay,English": 0.26666666666666666, - "Vietnamese,Indonesian,English": 0.26, - "Chinese,Spanish,Malay": 0.22666666666666666, - "Chinese,Spanish,Indonesian": 0.26666666666666666, - "Chinese,Spanish,English": 0.30666666666666664, - "Chinese,Malay,Indonesian": 0.26666666666666666, - "Chinese,Malay,English": 0.26, - "Chinese,Indonesian,English": 0.30666666666666664, - "Spanish,Malay,Indonesian": 0.29333333333333333, - "Spanish,Malay,English": 0.2733333333333333, - "Spanish,Indonesian,English": 0.32, - "Malay,Indonesian,English": 0.29333333333333333 + "Filipino,Vietnamese,Chinese": 0.4066666666666667, + "Filipino,Vietnamese,Spanish": 0.4266666666666667, + "Filipino,Vietnamese,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,English": 0.38666666666666666, + "Filipino,Chinese,Spanish": 0.43333333333333335, + "Filipino,Chinese,Malay": 0.35333333333333333, + "Filipino,Chinese,Indonesian": 0.4, + "Filipino,Chinese,English": 0.4266666666666667, + "Filipino,Spanish,Malay": 0.4, + "Filipino,Spanish,Indonesian": 0.43333333333333335, + "Filipino,Spanish,English": 0.4533333333333333, + "Filipino,Malay,Indonesian": 0.42, + "Filipino,Malay,English": 0.36666666666666664, + "Filipino,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.43333333333333335, + "Vietnamese,Chinese,Malay": 0.36666666666666664, + "Vietnamese,Chinese,Indonesian": 0.4066666666666667, + "Vietnamese,Chinese,English": 0.4, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.44666666666666666, + "Vietnamese,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,English": 0.37333333333333335, + "Vietnamese,Indonesian,English": 0.41333333333333333, + "Chinese,Spanish,Malay": 0.36666666666666664, + "Chinese,Spanish,Indonesian": 0.4, + "Chinese,Spanish,English": 0.43333333333333335, + "Chinese,Malay,Indonesian": 0.38, + "Chinese,Malay,English": 0.3466666666666667, + "Chinese,Indonesian,English": 0.4066666666666667, + "Spanish,Malay,Indonesian": 0.4533333333333333, + "Spanish,Malay,English": 0.42, + "Spanish,Indonesian,English": 0.46, + "Malay,Indonesian,English": 0.43333333333333335 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Malay": 0.14, - "Filipino,Vietnamese,Chinese,Indonesian": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,English": 0.17333333333333334, - "Filipino,Vietnamese,Spanish,Malay": 0.15333333333333332, - "Filipino,Vietnamese,Spanish,Indonesian": 0.16, - "Filipino,Vietnamese,Spanish,English": 0.17333333333333334, - "Filipino,Vietnamese,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Malay,English": 0.17333333333333334, - "Filipino,Vietnamese,Indonesian,English": 0.19333333333333333, - "Filipino,Chinese,Spanish,Malay": 0.16, - "Filipino,Chinese,Spanish,Indonesian": 0.16, - "Filipino,Chinese,Spanish,English": 0.20666666666666667, - "Filipino,Chinese,Malay,Indonesian": 0.18666666666666668, - "Filipino,Chinese,Malay,English": 0.17333333333333334, - "Filipino,Chinese,Indonesian,English": 0.21333333333333335, - "Filipino,Spanish,Malay,Indonesian": 0.2, - "Filipino,Spanish,Malay,English": 0.19333333333333333, - "Filipino,Spanish,Indonesian,English": 0.23333333333333334, - "Filipino,Malay,Indonesian,English": 0.22, - "Vietnamese,Chinese,Spanish,Malay": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.15333333333333332, - "Vietnamese,Chinese,Malay,Indonesian": 0.16, - "Vietnamese,Chinese,Malay,English": 0.14666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.16666666666666666, - "Vietnamese,Spanish,Malay,Indonesian": 0.16, - "Vietnamese,Spanish,Malay,English": 0.17333333333333334, - "Vietnamese,Spanish,Indonesian,English": 0.18, - "Vietnamese,Malay,Indonesian,English": 0.19333333333333333, - "Chinese,Spanish,Malay,Indonesian": 0.17333333333333334, - "Chinese,Spanish,Malay,English": 0.17333333333333334, - "Chinese,Spanish,Indonesian,English": 0.19333333333333333, - "Chinese,Malay,Indonesian,English": 0.20666666666666667, - "Spanish,Malay,Indonesian,English": 0.21333333333333335 + "Filipino,Vietnamese,Chinese,Spanish": 0.34, + "Filipino,Vietnamese,Chinese,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.32, + "Filipino,Vietnamese,Spanish,Malay": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Filipino,Vietnamese,Spanish,English": 0.3466666666666667, + "Filipino,Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Malay,English": 0.28, + "Filipino,Vietnamese,Indonesian,English": 0.32, + "Filipino,Chinese,Spanish,Malay": 0.3, + "Filipino,Chinese,Spanish,Indonesian": 0.34, + "Filipino,Chinese,Spanish,English": 0.36, + "Filipino,Chinese,Malay,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Malay,English": 0.29333333333333333, + "Filipino,Chinese,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian": 0.35333333333333333, + "Filipino,Spanish,Malay,English": 0.32, + "Filipino,Spanish,Indonesian,English": 0.35333333333333333, + "Filipino,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Vietnamese,Chinese,Malay,Indonesian": 0.32, + "Vietnamese,Chinese,Malay,English": 0.2733333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.32, + "Vietnamese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Malay,English": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.36, + "Vietnamese,Malay,Indonesian,English": 0.32, + "Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Chinese,Spanish,Malay,English": 0.30666666666666664, + "Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Chinese,Malay,Indonesian,English": 0.3, + "Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.1, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.09333333333333334, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.12, - "Filipino,Vietnamese,Chinese,Malay,English": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.13333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.12666666666666668, - "Filipino,Vietnamese,Spanish,Malay,English": 0.12666666666666668, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.14, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.14, - "Filipino,Chinese,Spanish,Malay,English": 0.13333333333333333, - "Filipino,Chinese,Spanish,Indonesian,English": 0.14666666666666667, - "Filipino,Chinese,Malay,Indonesian,English": 0.16, - "Filipino,Spanish,Malay,Indonesian,English": 0.17333333333333334, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.1, - "Vietnamese,Chinese,Spanish,Malay,English": 0.1, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.1, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.12666666666666668, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.14, - "Chinese,Spanish,Malay,Indonesian,English": 0.14666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.28, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,English": 0.24, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.26, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.3, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.26, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.24, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.08666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.08, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.08666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.10666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.12, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.08666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.08 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.21333333333333335 } }, - "AC3_2": 0.4864301194622309, - "AC3_3": 0.35193747900114325, - "AC3_4": 0.2629564308197911, - "AC3_5": 0.20520135716959387, - "AC3_6": 0.16630217516411697, - "AC3_7": 0.13859872609147633 + "AC3_2": 0.5550283591943592, + "AC3_3": 0.4644968643430462, + "AC3_4": 0.4034794719545824, + "AC3_5": 0.3604110511683117, + "AC3_6": 0.32884588313833535, + "AC3_7": 0.305226598858049 }, "prompt_2": { - "overall_acc": 0.5219047619047619, + "overall_acc": 0.5495238095238094, "language_acc": { - "Filipino": 0.5133333333333333, - "Vietnamese": 0.5, - "Chinese": 0.52, - "Spanish": 0.52, - "Malay": 0.4533333333333333, - "Indonesian": 0.52, - "English": 0.6266666666666667 + "Filipino": 0.5333333333333333, + "Vietnamese": 0.5266666666666666, + "Chinese": 0.5733333333333334, + "Spanish": 0.5866666666666667, + "Malay": 0.47333333333333333, + "Indonesian": 0.5333333333333333, + "English": 0.62 }, - "consistency_score_2": 0.5069841269841271, - "consistency_score_3": 0.33409523809523817, - "consistency_score_4": 0.24495238095238103, - "consistency_score_5": 0.19015873015873014, - "consistency_score_6": 0.15333333333333332, - "consistency_score_7": 0.12666666666666668, + "consistency_score_2": 0.5904761904761906, + "consistency_score_3": 0.4382857142857143, + "consistency_score_4": 0.3575238095238095, + "consistency_score_5": 0.30698412698412697, + "consistency_score_6": 0.2723809523809524, + "consistency_score_7": 0.24666666666666667, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5066666666666667, - "Filipino,Chinese": 0.5, - "Filipino,Spanish": 0.5266666666666666, - "Filipino,Malay": 0.5133333333333333, - "Filipino,Indonesian": 0.48, - "Filipino,English": 0.5266666666666666, - "Vietnamese,Chinese": 0.4533333333333333, - "Vietnamese,Spanish": 0.4866666666666667, - "Vietnamese,Malay": 0.49333333333333335, - "Vietnamese,Indonesian": 0.5866666666666667, + "Filipino,Vietnamese": 0.5933333333333334, + "Filipino,Chinese": 0.6, + "Filipino,Spanish": 0.6266666666666667, + "Filipino,Malay": 0.5533333333333333, + "Filipino,Indonesian": 0.6333333333333333, + "Filipino,English": 0.6, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.6533333333333333, + "Vietnamese,Malay": 0.52, + "Vietnamese,Indonesian": 0.6266666666666667, "Vietnamese,English": 0.5533333333333333, - "Chinese,Spanish": 0.4266666666666667, - "Chinese,Malay": 0.4666666666666667, - "Chinese,Indonesian": 0.48, - "Chinese,English": 0.5133333333333333, - "Spanish,Malay": 0.4533333333333333, - "Spanish,Indonesian": 0.44666666666666666, - "Spanish,English": 0.58, - "Malay,Indonesian": 0.58, - "Malay,English": 0.52, - "Indonesian,English": 0.5533333333333333 + "Chinese,Spanish": 0.5533333333333333, + "Chinese,Malay": 0.5533333333333333, + "Chinese,Indonesian": 0.5866666666666667, + "Chinese,English": 0.5466666666666666, + "Spanish,Malay": 0.5666666666666667, + "Spanish,Indonesian": 0.6733333333333333, + "Spanish,English": 0.6333333333333333, + "Malay,Indonesian": 0.6266666666666667, + "Malay,English": 0.5266666666666666, + "Indonesian,English": 0.6 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.31333333333333335, - "Filipino,Vietnamese,Spanish": 0.32666666666666666, - "Filipino,Vietnamese,Malay": 0.3466666666666667, - "Filipino,Vietnamese,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,English": 0.38, - "Filipino,Chinese,Spanish": 0.31333333333333335, - "Filipino,Chinese,Malay": 0.32, - "Filipino,Chinese,Indonesian": 0.32, - "Filipino,Chinese,English": 0.3466666666666667, - "Filipino,Spanish,Malay": 0.32, - "Filipino,Spanish,Indonesian": 0.29333333333333333, - "Filipino,Spanish,English": 0.37333333333333335, - "Filipino,Malay,Indonesian": 0.34, - "Filipino,Malay,English": 0.3466666666666667, - "Filipino,Indonesian,English": 0.35333333333333333, - "Vietnamese,Chinese,Spanish": 0.2733333333333333, - "Vietnamese,Chinese,Malay": 0.31333333333333335, - "Vietnamese,Chinese,Indonesian": 0.3333333333333333, - "Vietnamese,Chinese,English": 0.3466666666666667, - "Vietnamese,Spanish,Malay": 0.3, - "Vietnamese,Spanish,Indonesian": 0.31333333333333335, - "Vietnamese,Spanish,English": 0.38, - "Vietnamese,Malay,Indonesian": 0.38666666666666666, - "Vietnamese,Malay,English": 0.35333333333333333, - "Vietnamese,Indonesian,English": 0.4066666666666667, - "Chinese,Spanish,Malay": 0.26, - "Chinese,Spanish,Indonesian": 0.25333333333333335, - "Chinese,Spanish,English": 0.3333333333333333, - "Chinese,Malay,Indonesian": 0.3333333333333333, - "Chinese,Malay,English": 0.3333333333333333, - "Chinese,Indonesian,English": 0.35333333333333333, - "Spanish,Malay,Indonesian": 0.29333333333333333, - "Spanish,Malay,English": 0.3333333333333333, - "Spanish,Indonesian,English": 0.36, - "Malay,Indonesian,English": 0.38666666666666666 - }, - "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.21333333333333335, - "Filipino,Vietnamese,Chinese,Malay": 0.24, - "Filipino,Vietnamese,Chinese,Indonesian": 0.24, - "Filipino,Vietnamese,Chinese,English": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,English": 0.26666666666666666, - "Filipino,Vietnamese,Malay,Indonesian": 0.26666666666666666, - "Filipino,Vietnamese,Malay,English": 0.28, - "Filipino,Vietnamese,Indonesian,English": 0.29333333333333333, - "Filipino,Chinese,Spanish,Malay": 0.21333333333333335, - "Filipino,Chinese,Spanish,Indonesian": 0.20666666666666667, - "Filipino,Chinese,Spanish,English": 0.26, - "Filipino,Chinese,Malay,Indonesian": 0.24, - "Filipino,Chinese,Malay,English": 0.25333333333333335, - "Filipino,Chinese,Indonesian,English": 0.25333333333333335, - "Filipino,Spanish,Malay,Indonesian": 0.21333333333333335, - "Filipino,Spanish,Malay,English": 0.24666666666666667, - "Filipino,Spanish,Indonesian,English": 0.24666666666666667, - "Filipino,Malay,Indonesian,English": 0.26666666666666666, - "Vietnamese,Chinese,Spanish,Malay": 0.2, - "Vietnamese,Chinese,Spanish,Indonesian": 0.2, - "Vietnamese,Chinese,Spanish,English": 0.24, - "Vietnamese,Chinese,Malay,Indonesian": 0.24666666666666667, - "Vietnamese,Chinese,Malay,English": 0.24666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.2733333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.23333333333333334, - "Vietnamese,Spanish,Malay,English": 0.25333333333333335, - "Vietnamese,Spanish,Indonesian,English": 0.28, - "Vietnamese,Malay,Indonesian,English": 0.3, - "Chinese,Spanish,Malay,Indonesian": 0.2, - "Chinese,Spanish,Malay,English": 0.22666666666666666, - "Chinese,Spanish,Indonesian,English": 0.23333333333333334, - "Chinese,Malay,Indonesian,English": 0.26, - "Spanish,Malay,Indonesian,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese": 0.44666666666666666, + "Filipino,Vietnamese,Spanish": 0.4666666666666667, + "Filipino,Vietnamese,Malay": 0.3933333333333333, + "Filipino,Vietnamese,Indonesian": 0.47333333333333333, + "Filipino,Vietnamese,English": 0.44, + "Filipino,Chinese,Spanish": 0.44666666666666666, + "Filipino,Chinese,Malay": 0.4066666666666667, + "Filipino,Chinese,Indonesian": 0.4533333333333333, + "Filipino,Chinese,English": 0.44666666666666666, + "Filipino,Spanish,Malay": 0.44, + "Filipino,Spanish,Indonesian": 0.52, + "Filipino,Spanish,English": 0.4866666666666667, + "Filipino,Malay,Indonesian": 0.44666666666666666, + "Filipino,Malay,English": 0.41333333333333333, + "Filipino,Indonesian,English": 0.47333333333333333, + "Vietnamese,Chinese,Spanish": 0.43333333333333335, + "Vietnamese,Chinese,Malay": 0.38666666666666666, + "Vietnamese,Chinese,Indonesian": 0.44, + "Vietnamese,Chinese,English": 0.3933333333333333, + "Vietnamese,Spanish,Malay": 0.42, + "Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,English": 0.48, + "Vietnamese,Malay,Indonesian": 0.44, + "Vietnamese,Malay,English": 0.36666666666666664, + "Vietnamese,Indonesian,English": 0.44, + "Chinese,Spanish,Malay": 0.3933333333333333, + "Chinese,Spanish,Indonesian": 0.44, + "Chinese,Spanish,English": 0.43333333333333335, + "Chinese,Malay,Indonesian": 0.4266666666666667, + "Chinese,Malay,English": 0.38, + "Chinese,Indonesian,English": 0.41333333333333333, + "Spanish,Malay,Indonesian": 0.47333333333333333, + "Spanish,Malay,English": 0.4066666666666667, + "Spanish,Indonesian,English": 0.49333333333333335, + "Malay,Indonesian,English": 0.41333333333333333 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Malay": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian": 0.38, + "Filipino,Vietnamese,Chinese,English": 0.3466666666666667, + "Filipino,Vietnamese,Spanish,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian": 0.41333333333333333, + "Filipino,Vietnamese,Spanish,English": 0.4, + "Filipino,Vietnamese,Malay,Indonesian": 0.36, + "Filipino,Vietnamese,Malay,English": 0.3333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.38666666666666666, + "Filipino,Chinese,Spanish,Malay": 0.3333333333333333, + "Filipino,Chinese,Spanish,Indonesian": 0.37333333333333335, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.34, + "Filipino,Chinese,Malay,English": 0.3333333333333333, + "Filipino,Chinese,Indonesian,English": 0.37333333333333335, + "Filipino,Spanish,Malay,Indonesian": 0.4, + "Filipino,Spanish,Malay,English": 0.36666666666666664, + "Filipino,Spanish,Indonesian,English": 0.42, + "Filipino,Malay,Indonesian,English": 0.36, + "Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,English": 0.34, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.3, + "Vietnamese,Chinese,Indonesian,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Spanish,Malay,English": 0.34, + "Vietnamese,Spanish,Indonesian,English": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Chinese,Spanish,Malay,English": 0.32666666666666666, + "Chinese,Spanish,Indonesian,English": 0.3466666666666667, + "Chinese,Malay,Indonesian,English": 0.32, + "Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.19333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.2, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Spanish,Malay,English": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.23333333333333334, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Chinese,Spanish,Indonesian,English": 0.18666666666666668, - "Filipino,Chinese,Malay,Indonesian,English": 0.19333333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.18666666666666668, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.18, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.18666666666666668, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.20666666666666667, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.21333333333333335, - "Chinese,Spanish,Malay,Indonesian,English": 0.18 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3, + "Filipino,Vietnamese,Chinese,Malay,English": 0.28, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.32, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.36, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.3, + "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.29333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3, + "Vietnamese,Chinese,Spanish,Malay,English": 0.28, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.31333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.14, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.14666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.15333333333333332 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.3, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667 } }, - "AC3_2": 0.5143362571310687, - "AC3_3": 0.40739695250289754, - "AC3_4": 0.333417547800299, - "AC3_5": 0.2787525210316975, - "AC3_6": 0.23702867885535306, - "AC3_7": 0.20385707290053107 + "AC3_2": 0.5692644308686308, + "AC3_3": 0.4876414522122174, + "AC3_4": 0.43320293589352155, + "AC3_5": 0.39391365733690314, + "AC3_6": 0.364226673243772, + "AC3_7": 0.34049441782007345 }, "prompt_3": { - "overall_acc": 0.5219047619047619, + "overall_acc": 0.5333333333333333, "language_acc": { - "Filipino": 0.5066666666666667, - "Vietnamese": 0.47333333333333333, - "Chinese": 0.5466666666666666, - "Spanish": 0.5266666666666666, - "Malay": 0.4666666666666667, - "Indonesian": 0.5066666666666667, - "English": 0.6266666666666667 + "Filipino": 0.5133333333333333, + "Vietnamese": 0.5266666666666666, + "Chinese": 0.5666666666666667, + "Spanish": 0.5866666666666667, + "Malay": 0.43333333333333335, + "Indonesian": 0.5, + "English": 0.6066666666666667 }, - "consistency_score_2": 0.49269841269841275, - "consistency_score_3": 0.3135238095238095, - "consistency_score_4": 0.22704761904761903, - "consistency_score_5": 0.17968253968253972, - "consistency_score_6": 0.15333333333333332, - "consistency_score_7": 0.14, + "consistency_score_2": 0.5847619047619047, + "consistency_score_3": 0.4276190476190476, + "consistency_score_4": 0.34419047619047616, + "consistency_score_5": 0.29111111111111115, + "consistency_score_6": 0.2542857142857143, + "consistency_score_7": 0.22666666666666666, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.48, - "Filipino,Chinese": 0.44, - "Filipino,Spanish": 0.47333333333333333, - "Filipino,Malay": 0.52, - "Filipino,Indonesian": 0.4666666666666667, - "Filipino,English": 0.48, - "Vietnamese,Chinese": 0.5, - "Vietnamese,Spanish": 0.5333333333333333, - "Vietnamese,Malay": 0.4533333333333333, - "Vietnamese,Indonesian": 0.49333333333333335, - "Vietnamese,English": 0.5066666666666667, - "Chinese,Spanish": 0.4666666666666667, - "Chinese,Malay": 0.38, - "Chinese,Indonesian": 0.4533333333333333, + "Filipino,Vietnamese": 0.5866666666666667, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.5866666666666667, + "Filipino,Malay": 0.5533333333333333, + "Filipino,Indonesian": 0.62, + "Filipino,English": 0.5533333333333333, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Spanish": 0.62, + "Vietnamese,Malay": 0.54, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,English": 0.5666666666666667, + "Chinese,Spanish": 0.5266666666666666, + "Chinese,Malay": 0.5733333333333334, + "Chinese,Indonesian": 0.56, "Chinese,English": 0.5266666666666666, - "Spanish,Malay": 0.4866666666666667, - "Spanish,Indonesian": 0.56, - "Spanish,English": 0.5933333333333334, - "Malay,Indonesian": 0.5266666666666666, - "Malay,English": 0.48, - "Indonesian,English": 0.5266666666666666 + "Spanish,Malay": 0.5466666666666666, + "Spanish,Indonesian": 0.6466666666666666, + "Spanish,English": 0.7, + "Malay,Indonesian": 0.6533333333333333, + "Malay,English": 0.5133333333333333, + "Indonesian,English": 0.6133333333333333 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.3, - "Filipino,Vietnamese,Spanish": 0.3333333333333333, - "Filipino,Vietnamese,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Indonesian": 0.3, - "Filipino,Vietnamese,English": 0.3, - "Filipino,Chinese,Spanish": 0.24666666666666667, - "Filipino,Chinese,Malay": 0.25333333333333335, - "Filipino,Chinese,Indonesian": 0.26, - "Filipino,Chinese,English": 0.31333333333333335, - "Filipino,Spanish,Malay": 0.31333333333333335, - "Filipino,Spanish,Indonesian": 0.32666666666666666, - "Filipino,Spanish,English": 0.32666666666666666, - "Filipino,Malay,Indonesian": 0.34, - "Filipino,Malay,English": 0.31333333333333335, - "Filipino,Indonesian,English": 0.30666666666666664, - "Vietnamese,Chinese,Spanish": 0.3333333333333333, - "Vietnamese,Chinese,Malay": 0.23333333333333334, - "Vietnamese,Chinese,Indonesian": 0.32, - "Vietnamese,Chinese,English": 0.34, - "Vietnamese,Spanish,Malay": 0.32666666666666666, - "Vietnamese,Spanish,Indonesian": 0.37333333333333335, - "Vietnamese,Spanish,English": 0.36666666666666664, - "Vietnamese,Malay,Indonesian": 0.32, - "Vietnamese,Malay,English": 0.2866666666666667, - "Vietnamese,Indonesian,English": 0.3333333333333333, - "Chinese,Spanish,Malay": 0.22, - "Chinese,Spanish,Indonesian": 0.30666666666666664, - "Chinese,Spanish,English": 0.38, - "Chinese,Malay,Indonesian": 0.28, - "Chinese,Malay,English": 0.28, - "Chinese,Indonesian,English": 0.32666666666666666, - "Spanish,Malay,Indonesian": 0.36666666666666664, - "Spanish,Malay,English": 0.32666666666666666, - "Spanish,Indonesian,English": 0.37333333333333335, - "Malay,Indonesian,English": 0.34 + "Filipino,Vietnamese,Chinese": 0.43333333333333335, + "Filipino,Vietnamese,Spanish": 0.44, + "Filipino,Vietnamese,Malay": 0.38666666666666666, + "Filipino,Vietnamese,Indonesian": 0.47333333333333333, + "Filipino,Vietnamese,English": 0.41333333333333333, + "Filipino,Chinese,Spanish": 0.4, + "Filipino,Chinese,Malay": 0.3933333333333333, + "Filipino,Chinese,Indonesian": 0.4266666666666667, + "Filipino,Chinese,English": 0.4, + "Filipino,Spanish,Malay": 0.42, + "Filipino,Spanish,Indonesian": 0.48, + "Filipino,Spanish,English": 0.48, + "Filipino,Malay,Indonesian": 0.4533333333333333, + "Filipino,Malay,English": 0.38666666666666666, + "Filipino,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Chinese,Malay": 0.4, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,English": 0.4066666666666667, + "Vietnamese,Spanish,Malay": 0.4, + "Vietnamese,Spanish,Indonesian": 0.4866666666666667, + "Vietnamese,Spanish,English": 0.4866666666666667, + "Vietnamese,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,English": 0.38, + "Vietnamese,Indonesian,English": 0.4666666666666667, + "Chinese,Spanish,Malay": 0.37333333333333335, + "Chinese,Spanish,Indonesian": 0.3933333333333333, + "Chinese,Spanish,English": 0.43333333333333335, + "Chinese,Malay,Indonesian": 0.42, + "Chinese,Malay,English": 0.37333333333333335, + "Chinese,Indonesian,English": 0.4066666666666667, + "Spanish,Malay,Indonesian": 0.46, + "Spanish,Malay,English": 0.43333333333333335, + "Spanish,Indonesian,English": 0.5066666666666667, + "Malay,Indonesian,English": 0.42 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.21333333333333335, - "Filipino,Vietnamese,Chinese,Malay": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Indonesian": 0.2, - "Filipino,Vietnamese,Chinese,English": 0.23333333333333334, - "Filipino,Vietnamese,Spanish,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.24, - "Filipino,Vietnamese,Malay,Indonesian": 0.24, - "Filipino,Vietnamese,Malay,English": 0.20666666666666667, - "Filipino,Vietnamese,Indonesian,English": 0.21333333333333335, - "Filipino,Chinese,Spanish,Malay": 0.16, - "Filipino,Chinese,Spanish,Indonesian": 0.18, - "Filipino,Chinese,Spanish,English": 0.22666666666666666, - "Filipino,Chinese,Malay,Indonesian": 0.20666666666666667, - "Filipino,Chinese,Malay,English": 0.22, - "Filipino,Chinese,Indonesian,English": 0.22, - "Filipino,Spanish,Malay,Indonesian": 0.26666666666666666, - "Filipino,Spanish,Malay,English": 0.22666666666666666, - "Filipino,Spanish,Indonesian,English": 0.23333333333333334, - "Filipino,Malay,Indonesian,English": 0.26666666666666666, - "Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, - "Vietnamese,Chinese,Spanish,Indonesian": 0.25333333333333335, - "Vietnamese,Chinese,Spanish,English": 0.2733333333333333, - "Vietnamese,Chinese,Malay,Indonesian": 0.2, - "Vietnamese,Chinese,Malay,English": 0.2, - "Vietnamese,Chinese,Indonesian,English": 0.25333333333333335, - "Vietnamese,Spanish,Malay,Indonesian": 0.2733333333333333, - "Vietnamese,Spanish,Malay,English": 0.23333333333333334, - "Vietnamese,Spanish,Indonesian,English": 0.2733333333333333, - "Vietnamese,Malay,Indonesian,English": 0.22, - "Chinese,Spanish,Malay,Indonesian": 0.19333333333333333, - "Chinese,Spanish,Malay,English": 0.20666666666666667, - "Chinese,Spanish,Indonesian,English": 0.26, - "Chinese,Malay,Indonesian,English": 0.23333333333333334, - "Spanish,Malay,Indonesian,English": 0.26 + "Filipino,Vietnamese,Chinese,Spanish": 0.34, + "Filipino,Vietnamese,Chinese,Malay": 0.32, + "Filipino,Vietnamese,Chinese,Indonesian": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,English": 0.34, + "Filipino,Vietnamese,Spanish,Malay": 0.32, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.37333333333333335, + "Filipino,Vietnamese,Malay,Indonesian": 0.35333333333333333, + "Filipino,Vietnamese,Malay,English": 0.31333333333333335, + "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, + "Filipino,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Chinese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Chinese,Malay,English": 0.30666666666666664, + "Filipino,Chinese,Indonesian,English": 0.3333333333333333, + "Filipino,Spanish,Malay,Indonesian": 0.37333333333333335, + "Filipino,Spanish,Malay,English": 0.36666666666666664, + "Filipino,Spanish,Indonesian,English": 0.4, + "Filipino,Malay,Indonesian,English": 0.34, + "Vietnamese,Chinese,Spanish,Malay": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.3466666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Malay,English": 0.32, + "Vietnamese,Chinese,Indonesian,English": 0.3466666666666667, + "Vietnamese,Spanish,Malay,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Malay,English": 0.3466666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,English": 0.34, + "Chinese,Spanish,Malay,Indonesian": 0.32666666666666666, + "Chinese,Spanish,Malay,English": 0.3333333333333333, + "Chinese,Spanish,Indonesian,English": 0.34, + "Chinese,Malay,Indonesian,English": 0.32666666666666666, + "Spanish,Malay,Indonesian,English": 0.36666666666666664 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.19333333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.16, - "Filipino,Vietnamese,Chinese,Malay,English": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.17333333333333334, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.22, - "Filipino,Vietnamese,Spanish,Malay,English": 0.18, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.18666666666666668, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.18, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.15333333333333332, - "Filipino,Chinese,Spanish,Malay,English": 0.16, - "Filipino,Chinese,Spanish,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Malay,Indonesian,English": 0.2, - "Filipino,Spanish,Malay,Indonesian,English": 0.20666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.22, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.18, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.2, - "Chinese,Spanish,Malay,Indonesian,English": 0.18 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.3, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.32, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, + "Vietnamese,Chinese,Spanish,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.14, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.15333333333333332, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.15333333333333332, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.16 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.26, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.14 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 } }, - "AC3_2": 0.506881220523377, - "AC3_3": 0.3917260547161011, - "AC3_4": 0.3164346267077376, - "AC3_5": 0.26732859293754846, - "AC3_6": 0.23702867885535306, - "AC3_7": 0.22077697838391142 + "AC3_2": 0.5578648494674255, + "AC3_3": 0.4746613808557917, + "AC3_4": 0.41837783078945473, + "AC3_5": 0.3766397124430851, + "AC3_6": 0.3443772671872314, + "AC3_7": 0.3181286549289012 }, "prompt_4": { - "overall_acc": 0.5257142857142857, + "overall_acc": 0.5457142857142857, "language_acc": { - "Filipino": 0.5066666666666667, - "Vietnamese": 0.49333333333333335, - "Chinese": 0.52, - "Spanish": 0.5533333333333333, - "Malay": 0.47333333333333333, - "Indonesian": 0.49333333333333335, - "English": 0.64 + "Filipino": 0.5333333333333333, + "Vietnamese": 0.5066666666666667, + "Chinese": 0.5733333333333334, + "Spanish": 0.5466666666666666, + "Malay": 0.5, + "Indonesian": 0.5133333333333333, + "English": 0.6466666666666666 }, - "consistency_score_2": 0.47714285714285704, - "consistency_score_3": 0.2897142857142857, - "consistency_score_4": 0.1996190476190476, - "consistency_score_5": 0.1485714285714286, - "consistency_score_6": 0.11619047619047619, - "consistency_score_7": 0.09333333333333334, + "consistency_score_2": 0.5590476190476191, + "consistency_score_3": 0.3906666666666667, + "consistency_score_4": 0.30666666666666664, + "consistency_score_5": 0.25714285714285723, + "consistency_score_6": 0.22380952380952382, + "consistency_score_7": 0.2, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.49333333333333335, - "Filipino,Chinese": 0.44666666666666666, - "Filipino,Spanish": 0.5, - "Filipino,Malay": 0.48, - "Filipino,Indonesian": 0.49333333333333335, - "Filipino,English": 0.5333333333333333, - "Vietnamese,Chinese": 0.36666666666666664, - "Vietnamese,Spanish": 0.4266666666666667, - "Vietnamese,Malay": 0.44, - "Vietnamese,Indonesian": 0.4866666666666667, - "Vietnamese,English": 0.4666666666666667, - "Chinese,Spanish": 0.46, - "Chinese,Malay": 0.4066666666666667, - "Chinese,Indonesian": 0.43333333333333335, - "Chinese,English": 0.4533333333333333, - "Spanish,Malay": 0.5533333333333333, - "Spanish,Indonesian": 0.5266666666666666, - "Spanish,English": 0.5666666666666667, - "Malay,Indonesian": 0.5533333333333333, - "Malay,English": 0.4266666666666667, - "Indonesian,English": 0.5066666666666667 + "Filipino,Vietnamese": 0.52, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.6, + "Filipino,Malay": 0.5333333333333333, + "Filipino,Indonesian": 0.56, + "Filipino,English": 0.56, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Spanish": 0.58, + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,English": 0.5133333333333333, + "Chinese,Spanish": 0.5466666666666666, + "Chinese,Malay": 0.4866666666666667, + "Chinese,Indonesian": 0.6, + "Chinese,English": 0.5066666666666667, + "Spanish,Malay": 0.5666666666666667, + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,English": 0.6266666666666667, + "Malay,Indonesian": 0.6733333333333333, + "Malay,English": 0.49333333333333335, + "Indonesian,English": 0.52 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.24, - "Filipino,Vietnamese,Spanish": 0.2866666666666667, - "Filipino,Vietnamese,Malay": 0.2866666666666667, - "Filipino,Vietnamese,Indonesian": 0.3, - "Filipino,Vietnamese,English": 0.3, - "Filipino,Chinese,Spanish": 0.2866666666666667, - "Filipino,Chinese,Malay": 0.25333333333333335, - "Filipino,Chinese,Indonesian": 0.25333333333333335, - "Filipino,Chinese,English": 0.2866666666666667, - "Filipino,Spanish,Malay": 0.3466666666666667, - "Filipino,Spanish,Indonesian": 0.32, - "Filipino,Spanish,English": 0.35333333333333333, - "Filipino,Malay,Indonesian": 0.32666666666666666, - "Filipino,Malay,English": 0.29333333333333333, - "Filipino,Indonesian,English": 0.32666666666666666, - "Vietnamese,Chinese,Spanish": 0.22666666666666666, - "Vietnamese,Chinese,Malay": 0.22, - "Vietnamese,Chinese,Indonesian": 0.22, - "Vietnamese,Chinese,English": 0.22666666666666666, - "Vietnamese,Spanish,Malay": 0.3, - "Vietnamese,Spanish,Indonesian": 0.29333333333333333, - "Vietnamese,Spanish,English": 0.30666666666666664, - "Vietnamese,Malay,Indonesian": 0.3, - "Vietnamese,Malay,English": 0.26, - "Vietnamese,Indonesian,English": 0.3, - "Chinese,Spanish,Malay": 0.28, - "Chinese,Spanish,Indonesian": 0.2866666666666667, - "Chinese,Spanish,English": 0.30666666666666664, - "Chinese,Malay,Indonesian": 0.26666666666666666, - "Chinese,Malay,English": 0.22666666666666666, - "Chinese,Indonesian,English": 0.28, - "Spanish,Malay,Indonesian": 0.37333333333333335, - "Spanish,Malay,English": 0.3333333333333333, - "Spanish,Indonesian,English": 0.36666666666666664, - "Malay,Indonesian,English": 0.30666666666666664 + "Filipino,Vietnamese,Chinese": 0.3933333333333333, + "Filipino,Vietnamese,Spanish": 0.38666666666666666, + "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,English": 0.36666666666666664, + "Filipino,Chinese,Spanish": 0.4266666666666667, + "Filipino,Chinese,Malay": 0.36, + "Filipino,Chinese,Indonesian": 0.4066666666666667, + "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Spanish,Malay": 0.4, + "Filipino,Spanish,Indonesian": 0.4266666666666667, + "Filipino,Spanish,English": 0.46, + "Filipino,Malay,Indonesian": 0.41333333333333333, + "Filipino,Malay,English": 0.35333333333333333, + "Filipino,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Chinese,Malay": 0.37333333333333335, + "Vietnamese,Chinese,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,English": 0.35333333333333333, + "Vietnamese,Spanish,Malay": 0.4, + "Vietnamese,Spanish,Indonesian": 0.42, + "Vietnamese,Spanish,English": 0.41333333333333333, + "Vietnamese,Malay,Indonesian": 0.44, + "Vietnamese,Malay,English": 0.3466666666666667, + "Vietnamese,Indonesian,English": 0.36666666666666664, + "Chinese,Spanish,Malay": 0.36, + "Chinese,Spanish,Indonesian": 0.4066666666666667, + "Chinese,Spanish,English": 0.37333333333333335, + "Chinese,Malay,Indonesian": 0.42, + "Chinese,Malay,English": 0.30666666666666664, + "Chinese,Indonesian,English": 0.36666666666666664, + "Spanish,Malay,Indonesian": 0.43333333333333335, + "Spanish,Malay,English": 0.38666666666666666, + "Spanish,Indonesian,English": 0.41333333333333333, + "Malay,Indonesian,English": 0.38 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Malay": 0.16, - "Filipino,Vietnamese,Chinese,Indonesian": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,English": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Malay": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,Indonesian": 0.21333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.23333333333333334, - "Filipino,Vietnamese,Malay,Indonesian": 0.20666666666666667, - "Filipino,Vietnamese,Malay,English": 0.2, - "Filipino,Vietnamese,Indonesian,English": 0.22666666666666666, - "Filipino,Chinese,Spanish,Malay": 0.19333333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.18666666666666668, - "Filipino,Chinese,Spanish,English": 0.21333333333333335, - "Filipino,Chinese,Malay,Indonesian": 0.17333333333333334, - "Filipino,Chinese,Malay,English": 0.17333333333333334, - "Filipino,Chinese,Indonesian,English": 0.2, - "Filipino,Spanish,Malay,Indonesian": 0.26, - "Filipino,Spanish,Malay,English": 0.24666666666666667, - "Filipino,Spanish,Indonesian,English": 0.24666666666666667, - "Filipino,Malay,Indonesian,English": 0.22, - "Vietnamese,Chinese,Spanish,Malay": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Indonesian": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,English": 0.17333333333333334, - "Vietnamese,Chinese,Malay,Indonesian": 0.16, - "Vietnamese,Chinese,Malay,English": 0.14666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.16666666666666666, - "Vietnamese,Spanish,Malay,Indonesian": 0.22666666666666666, - "Vietnamese,Spanish,Malay,English": 0.22666666666666666, - "Vietnamese,Spanish,Indonesian,English": 0.22, - "Vietnamese,Malay,Indonesian,English": 0.19333333333333333, - "Chinese,Spanish,Malay,Indonesian": 0.20666666666666667, - "Chinese,Spanish,Malay,English": 0.19333333333333333, - "Chinese,Spanish,Indonesian,English": 0.21333333333333335, - "Chinese,Malay,Indonesian,English": 0.18666666666666668, - "Spanish,Malay,Indonesian,English": 0.26666666666666666 + "Filipino,Vietnamese,Chinese,Spanish": 0.32, + "Filipino,Vietnamese,Chinese,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Indonesian": 0.32, + "Filipino,Vietnamese,Spanish,English": 0.32666666666666666, + "Filipino,Vietnamese,Malay,Indonesian": 0.3, + "Filipino,Vietnamese,Malay,English": 0.26666666666666666, + "Filipino,Vietnamese,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay": 0.29333333333333333, + "Filipino,Chinese,Spanish,Indonesian": 0.3333333333333333, + "Filipino,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Malay,English": 0.2733333333333333, + "Filipino,Chinese,Indonesian,English": 0.31333333333333335, + "Filipino,Spanish,Malay,Indonesian": 0.32666666666666666, + "Filipino,Spanish,Malay,English": 0.31333333333333335, + "Filipino,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.3, + "Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Vietnamese,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Malay,English": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Malay,English": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.3, + "Chinese,Spanish,Malay,Indonesian": 0.32, + "Chinese,Spanish,Malay,English": 0.2733333333333333, + "Chinese,Spanish,Indonesian,English": 0.3, + "Chinese,Malay,Indonesian,English": 0.2733333333333333, + "Spanish,Malay,Indonesian,English": 0.31333333333333335 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.13333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.13333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.18, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.14666666666666667, - "Filipino,Chinese,Spanish,Malay,English": 0.14666666666666667, - "Filipino,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Malay,Indonesian,English": 0.14, - "Filipino,Spanish,Malay,Indonesian,English": 0.2, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,Malay,English": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.13333333333333333, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.12, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.17333333333333334, - "Chinese,Spanish,Malay,Indonesian,English": 0.16 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.26, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Malay,English": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Malay,English": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Chinese,Spanish,Malay,English": 0.24, + "Filipino,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Spanish,Malay,Indonesian,English": 0.26, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Malay,English": 0.22666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.24, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.24 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.10666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.12, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.1, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.14666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.10666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.24, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.21333333333333335, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.09333333333333334 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2 } }, - "AC3_2": 0.5002523402024575, - "AC3_3": 0.37356291916532036, - "AC3_4": 0.2893637454583054, - "AC3_5": 0.23167070214481886, - "AC3_6": 0.19031793129718447, - "AC3_7": 0.15852307689746933 + "AC3_2": 0.5523004925608448, + "AC3_3": 0.45535394624917397, + "AC3_4": 0.39267039101538503, + "AC3_5": 0.349567869809027, + "AC3_6": 0.3174328146687919, + "AC3_7": 0.29272030647415626 }, "prompt_5": { - "overall_acc": 0.48761904761904756, + "overall_acc": 0.5438095238095239, "language_acc": { - "Filipino": 0.5, - "Vietnamese": 0.46, - "Chinese": 0.49333333333333335, - "Spanish": 0.5266666666666666, - "Malay": 0.42, - "Indonesian": 0.4666666666666667, - "English": 0.5466666666666666 + "Filipino": 0.54, + "Vietnamese": 0.4666666666666667, + "Chinese": 0.6, + "Spanish": 0.5533333333333333, + "Malay": 0.5, + "Indonesian": 0.49333333333333335, + "English": 0.6533333333333333 }, - "consistency_score_2": 0.45746031746031757, - "consistency_score_3": 0.26895238095238094, - "consistency_score_4": 0.17923809523809528, - "consistency_score_5": 0.1285714285714286, - "consistency_score_6": 0.0961904761904762, - "consistency_score_7": 0.07333333333333333, + "consistency_score_2": 0.5673015873015874, + "consistency_score_3": 0.40361904761904777, + "consistency_score_4": 0.3182857142857142, + "consistency_score_5": 0.2663492063492064, + "consistency_score_6": 0.23142857142857146, + "consistency_score_7": 0.20666666666666667, "detailed_consistency_score": { "2_combine": { "Filipino,Vietnamese": 0.5066666666666667, - "Filipino,Chinese": 0.3933333333333333, - "Filipino,Spanish": 0.49333333333333335, - "Filipino,Malay": 0.4666666666666667, - "Filipino,Indonesian": 0.44, - "Filipino,English": 0.43333333333333335, - "Vietnamese,Chinese": 0.43333333333333335, - "Vietnamese,Spanish": 0.49333333333333335, - "Vietnamese,Malay": 0.48, - "Vietnamese,Indonesian": 0.4666666666666667, - "Vietnamese,English": 0.47333333333333333, - "Chinese,Spanish": 0.41333333333333333, - "Chinese,Malay": 0.37333333333333335, - "Chinese,Indonesian": 0.38666666666666666, - "Chinese,English": 0.4, - "Spanish,Malay": 0.4666666666666667, - "Spanish,Indonesian": 0.5066666666666667, - "Spanish,English": 0.52, - "Malay,Indonesian": 0.5133333333333333, - "Malay,English": 0.4533333333333333, - "Indonesian,English": 0.49333333333333335 + "Filipino,Chinese": 0.5866666666666667, + "Filipino,Spanish": 0.56, + "Filipino,Malay": 0.5466666666666666, + "Filipino,Indonesian": 0.5666666666666667, + "Filipino,English": 0.5666666666666667, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.5733333333333334, + "Vietnamese,Malay": 0.5133333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,English": 0.52, + "Chinese,Spanish": 0.56, + "Chinese,Malay": 0.5733333333333334, + "Chinese,Indonesian": 0.58, + "Chinese,English": 0.5333333333333333, + "Spanish,Malay": 0.56, + "Spanish,Indonesian": 0.6066666666666667, + "Spanish,English": 0.6533333333333333, + "Malay,Indonesian": 0.68, + "Malay,English": 0.5266666666666666, + "Indonesian,English": 0.6 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.23333333333333334, - "Filipino,Vietnamese,Spanish": 0.32666666666666666, - "Filipino,Vietnamese,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Indonesian": 0.2733333333333333, - "Filipino,Vietnamese,English": 0.2733333333333333, - "Filipino,Chinese,Spanish": 0.23333333333333334, - "Filipino,Chinese,Malay": 0.19333333333333333, - "Filipino,Chinese,Indonesian": 0.22666666666666666, - "Filipino,Chinese,English": 0.22666666666666666, - "Filipino,Spanish,Malay": 0.28, - "Filipino,Spanish,Indonesian": 0.3, - "Filipino,Spanish,English": 0.30666666666666664, - "Filipino,Malay,Indonesian": 0.30666666666666664, - "Filipino,Malay,English": 0.2733333333333333, - "Filipino,Indonesian,English": 0.28, - "Vietnamese,Chinese,Spanish": 0.26, - "Vietnamese,Chinese,Malay": 0.20666666666666667, - "Vietnamese,Chinese,Indonesian": 0.23333333333333334, - "Vietnamese,Chinese,English": 0.26666666666666666, - "Vietnamese,Spanish,Malay": 0.29333333333333333, - "Vietnamese,Spanish,Indonesian": 0.30666666666666664, - "Vietnamese,Spanish,English": 0.3, - "Vietnamese,Malay,Indonesian": 0.3, - "Vietnamese,Malay,English": 0.2866666666666667, - "Vietnamese,Indonesian,English": 0.28, - "Chinese,Spanish,Malay": 0.22, - "Chinese,Spanish,Indonesian": 0.25333333333333335, - "Chinese,Spanish,English": 0.24666666666666667, - "Chinese,Malay,Indonesian": 0.24, - "Chinese,Malay,English": 0.2, - "Chinese,Indonesian,English": 0.23333333333333334, - "Spanish,Malay,Indonesian": 0.32, - "Spanish,Malay,English": 0.29333333333333333, - "Spanish,Indonesian,English": 0.34, - "Malay,Indonesian,English": 0.29333333333333333 + "Filipino,Vietnamese,Chinese": 0.38666666666666666, + "Filipino,Vietnamese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,English": 0.37333333333333335, + "Filipino,Chinese,Spanish": 0.41333333333333333, + "Filipino,Chinese,Malay": 0.4066666666666667, + "Filipino,Chinese,Indonesian": 0.42, + "Filipino,Chinese,English": 0.41333333333333333, + "Filipino,Spanish,Malay": 0.3933333333333333, + "Filipino,Spanish,Indonesian": 0.42, + "Filipino,Spanish,English": 0.44, + "Filipino,Malay,Indonesian": 0.43333333333333335, + "Filipino,Malay,English": 0.36666666666666664, + "Filipino,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,Malay": 0.38, + "Vietnamese,Chinese,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.36666666666666664, + "Vietnamese,Spanish,Malay": 0.3933333333333333, + "Vietnamese,Spanish,Indonesian": 0.42, + "Vietnamese,Spanish,English": 0.42, + "Vietnamese,Malay,Indonesian": 0.42, + "Vietnamese,Malay,English": 0.35333333333333333, + "Vietnamese,Indonesian,English": 0.4, + "Chinese,Spanish,Malay": 0.4, + "Chinese,Spanish,Indonesian": 0.41333333333333333, + "Chinese,Spanish,English": 0.41333333333333333, + "Chinese,Malay,Indonesian": 0.44666666666666666, + "Chinese,Malay,English": 0.36666666666666664, + "Chinese,Indonesian,English": 0.41333333333333333, + "Spanish,Malay,Indonesian": 0.4533333333333333, + "Spanish,Malay,English": 0.4, + "Spanish,Indonesian,English": 0.46, + "Malay,Indonesian,English": 0.43333333333333335 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Malay": 0.13333333333333333, - "Filipino,Vietnamese,Chinese,Indonesian": 0.16, - "Filipino,Vietnamese,Chinese,English": 0.16, - "Filipino,Vietnamese,Spanish,Malay": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.21333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.21333333333333335, - "Filipino,Vietnamese,Malay,Indonesian": 0.22, - "Filipino,Vietnamese,Malay,English": 0.19333333333333333, - "Filipino,Vietnamese,Indonesian,English": 0.19333333333333333, - "Filipino,Chinese,Spanish,Malay": 0.13333333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.17333333333333334, - "Filipino,Chinese,Spanish,English": 0.17333333333333334, - "Filipino,Chinese,Malay,Indonesian": 0.15333333333333332, - "Filipino,Chinese,Malay,English": 0.13333333333333333, - "Filipino,Chinese,Indonesian,English": 0.16, - "Filipino,Spanish,Malay,Indonesian": 0.22, - "Filipino,Spanish,Malay,English": 0.19333333333333333, - "Filipino,Spanish,Indonesian,English": 0.22, - "Filipino,Malay,Indonesian,English": 0.20666666666666667, - "Vietnamese,Chinese,Spanish,Malay": 0.14, - "Vietnamese,Chinese,Spanish,Indonesian": 0.18, - "Vietnamese,Chinese,Spanish,English": 0.16666666666666666, - "Vietnamese,Chinese,Malay,Indonesian": 0.15333333333333332, - "Vietnamese,Chinese,Malay,English": 0.13333333333333333, - "Vietnamese,Chinese,Indonesian,English": 0.16, - "Vietnamese,Spanish,Malay,Indonesian": 0.21333333333333335, - "Vietnamese,Spanish,Malay,English": 0.20666666666666667, - "Vietnamese,Spanish,Indonesian,English": 0.22, - "Vietnamese,Malay,Indonesian,English": 0.2, - "Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Chinese,Spanish,Malay,English": 0.14666666666666667, - "Chinese,Spanish,Indonesian,English": 0.18, - "Chinese,Malay,Indonesian,English": 0.15333333333333332, - "Spanish,Malay,Indonesian,English": 0.22 + "Filipino,Vietnamese,Chinese,Spanish": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay": 0.28, + "Filipino,Vietnamese,Spanish,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,English": 0.32, + "Filipino,Vietnamese,Malay,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Malay,English": 0.2866666666666667, + "Filipino,Vietnamese,Indonesian,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Malay": 0.32, + "Filipino,Chinese,Spanish,Indonesian": 0.32, + "Filipino,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.34, + "Filipino,Chinese,Malay,English": 0.30666666666666664, + "Filipino,Chinese,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Spanish,Malay,English": 0.30666666666666664, + "Filipino,Spanish,Indonesian,English": 0.3466666666666667, + "Filipino,Malay,Indonesian,English": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.28, + "Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, + "Vietnamese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Spanish,Malay,English": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian,English": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,English": 0.32, + "Chinese,Spanish,Malay,Indonesian": 0.34, + "Chinese,Spanish,Malay,English": 0.30666666666666664, + "Chinese,Spanish,Indonesian,English": 0.32, + "Chinese,Malay,Indonesian,English": 0.32666666666666666, + "Spanish,Malay,Indonesian,English": 0.34 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.1, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.14, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.12, - "Filipino,Vietnamese,Chinese,Malay,English": 0.09333333333333334, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.12, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Malay,English": 0.14666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.16666666666666666, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.12, - "Filipino,Chinese,Spanish,Malay,English": 0.10666666666666667, - "Filipino,Chinese,Spanish,Indonesian,English": 0.13333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.11333333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.16, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.12, - "Vietnamese,Chinese,Spanish,Malay,English": 0.1, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.12666666666666668, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.10666666666666667, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.16, - "Chinese,Spanish,Malay,Indonesian,English": 0.12 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.26, + "Filipino,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.28, + "Filipino,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Malay,English": 0.24666666666666667, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.26666666666666666, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.28, + "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.1, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.07333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.10666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.08666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.12666666666666668, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.09333333333333334, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.08666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.07333333333333333 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.20666666666666667 } }, - "AC3_2": 0.4720584800434655, - "AC3_3": 0.3466858485131371, - "AC3_4": 0.2621248350418258, - "AC3_5": 0.20348862880336405, - "AC3_6": 0.1606836013086066, - "AC3_7": 0.12749292584031677 + "AC3_2": 0.5553072108343763, + "AC3_3": 0.4633423324610993, + "AC3_4": 0.40154914457812974, + "AC3_5": 0.35756754735099816, + "AC3_6": 0.32468234464046314, + "AC3_7": 0.29950930622066607 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.41233766233766234, + "overall_acc": 0.43344155844155846, "language_acc": { - "Indonesian": 0.3806818181818182, + "Indonesian": 0.3977272727272727, "English": 0.4943181818181818, - "Filipino": 0.3125, - "Spanish": 0.44886363636363635, - "Chinese": 0.4431818181818182, - "Malay": 0.38636363636363635, - "Vietnamese": 0.42045454545454547 + "Filipino": 0.3068181818181818, + "Spanish": 0.4659090909090909, + "Chinese": 0.4659090909090909, + "Malay": 0.4431818181818182, + "Vietnamese": 0.4602272727272727 }, - "consistency_score_2": 0.4374999999999999, - "consistency_score_3": 0.23961038961038958, - "consistency_score_4": 0.14756493506493504, - "consistency_score_5": 0.09875541125541125, - "consistency_score_6": 0.0698051948051948, - "consistency_score_7": 0.05113636363636364, + "consistency_score_2": 0.49485930735930744, + "consistency_score_3": 0.3073051948051948, + "consistency_score_4": 0.21704545454545454, + "consistency_score_5": 0.16639610389610388, + "consistency_score_6": 0.13311688311688313, + "consistency_score_7": 0.10795454545454546, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.42613636363636365, - "Indonesian,Filipino": 0.30113636363636365, - "Indonesian,Spanish": 0.45454545454545453, - "Indonesian,Chinese": 0.4147727272727273, - "Indonesian,Malay": 0.5, - "Indonesian,Vietnamese": 0.4715909090909091, - "English,Filipino": 0.35795454545454547, - "English,Spanish": 0.5568181818181818, - "English,Chinese": 0.42045454545454547, - "English,Malay": 0.5056818181818182, - "English,Vietnamese": 0.48863636363636365, - "Filipino,Spanish": 0.3977272727272727, - "Filipino,Chinese": 0.32954545454545453, - "Filipino,Malay": 0.4772727272727273, - "Filipino,Vietnamese": 0.3977272727272727, - "Spanish,Chinese": 0.39204545454545453, - "Spanish,Malay": 0.4943181818181818, - "Spanish,Vietnamese": 0.4943181818181818, - "Chinese,Malay": 0.3693181818181818, - "Chinese,Vietnamese": 0.4659090909090909, - "Malay,Vietnamese": 0.4715909090909091 + "Indonesian,English": 0.4715909090909091, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.5056818181818182, + "Indonesian,Chinese": 0.4943181818181818, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Vietnamese": 0.48863636363636365, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.6136363636363636, + "English,Chinese": 0.5113636363636364, + "English,Malay": 0.5454545454545454, + "English,Vietnamese": 0.4715909090909091, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Chinese": 0.3693181818181818, + "Filipino,Malay": 0.4659090909090909, + "Filipino,Vietnamese": 0.42045454545454547, + "Spanish,Chinese": 0.5113636363636364, + "Spanish,Malay": 0.5511363636363636, + "Spanish,Vietnamese": 0.5056818181818182, + "Chinese,Malay": 0.48863636363636365, + "Chinese,Vietnamese": 0.5511363636363636, + "Malay,Vietnamese": 0.5113636363636364 }, "3_combine": { - "Indonesian,English,Filipino": 0.1534090909090909, - "Indonesian,English,Spanish": 0.2784090909090909, - "Indonesian,English,Chinese": 0.22727272727272727, - "Indonesian,English,Malay": 0.2840909090909091, - "Indonesian,English,Vietnamese": 0.2784090909090909, - "Indonesian,Filipino,Spanish": 0.18181818181818182, - "Indonesian,Filipino,Chinese": 0.14204545454545456, - "Indonesian,Filipino,Malay": 0.2159090909090909, - "Indonesian,Filipino,Vietnamese": 0.19886363636363635, - "Indonesian,Spanish,Chinese": 0.21022727272727273, - "Indonesian,Spanish,Malay": 0.2897727272727273, - "Indonesian,Spanish,Vietnamese": 0.26136363636363635, - "Indonesian,Chinese,Malay": 0.19318181818181818, - "Indonesian,Chinese,Vietnamese": 0.2556818181818182, - "Indonesian,Malay,Vietnamese": 0.3068181818181818, - "English,Filipino,Spanish": 0.21022727272727273, - "English,Filipino,Chinese": 0.17613636363636365, - "English,Filipino,Malay": 0.24431818181818182, - "English,Filipino,Vietnamese": 0.20454545454545456, - "English,Spanish,Chinese": 0.2556818181818182, - "English,Spanish,Malay": 0.3352272727272727, + "Indonesian,English,Filipino": 0.23863636363636365, + "Indonesian,English,Spanish": 0.3409090909090909, + "Indonesian,English,Chinese": 0.3068181818181818, + "Indonesian,English,Malay": 0.36363636363636365, + "Indonesian,English,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Filipino,Vietnamese": 0.2784090909090909, + "Indonesian,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.39204545454545453, + "Indonesian,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,Chinese,Malay": 0.36363636363636365, + "Indonesian,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,Malay,Vietnamese": 0.3693181818181818, + "English,Filipino,Spanish": 0.29545454545454547, + "English,Filipino,Chinese": 0.21022727272727273, + "English,Filipino,Malay": 0.26136363636363635, + "English,Filipino,Vietnamese": 0.21022727272727273, + "English,Spanish,Chinese": 0.36363636363636365, + "English,Spanish,Malay": 0.39204545454545453, "English,Spanish,Vietnamese": 0.3409090909090909, - "English,Chinese,Malay": 0.23863636363636365, - "English,Chinese,Vietnamese": 0.26704545454545453, - "English,Malay,Vietnamese": 0.29545454545454547, - "Filipino,Spanish,Chinese": 0.18181818181818182, - "Filipino,Spanish,Malay": 0.26704545454545453, - "Filipino,Spanish,Vietnamese": 0.2159090909090909, - "Filipino,Chinese,Malay": 0.20454545454545456, - "Filipino,Chinese,Vietnamese": 0.20454545454545456, - "Filipino,Malay,Vietnamese": 0.2556818181818182, - "Spanish,Chinese,Malay": 0.22727272727272727, - "Spanish,Chinese,Vietnamese": 0.2556818181818182, - "Spanish,Malay,Vietnamese": 0.2897727272727273, - "Chinese,Malay,Vietnamese": 0.23863636363636365 + "English,Chinese,Malay": 0.3522727272727273, + "English,Chinese,Vietnamese": 0.32954545454545453, + "English,Malay,Vietnamese": 0.3181818181818182, + "Filipino,Spanish,Chinese": 0.26136363636363635, + "Filipino,Spanish,Malay": 0.3125, + "Filipino,Spanish,Vietnamese": 0.26704545454545453, + "Filipino,Chinese,Malay": 0.25, + "Filipino,Chinese,Vietnamese": 0.25, + "Filipino,Malay,Vietnamese": 0.2897727272727273, + "Spanish,Chinese,Malay": 0.32386363636363635, + "Spanish,Chinese,Vietnamese": 0.3465909090909091, + "Spanish,Malay,Vietnamese": 0.32954545454545453, + "Chinese,Malay,Vietnamese": 0.32386363636363635 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.09659090909090909, - "Indonesian,English,Filipino,Chinese": 0.09090909090909091, - "Indonesian,English,Filipino,Malay": 0.13068181818181818, - "Indonesian,English,Filipino,Vietnamese": 0.125, - "Indonesian,English,Spanish,Chinese": 0.14772727272727273, - "Indonesian,English,Spanish,Malay": 0.21022727272727273, - "Indonesian,English,Spanish,Vietnamese": 0.18181818181818182, - "Indonesian,English,Chinese,Malay": 0.13068181818181818, - "Indonesian,English,Chinese,Vietnamese": 0.16477272727272727, - "Indonesian,English,Malay,Vietnamese": 0.19318181818181818, - "Indonesian,Filipino,Spanish,Chinese": 0.09659090909090909, - "Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, - "Indonesian,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.10227272727272728, - "Indonesian,Filipino,Chinese,Vietnamese": 0.10795454545454546, - "Indonesian,Filipino,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,Spanish,Chinese,Malay": 0.14204545454545456, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Malay,Vietnamese": 0.19318181818181818, - "Indonesian,Chinese,Malay,Vietnamese": 0.14204545454545456, - "English,Filipino,Spanish,Chinese": 0.11931818181818182, - "English,Filipino,Spanish,Malay": 0.1590909090909091, - "English,Filipino,Spanish,Vietnamese": 0.13636363636363635, - "English,Filipino,Chinese,Malay": 0.13636363636363635, - "English,Filipino,Chinese,Vietnamese": 0.11363636363636363, - "English,Filipino,Malay,Vietnamese": 0.1590909090909091, - "English,Spanish,Chinese,Malay": 0.18181818181818182, - "English,Spanish,Chinese,Vietnamese": 0.19318181818181818, - "English,Spanish,Malay,Vietnamese": 0.21022727272727273, - "English,Chinese,Malay,Vietnamese": 0.17613636363636365, - "Filipino,Spanish,Chinese,Malay": 0.13068181818181818, - "Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, - "Filipino,Spanish,Malay,Vietnamese": 0.16477272727272727, - "Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 + "Indonesian,English,Filipino,Spanish": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese": 0.14772727272727273, + "Indonesian,English,Filipino,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Vietnamese": 0.16477272727272727, + "Indonesian,English,Spanish,Chinese": 0.25, + "Indonesian,English,Spanish,Malay": 0.2840909090909091, + "Indonesian,English,Spanish,Vietnamese": 0.22727272727272727, + "Indonesian,English,Chinese,Malay": 0.26136363636363635, + "Indonesian,English,Chinese,Vietnamese": 0.22727272727272727, + "Indonesian,English,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Chinese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Malay": 0.1875, + "Indonesian,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,Filipino,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,Spanish,Chinese,Malay": 0.2556818181818182, + "Indonesian,Spanish,Chinese,Vietnamese": 0.25, + "Indonesian,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,Chinese,Malay,Vietnamese": 0.2727272727272727, + "English,Filipino,Spanish,Chinese": 0.1875, + "English,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Filipino,Spanish,Vietnamese": 0.17613636363636365, + "English,Filipino,Chinese,Malay": 0.17045454545454544, + "English,Filipino,Chinese,Vietnamese": 0.1534090909090909, + "English,Filipino,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay": 0.2784090909090909, + "English,Spanish,Chinese,Vietnamese": 0.25, + "English,Spanish,Malay,Vietnamese": 0.2556818181818182, + "English,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, + "Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Filipino,Chinese,Malay,Vietnamese": 0.1875, + "Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.0625, - "Indonesian,English,Filipino,Spanish,Malay": 0.08522727272727272, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.08522727272727272, - "Indonesian,English,Filipino,Chinese,Malay": 0.07386363636363637, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.06818181818181818, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Chinese,Malay": 0.11363636363636363, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.07386363636363637, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.08522727272727272, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.08522727272727272, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363, - "English,Filipino,Spanish,Chinese,Malay": 0.10227272727272728, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.09090909090909091, - "English,Filipino,Spanish,Malay,Vietnamese": 0.10795454545454546, - "English,Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, - "English,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728 + "Indonesian,English,Filipino,Spanish,Chinese": 0.13636363636363635, + "Indonesian,English,Filipino,Spanish,Malay": 0.17045454545454544, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Malay": 0.13068181818181818, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.125, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Chinese,Malay": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1534090909090909, + "English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, + "English,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.056818181818181816, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.056818181818181816, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.07386363636363637, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.0625, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.06818181818181818, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07954545454545454 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.125, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.13636363636363635, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.11363636363636363, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.05113636363636364 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 } }, - "AC3_2": 0.4245463227771689, - "AC3_3": 0.3030928234646598, - "AC3_4": 0.21734701942475623, - "AC3_5": 0.1593470055307564, - "AC3_6": 0.11939743755449184, - "AC3_7": 0.0909886960478733 + "AC3_2": 0.4621186886063602, + "AC3_3": 0.3596339557392517, + "AC3_4": 0.2892494951571124, + "AC3_5": 0.24047501885170827, + "AC3_6": 0.20368027384489654, + "AC3_7": 0.1728567534095331 }, "prompt_2": { - "overall_acc": 0.44642857142857145, + "overall_acc": 0.4488636363636364, "language_acc": { - "Indonesian": 0.39204545454545453, + "Indonesian": 0.42045454545454547, "English": 0.5511363636363636, - "Filipino": 0.32386363636363635, - "Spanish": 0.48863636363636365, - "Chinese": 0.4602272727272727, - "Malay": 0.42045454545454547, - "Vietnamese": 0.48863636363636365 + "Filipino": 0.3352272727272727, + "Spanish": 0.44886363636363635, + "Chinese": 0.48863636363636365, + "Malay": 0.4375, + "Vietnamese": 0.4602272727272727 }, - "consistency_score_2": 0.4691558441558441, - "consistency_score_3": 0.27516233766233766, - "consistency_score_4": 0.18165584415584413, - "consistency_score_5": 0.12905844155844154, - "consistency_score_6": 0.09659090909090909, - "consistency_score_7": 0.07386363636363637, + "consistency_score_2": 0.5303030303030303, + "consistency_score_3": 0.35162337662337656, + "consistency_score_4": 0.260551948051948, + "consistency_score_5": 0.20670995670995668, + "consistency_score_6": 0.17207792207792208, + "consistency_score_7": 0.14772727272727273, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4772727272727273, - "Indonesian,Filipino": 0.4147727272727273, - "Indonesian,Spanish": 0.5625, - "Indonesian,Chinese": 0.45454545454545453, - "Indonesian,Malay": 0.4659090909090909, - "Indonesian,Vietnamese": 0.4318181818181818, - "English,Filipino": 0.4318181818181818, - "English,Spanish": 0.5795454545454546, - "English,Chinese": 0.4659090909090909, - "English,Malay": 0.5056818181818182, - "English,Vietnamese": 0.5681818181818182, - "Filipino,Spanish": 0.4375, - "Filipino,Chinese": 0.3409090909090909, - "Filipino,Malay": 0.4715909090909091, - "Filipino,Vietnamese": 0.39204545454545453, - "Spanish,Chinese": 0.45454545454545453, - "Spanish,Malay": 0.5397727272727273, - "Spanish,Vietnamese": 0.4943181818181818, - "Chinese,Malay": 0.4147727272727273, - "Chinese,Vietnamese": 0.5113636363636364, - "Malay,Vietnamese": 0.4375 + "Indonesian,English": 0.5625, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5909090909090909, + "Indonesian,Chinese": 0.5227272727272727, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Vietnamese": 0.5113636363636364, + "English,Filipino": 0.4090909090909091, + "English,Spanish": 0.6079545454545454, + "English,Chinese": 0.5568181818181818, + "English,Malay": 0.5852272727272727, + "English,Vietnamese": 0.5511363636363636, + "Filipino,Spanish": 0.4943181818181818, + "Filipino,Chinese": 0.4034090909090909, + "Filipino,Malay": 0.5113636363636364, + "Filipino,Vietnamese": 0.4602272727272727, + "Spanish,Chinese": 0.5170454545454546, + "Spanish,Malay": 0.6079545454545454, + "Spanish,Vietnamese": 0.5454545454545454, + "Chinese,Malay": 0.5170454545454546, + "Chinese,Vietnamese": 0.5511363636363636, + "Malay,Vietnamese": 0.5170454545454546 }, "3_combine": { - "Indonesian,English,Filipino": 0.23863636363636365, - "Indonesian,English,Spanish": 0.3522727272727273, - "Indonesian,English,Chinese": 0.26704545454545453, - "Indonesian,English,Malay": 0.3068181818181818, - "Indonesian,English,Vietnamese": 0.30113636363636365, - "Indonesian,Filipino,Spanish": 0.2784090909090909, - "Indonesian,Filipino,Chinese": 0.2159090909090909, - "Indonesian,Filipino,Malay": 0.24431818181818182, - "Indonesian,Filipino,Vietnamese": 0.2215909090909091, - "Indonesian,Spanish,Chinese": 0.3181818181818182, - "Indonesian,Spanish,Malay": 0.3522727272727273, - "Indonesian,Spanish,Vietnamese": 0.29545454545454547, - "Indonesian,Chinese,Malay": 0.2556818181818182, - "Indonesian,Chinese,Vietnamese": 0.26704545454545453, - "Indonesian,Malay,Vietnamese": 0.24431818181818182, - "English,Filipino,Spanish": 0.26704545454545453, - "English,Filipino,Chinese": 0.20454545454545456, - "English,Filipino,Malay": 0.26704545454545453, - "English,Filipino,Vietnamese": 0.2556818181818182, - "English,Spanish,Chinese": 0.30113636363636365, - "English,Spanish,Malay": 0.35795454545454547, - "English,Spanish,Vietnamese": 0.3693181818181818, - "English,Chinese,Malay": 0.2784090909090909, - "English,Chinese,Vietnamese": 0.3409090909090909, - "English,Malay,Vietnamese": 0.3068181818181818, - "Filipino,Spanish,Chinese": 0.19318181818181818, - "Filipino,Spanish,Malay": 0.2897727272727273, - "Filipino,Spanish,Vietnamese": 0.23863636363636365, - "Filipino,Chinese,Malay": 0.21022727272727273, - "Filipino,Chinese,Vietnamese": 0.20454545454545456, - "Filipino,Malay,Vietnamese": 0.23295454545454544, - "Spanish,Chinese,Malay": 0.2784090909090909, - "Spanish,Chinese,Vietnamese": 0.3068181818181818, - "Spanish,Malay,Vietnamese": 0.30113636363636365, - "Chinese,Malay,Vietnamese": 0.26704545454545453 + "Indonesian,English,Filipino": 0.2840909090909091, + "Indonesian,English,Spanish": 0.4147727272727273, + "Indonesian,English,Chinese": 0.38636363636363635, + "Indonesian,English,Malay": 0.4318181818181818, + "Indonesian,English,Vietnamese": 0.35795454545454547, + "Indonesian,Filipino,Spanish": 0.3409090909090909, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Indonesian,Filipino,Malay": 0.35795454545454547, + "Indonesian,Filipino,Vietnamese": 0.2897727272727273, + "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Spanish,Malay": 0.4602272727272727, + "Indonesian,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,Chinese,Malay": 0.3977272727272727, + "Indonesian,Chinese,Vietnamese": 0.3693181818181818, + "Indonesian,Malay,Vietnamese": 0.375, + "English,Filipino,Spanish": 0.3125, + "English,Filipino,Chinese": 0.2556818181818182, + "English,Filipino,Malay": 0.3068181818181818, + "English,Filipino,Vietnamese": 0.26704545454545453, + "English,Spanish,Chinese": 0.3977272727272727, + "English,Spanish,Malay": 0.4431818181818182, + "English,Spanish,Vietnamese": 0.39204545454545453, + "English,Chinese,Malay": 0.4034090909090909, + "English,Chinese,Vietnamese": 0.38636363636363635, + "English,Malay,Vietnamese": 0.38636363636363635, + "Filipino,Spanish,Chinese": 0.26704545454545453, + "Filipino,Spanish,Malay": 0.36363636363636365, + "Filipino,Spanish,Vietnamese": 0.3068181818181818, + "Filipino,Chinese,Malay": 0.2897727272727273, + "Filipino,Chinese,Vietnamese": 0.2727272727272727, + "Filipino,Malay,Vietnamese": 0.29545454545454547, + "Spanish,Chinese,Malay": 0.3693181818181818, + "Spanish,Chinese,Vietnamese": 0.3693181818181818, + "Spanish,Malay,Vietnamese": 0.3806818181818182, + "Chinese,Malay,Vietnamese": 0.36363636363636365 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.18181818181818182, - "Indonesian,English,Filipino,Chinese": 0.13068181818181818, - "Indonesian,English,Filipino,Malay": 0.16477272727272727, - "Indonesian,English,Filipino,Vietnamese": 0.16477272727272727, - "Indonesian,English,Spanish,Chinese": 0.22727272727272727, - "Indonesian,English,Spanish,Malay": 0.25, - "Indonesian,English,Spanish,Vietnamese": 0.23863636363636365, - "Indonesian,English,Chinese,Malay": 0.1875, - "Indonesian,English,Chinese,Vietnamese": 0.19318181818181818, - "Indonesian,English,Malay,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Chinese": 0.1590909090909091, - "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Vietnamese": 0.17045454545454544, - "Indonesian,Filipino,Chinese,Malay": 0.14772727272727273, - "Indonesian,Filipino,Chinese,Vietnamese": 0.14204545454545456, - "Indonesian,Filipino,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,Spanish,Chinese,Malay": 0.2159090909090909, - "Indonesian,Spanish,Chinese,Vietnamese": 0.2159090909090909, - "Indonesian,Spanish,Malay,Vietnamese": 0.19886363636363635, - "Indonesian,Chinese,Malay,Vietnamese": 0.17613636363636365, - "English,Filipino,Spanish,Chinese": 0.13636363636363635, - "English,Filipino,Spanish,Malay": 0.1875, - "English,Filipino,Spanish,Vietnamese": 0.18181818181818182, - "English,Filipino,Chinese,Malay": 0.1534090909090909, - "English,Filipino,Chinese,Vietnamese": 0.1590909090909091, - "English,Filipino,Malay,Vietnamese": 0.16477272727272727, - "English,Spanish,Chinese,Malay": 0.21022727272727273, - "English,Spanish,Chinese,Vietnamese": 0.23863636363636365, - "English,Spanish,Malay,Vietnamese": 0.23863636363636365, - "English,Chinese,Malay,Vietnamese": 0.20454545454545456, - "Filipino,Spanish,Chinese,Malay": 0.14772727272727273, - "Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, - "Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, - "Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635 + "Indonesian,English,Filipino,Spanish": 0.25, + "Indonesian,English,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Malay": 0.25, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.3068181818181818, + "Indonesian,English,Spanish,Malay": 0.3522727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.2784090909090909, + "Indonesian,English,Chinese,Malay": 0.32954545454545453, + "Indonesian,English,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,English,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Chinese": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2215909090909091, + "Indonesian,Filipino,Chinese,Malay": 0.23295454545454544, + "Indonesian,Filipino,Chinese,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Chinese,Malay": 0.3068181818181818, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Indonesian,Spanish,Malay,Vietnamese": 0.2897727272727273, + "Indonesian,Chinese,Malay,Vietnamese": 0.29545454545454547, + "English,Filipino,Spanish,Chinese": 0.2159090909090909, + "English,Filipino,Spanish,Malay": 0.26704545454545453, + "English,Filipino,Spanish,Vietnamese": 0.2215909090909091, + "English,Filipino,Chinese,Malay": 0.22727272727272727, + "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "English,Filipino,Malay,Vietnamese": 0.2215909090909091, + "English,Spanish,Chinese,Malay": 0.32386363636363635, + "English,Spanish,Chinese,Vietnamese": 0.30113636363636365, + "English,Spanish,Malay,Vietnamese": 0.3068181818181818, + "English,Chinese,Malay,Vietnamese": 0.30113636363636365, + "Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "Filipino,Spanish,Chinese,Vietnamese": 0.2159090909090909, + "Filipino,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Spanish,Chinese,Malay,Vietnamese": 0.2840909090909091 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.11363636363636363, - "Indonesian,English,Filipino,Spanish,Malay": 0.13636363636363635, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.13636363636363635, - "Indonesian,English,Filipino,Chinese,Malay": 0.10795454545454546, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Chinese,Malay": 0.17045454545454544, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.17045454545454544, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.125, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.11363636363636363, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, - "English,Filipino,Spanish,Chinese,Malay": 0.11363636363636363, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.11363636363636363, - "English,Filipino,Spanish,Malay,Vietnamese": 0.125, - "English,Filipino,Chinese,Malay,Vietnamese": 0.11931818181818182, - "English,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728 + "Indonesian,English,Filipino,Spanish,Chinese": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Malay": 0.23295454545454544, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese,Malay": 0.2727272727272727, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.1875, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, + "English,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, + "English,Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "English,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "English,Spanish,Chinese,Malay,Vietnamese": 0.25, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1875 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.09659090909090909, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.09090909090909091, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.08522727272727272, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.08522727272727272, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 }, - "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07386363636363637 - } - }, - "AC3_2": 0.4575101316622902, - "AC3_3": 0.3404708339557091, - "AC3_4": 0.2582339474536238, - "AC3_5": 0.2002317146539029, - "AC3_6": 0.15881913300513226, - "AC3_7": 0.1267550701784458 - }, - "prompt_3": { - "overall_acc": 0.4293831168831169, - "language_acc": { - "Indonesian": 0.4147727272727273, - "English": 0.5340909090909091, - "Filipino": 0.3522727272727273, - "Spanish": 0.42045454545454547, - "Chinese": 0.42613636363636365, - "Malay": 0.4090909090909091, - "Vietnamese": 0.44886363636363635 + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 + } }, - "consistency_score_2": 0.46617965367965375, - "consistency_score_3": 0.28035714285714286, - "consistency_score_4": 0.19512987012987018, - "consistency_score_5": 0.14826839826839824, - "consistency_score_6": 0.12012987012987013, - "consistency_score_7": 0.10227272727272728, + "AC3_2": 0.48619658866144133, + "AC3_3": 0.39433730936612466, + "AC3_4": 0.3297144788384825, + "AC3_5": 0.28306382016467846, + "AC3_6": 0.24878193697716589, + "AC3_7": 0.22229437225711152 + }, + "prompt_3": { + "overall_acc": 0.46022727272727265, + "language_acc": { + "Indonesian": 0.44886363636363635, + "English": 0.5568181818181818, + "Filipino": 0.36363636363636365, + "Spanish": 0.4602272727272727, + "Chinese": 0.45454545454545453, + "Malay": 0.45454545454545453, + "Vietnamese": 0.48295454545454547 + }, + "consistency_score_2": 0.533008658008658, + "consistency_score_3": 0.35422077922077916, + "consistency_score_4": 0.2649350649350649, + "consistency_score_5": 0.21401515151515152, + "consistency_score_6": 0.18181818181818185, + "consistency_score_7": 0.1590909090909091, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4772727272727273, - "Indonesian,Filipino": 0.4318181818181818, - "Indonesian,Spanish": 0.5113636363636364, - "Indonesian,Chinese": 0.5227272727272727, - "Indonesian,Malay": 0.4715909090909091, - "Indonesian,Vietnamese": 0.42613636363636365, - "English,Filipino": 0.4034090909090909, - "English,Spanish": 0.5625, - "English,Chinese": 0.48863636363636365, - "English,Malay": 0.4772727272727273, - "English,Vietnamese": 0.4943181818181818, - "Filipino,Spanish": 0.45454545454545453, - "Filipino,Chinese": 0.4090909090909091, - "Filipino,Malay": 0.48295454545454547, - "Filipino,Vietnamese": 0.4147727272727273, - "Spanish,Chinese": 0.48295454545454547, - "Spanish,Malay": 0.4772727272727273, - "Spanish,Vietnamese": 0.5056818181818182, - "Chinese,Malay": 0.39204545454545453, - "Chinese,Vietnamese": 0.45454545454545453, - "Malay,Vietnamese": 0.44886363636363635 + "Indonesian,English": 0.5340909090909091, + "Indonesian,Filipino": 0.48295454545454547, + "Indonesian,Spanish": 0.5625, + "Indonesian,Chinese": 0.5625, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Vietnamese": 0.5340909090909091, + "English,Filipino": 0.4431818181818182, + "English,Spanish": 0.625, + "English,Chinese": 0.5056818181818182, + "English,Malay": 0.5795454545454546, + "English,Vietnamese": 0.5454545454545454, + "Filipino,Spanish": 0.48295454545454547, + "Filipino,Chinese": 0.4034090909090909, + "Filipino,Malay": 0.5113636363636364, + "Filipino,Vietnamese": 0.4772727272727273, + "Spanish,Chinese": 0.5397727272727273, + "Spanish,Malay": 0.6022727272727273, + "Spanish,Vietnamese": 0.5852272727272727, + "Chinese,Malay": 0.5, + "Chinese,Vietnamese": 0.5568181818181818, + "Malay,Vietnamese": 0.5397727272727273 }, "3_combine": { - "Indonesian,English,Filipino": 0.24431818181818182, - "Indonesian,English,Spanish": 0.3409090909090909, - "Indonesian,English,Chinese": 0.3181818181818182, - "Indonesian,English,Malay": 0.2897727272727273, - "Indonesian,English,Vietnamese": 0.2727272727272727, - "Indonesian,Filipino,Spanish": 0.2840909090909091, - "Indonesian,Filipino,Chinese": 0.2727272727272727, - "Indonesian,Filipino,Malay": 0.2727272727272727, - "Indonesian,Filipino,Vietnamese": 0.23863636363636365, - "Indonesian,Spanish,Chinese": 0.32386363636363635, - "Indonesian,Spanish,Malay": 0.3125, - "Indonesian,Spanish,Vietnamese": 0.2784090909090909, - "Indonesian,Chinese,Malay": 0.2840909090909091, - "Indonesian,Chinese,Vietnamese": 0.29545454545454547, - "Indonesian,Malay,Vietnamese": 0.26136363636363635, - "English,Filipino,Spanish": 0.2840909090909091, - "English,Filipino,Chinese": 0.23863636363636365, - "English,Filipino,Malay": 0.2556818181818182, - "English,Filipino,Vietnamese": 0.25, - "English,Spanish,Chinese": 0.32954545454545453, - "English,Spanish,Malay": 0.32386363636363635, - "English,Spanish,Vietnamese": 0.3352272727272727, - "English,Chinese,Malay": 0.26704545454545453, - "English,Chinese,Vietnamese": 0.3068181818181818, - "English,Malay,Vietnamese": 0.2840909090909091, - "Filipino,Spanish,Chinese": 0.2556818181818182, - "Filipino,Spanish,Malay": 0.2897727272727273, - "Filipino,Spanish,Vietnamese": 0.2556818181818182, - "Filipino,Chinese,Malay": 0.23863636363636365, - "Filipino,Chinese,Vietnamese": 0.23295454545454544, - "Filipino,Malay,Vietnamese": 0.26704545454545453, - "Spanish,Chinese,Malay": 0.26136363636363635, - "Spanish,Chinese,Vietnamese": 0.30113636363636365, - "Spanish,Malay,Vietnamese": 0.29545454545454547, - "Chinese,Malay,Vietnamese": 0.25 + "Indonesian,English,Filipino": 0.2840909090909091, + "Indonesian,English,Spanish": 0.4034090909090909, + "Indonesian,English,Chinese": 0.35795454545454547, + "Indonesian,English,Malay": 0.4090909090909091, + "Indonesian,English,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.2897727272727273, + "Indonesian,Filipino,Malay": 0.35795454545454547, + "Indonesian,Filipino,Vietnamese": 0.32386363636363635, + "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Spanish,Malay": 0.4318181818181818, + "Indonesian,Spanish,Vietnamese": 0.375, + "Indonesian,Chinese,Malay": 0.38636363636363635, + "Indonesian,Chinese,Vietnamese": 0.39204545454545453, + "Indonesian,Malay,Vietnamese": 0.39204545454545453, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Filipino,Chinese": 0.24431818181818182, + "English,Filipino,Malay": 0.3068181818181818, + "English,Filipino,Vietnamese": 0.3068181818181818, + "English,Spanish,Chinese": 0.3806818181818182, + "English,Spanish,Malay": 0.4318181818181818, + "English,Spanish,Vietnamese": 0.4147727272727273, + "English,Chinese,Malay": 0.35795454545454547, + "English,Chinese,Vietnamese": 0.3693181818181818, + "English,Malay,Vietnamese": 0.39204545454545453, + "Filipino,Spanish,Chinese": 0.2840909090909091, + "Filipino,Spanish,Malay": 0.35795454545454547, + "Filipino,Spanish,Vietnamese": 0.32954545454545453, + "Filipino,Chinese,Malay": 0.2840909090909091, + "Filipino,Chinese,Vietnamese": 0.30113636363636365, + "Filipino,Malay,Vietnamese": 0.3352272727272727, + "Spanish,Chinese,Malay": 0.35795454545454547, + "Spanish,Chinese,Vietnamese": 0.3977272727272727, + "Spanish,Malay,Vietnamese": 0.39204545454545453, + "Chinese,Malay,Vietnamese": 0.36363636363636365 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.19886363636363635, - "Indonesian,English,Filipino,Chinese": 0.1875, - "Indonesian,English,Filipino,Malay": 0.18181818181818182, - "Indonesian,English,Filipino,Vietnamese": 0.17045454545454544, - "Indonesian,English,Spanish,Chinese": 0.23295454545454544, - "Indonesian,English,Spanish,Malay": 0.22727272727272727, - "Indonesian,English,Spanish,Vietnamese": 0.21022727272727273, - "Indonesian,English,Chinese,Malay": 0.2159090909090909, - "Indonesian,English,Chinese,Vietnamese": 0.22727272727272727, - "Indonesian,English,Malay,Vietnamese": 0.1875, - "Indonesian,Filipino,Spanish,Chinese": 0.19318181818181818, - "Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Vietnamese": 0.17045454545454544, - "Indonesian,Filipino,Chinese,Malay": 0.19318181818181818, - "Indonesian,Filipino,Chinese,Vietnamese": 0.1875, - "Indonesian,Filipino,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,Spanish,Chinese,Malay": 0.21022727272727273, - "Indonesian,Spanish,Chinese,Vietnamese": 0.21022727272727273, - "Indonesian,Spanish,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,Chinese,Malay,Vietnamese": 0.20454545454545456, - "English,Filipino,Spanish,Chinese": 0.18181818181818182, - "English,Filipino,Spanish,Malay": 0.19318181818181818, - "English,Filipino,Spanish,Vietnamese": 0.19886363636363635, - "English,Filipino,Chinese,Malay": 0.17613636363636365, - "English,Filipino,Chinese,Vietnamese": 0.17045454545454544, - "English,Filipino,Malay,Vietnamese": 0.17045454545454544, - "English,Spanish,Chinese,Malay": 0.21022727272727273, - "English,Spanish,Chinese,Vietnamese": 0.23295454545454544, - "English,Spanish,Malay,Vietnamese": 0.23295454545454544, - "English,Chinese,Malay,Vietnamese": 0.1875, - "Filipino,Spanish,Chinese,Malay": 0.16477272727272727, - "Filipino,Spanish,Chinese,Vietnamese": 0.17613636363636365, - "Filipino,Spanish,Malay,Vietnamese": 0.1875, - "Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Spanish,Chinese,Malay,Vietnamese": 0.19318181818181818 + "Indonesian,English,Filipino,Spanish": 0.24431818181818182, + "Indonesian,English,Filipino,Chinese": 0.20454545454545456, + "Indonesian,English,Filipino,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Spanish,Malay": 0.32386363636363635, + "Indonesian,English,Spanish,Vietnamese": 0.29545454545454547, + "Indonesian,English,Chinese,Malay": 0.30113636363636365, + "Indonesian,English,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,English,Malay,Vietnamese": 0.2897727272727273, + "Indonesian,Filipino,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.23863636363636365, + "Indonesian,Filipino,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Chinese,Malay": 0.30113636363636365, + "Indonesian,Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Indonesian,Spanish,Malay,Vietnamese": 0.29545454545454547, + "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish,Chinese": 0.21022727272727273, + "English,Filipino,Spanish,Malay": 0.26136363636363635, + "English,Filipino,Spanish,Vietnamese": 0.2556818181818182, + "English,Filipino,Chinese,Malay": 0.2159090909090909, + "English,Filipino,Chinese,Vietnamese": 0.20454545454545456, + "English,Filipino,Malay,Vietnamese": 0.24431818181818182, + "English,Spanish,Chinese,Malay": 0.29545454545454547, + "English,Spanish,Chinese,Vietnamese": 0.3125, + "English,Spanish,Malay,Vietnamese": 0.3181818181818182, + "English,Chinese,Malay,Vietnamese": 0.2897727272727273, + "Filipino,Spanish,Chinese,Malay": 0.23295454545454544, + "Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Filipino,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Filipino,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Spanish,Chinese,Malay,Vietnamese": 0.2897727272727273 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.14772727272727273, - "Indonesian,English,Filipino,Spanish,Malay": 0.14772727272727273, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.14204545454545456, - "Indonesian,English,Filipino,Chinese,Malay": 0.14772727272727273, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.14772727272727273, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,English,Spanish,Chinese,Malay": 0.17613636363636365, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.17613636363636365, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.14204545454545456, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727, - "English,Filipino,Spanish,Chinese,Malay": 0.13636363636363635, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, - "English,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, - "English,Filipino,Chinese,Malay,Vietnamese": 0.125, - "English,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 + "Indonesian,English,Filipino,Spanish,Chinese": 0.1875, + "Indonesian,English,Filipino,Spanish,Malay": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.20454545454545456, + "Indonesian,English,Filipino,Chinese,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,English,Spanish,Chinese,Malay": 0.2556818181818182, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.25, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.25, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.25, + "English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.1875, + "English,Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, + "English,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "English,Spanish,Chinese,Malay,Vietnamese": 0.25, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.11931818181818182, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.11931818181818182, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 } }, - "AC3_2": 0.44702544429425994, - "AC3_3": 0.3392244477368077, - "AC3_4": 0.26832259236332895, - "AC3_5": 0.220423370502308, - "AC3_6": 0.18773619287189586, - "AC3_7": 0.1651977792888444 + "AC3_2": 0.4939513632151703, + "AC3_3": 0.40032525771082667, + "AC3_4": 0.33628426639235887, + "AC3_5": 0.2921667517442057, + "AC3_6": 0.2606596942474146, + "AC3_7": 0.23644703916115414 }, "prompt_4": { - "overall_acc": 0.4082792207792207, + "overall_acc": 0.43344155844155846, "language_acc": { - "Indonesian": 0.3409090909090909, - "English": 0.4715909090909091, - "Filipino": 0.3181818181818182, - "Spanish": 0.42045454545454547, - "Chinese": 0.5056818181818182, - "Malay": 0.4034090909090909, - "Vietnamese": 0.3977272727272727 + "Indonesian": 0.3806818181818182, + "English": 0.5056818181818182, + "Filipino": 0.30113636363636365, + "Spanish": 0.45454545454545453, + "Chinese": 0.4602272727272727, + "Malay": 0.45454545454545453, + "Vietnamese": 0.4772727272727273 }, - "consistency_score_2": 0.44372294372294374, - "consistency_score_3": 0.24707792207792215, - "consistency_score_4": 0.15340909090909094, - "consistency_score_5": 0.10064935064935066, - "consistency_score_6": 0.06574675324675325, - "consistency_score_7": 0.03977272727272727, + "consistency_score_2": 0.5075757575757577, + "consistency_score_3": 0.32418831168831164, + "consistency_score_4": 0.23538961038961043, + "consistency_score_5": 0.18479437229437232, + "consistency_score_6": 0.15097402597402595, + "consistency_score_7": 0.125, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4943181818181818, - "Indonesian,Filipino": 0.4090909090909091, - "Indonesian,Spanish": 0.5397727272727273, - "Indonesian,Chinese": 0.3977272727272727, - "Indonesian,Malay": 0.5170454545454546, - "Indonesian,Vietnamese": 0.4090909090909091, + "Indonesian,English": 0.5113636363636364, + "Indonesian,Filipino": 0.48863636363636365, + "Indonesian,Spanish": 0.5056818181818182, + "Indonesian,Chinese": 0.48295454545454547, + "Indonesian,Malay": 0.6022727272727273, + "Indonesian,Vietnamese": 0.4772727272727273, "English,Filipino": 0.39204545454545453, - "English,Spanish": 0.5, - "English,Chinese": 0.4943181818181818, - "English,Malay": 0.4943181818181818, - "English,Vietnamese": 0.4715909090909091, - "Filipino,Spanish": 0.375, - "Filipino,Chinese": 0.32386363636363635, - "Filipino,Malay": 0.4147727272727273, - "Filipino,Vietnamese": 0.375, - "Spanish,Chinese": 0.4375, - "Spanish,Malay": 0.48295454545454547, - "Spanish,Vietnamese": 0.45454545454545453, - "Chinese,Malay": 0.4147727272727273, - "Chinese,Vietnamese": 0.4375, - "Malay,Vietnamese": 0.48295454545454547 + "English,Spanish": 0.625, + "English,Chinese": 0.5227272727272727, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.5397727272727273, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Chinese": 0.3806818181818182, + "Filipino,Malay": 0.4715909090909091, + "Filipino,Vietnamese": 0.4715909090909091, + "Spanish,Chinese": 0.5340909090909091, + "Spanish,Malay": 0.5511363636363636, + "Spanish,Vietnamese": 0.5681818181818182, + "Chinese,Malay": 0.5056818181818182, + "Chinese,Vietnamese": 0.5227272727272727, + "Malay,Vietnamese": 0.5170454545454546 }, "3_combine": { - "Indonesian,English,Filipino": 0.22727272727272727, - "Indonesian,English,Spanish": 0.3125, - "Indonesian,English,Chinese": 0.26136363636363635, - "Indonesian,English,Malay": 0.3181818181818182, - "Indonesian,English,Vietnamese": 0.25, - "Indonesian,Filipino,Spanish": 0.24431818181818182, - "Indonesian,Filipino,Chinese": 0.1534090909090909, - "Indonesian,Filipino,Malay": 0.23863636363636365, - "Indonesian,Filipino,Vietnamese": 0.19886363636363635, - "Indonesian,Spanish,Chinese": 0.26136363636363635, - "Indonesian,Spanish,Malay": 0.3352272727272727, - "Indonesian,Spanish,Vietnamese": 0.26704545454545453, - "Indonesian,Chinese,Malay": 0.24431818181818182, - "Indonesian,Chinese,Vietnamese": 0.2159090909090909, - "Indonesian,Malay,Vietnamese": 0.2897727272727273, - "English,Filipino,Spanish": 0.2215909090909091, - "English,Filipino,Chinese": 0.1875, - "English,Filipino,Malay": 0.22727272727272727, - "English,Filipino,Vietnamese": 0.19318181818181818, - "English,Spanish,Chinese": 0.2897727272727273, - "English,Spanish,Malay": 0.3125, - "English,Spanish,Vietnamese": 0.29545454545454547, - "English,Chinese,Malay": 0.26704545454545453, - "English,Chinese,Vietnamese": 0.2897727272727273, - "English,Malay,Vietnamese": 0.3068181818181818, - "Filipino,Spanish,Chinese": 0.17045454545454544, - "Filipino,Spanish,Malay": 0.2215909090909091, - "Filipino,Spanish,Vietnamese": 0.19318181818181818, - "Filipino,Chinese,Malay": 0.17045454545454544, - "Filipino,Chinese,Vietnamese": 0.1875, - "Filipino,Malay,Vietnamese": 0.22727272727272727, - "Spanish,Chinese,Malay": 0.25, - "Spanish,Chinese,Vietnamese": 0.25, - "Spanish,Malay,Vietnamese": 0.30113636363636365, - "Chinese,Malay,Vietnamese": 0.26704545454545453 + "Indonesian,English,Filipino": 0.26704545454545453, + "Indonesian,English,Spanish": 0.375, + "Indonesian,English,Chinese": 0.32386363636363635, + "Indonesian,English,Malay": 0.3977272727272727, + "Indonesian,English,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Filipino,Vietnamese": 0.30113636363636365, + "Indonesian,Spanish,Chinese": 0.32386363636363635, + "Indonesian,Spanish,Malay": 0.38636363636363635, + "Indonesian,Spanish,Vietnamese": 0.32954545454545453, + "Indonesian,Chinese,Malay": 0.3522727272727273, + "Indonesian,Chinese,Vietnamese": 0.32954545454545453, + "Indonesian,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Spanish": 0.30113636363636365, + "English,Filipino,Chinese": 0.2159090909090909, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Vietnamese": 0.2727272727272727, + "English,Spanish,Chinese": 0.38636363636363635, + "English,Spanish,Malay": 0.42045454545454547, + "English,Spanish,Vietnamese": 0.4090909090909091, + "English,Chinese,Malay": 0.36363636363636365, + "English,Chinese,Vietnamese": 0.3522727272727273, + "English,Malay,Vietnamese": 0.375, + "Filipino,Spanish,Chinese": 0.23863636363636365, + "Filipino,Spanish,Malay": 0.2897727272727273, + "Filipino,Spanish,Vietnamese": 0.2897727272727273, + "Filipino,Chinese,Malay": 0.24431818181818182, + "Filipino,Chinese,Vietnamese": 0.26704545454545453, + "Filipino,Malay,Vietnamese": 0.3125, + "Spanish,Chinese,Malay": 0.3465909090909091, + "Spanish,Chinese,Vietnamese": 0.3693181818181818, + "Spanish,Malay,Vietnamese": 0.375, + "Chinese,Malay,Vietnamese": 0.3352272727272727 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.14204545454545456, - "Indonesian,English,Filipino,Chinese": 0.10795454545454546, - "Indonesian,English,Filipino,Malay": 0.1590909090909091, - "Indonesian,English,Filipino,Vietnamese": 0.125, - "Indonesian,English,Spanish,Chinese": 0.1875, - "Indonesian,English,Spanish,Malay": 0.23295454545454544, - "Indonesian,English,Spanish,Vietnamese": 0.17045454545454544, - "Indonesian,English,Chinese,Malay": 0.16477272727272727, - "Indonesian,English,Chinese,Vietnamese": 0.16477272727272727, - "Indonesian,English,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Chinese": 0.10227272727272728, - "Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.10795454545454546, - "Indonesian,Filipino,Chinese,Vietnamese": 0.09090909090909091, - "Indonesian,Filipino,Malay,Vietnamese": 0.14772727272727273, - "Indonesian,Spanish,Chinese,Malay": 0.1875, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,Chinese,Malay,Vietnamese": 0.16477272727272727, - "English,Filipino,Spanish,Chinese": 0.125, - "English,Filipino,Spanish,Malay": 0.16477272727272727, - "English,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "English,Filipino,Chinese,Malay": 0.11363636363636363, - "English,Filipino,Chinese,Vietnamese": 0.11363636363636363, - "English,Filipino,Malay,Vietnamese": 0.1590909090909091, - "English,Spanish,Chinese,Malay": 0.1875, - "English,Spanish,Chinese,Vietnamese": 0.19318181818181818, - "English,Spanish,Malay,Vietnamese": 0.2159090909090909, - "English,Chinese,Malay,Vietnamese": 0.19318181818181818, - "Filipino,Spanish,Chinese,Malay": 0.11363636363636363, - "Filipino,Spanish,Chinese,Vietnamese": 0.10227272727272728, - "Filipino,Spanish,Malay,Vietnamese": 0.1534090909090909, - "Filipino,Chinese,Malay,Vietnamese": 0.11931818181818182, - "Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 + "Indonesian,English,Filipino,Spanish": 0.2159090909090909, + "Indonesian,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.26704545454545453, + "Indonesian,English,Spanish,Malay": 0.3125, + "Indonesian,English,Spanish,Vietnamese": 0.26704545454545453, + "Indonesian,English,Chinese,Malay": 0.2784090909090909, + "Indonesian,English,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,English,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Filipino,Spanish,Chinese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Chinese,Malay": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Spanish,Chinese,Malay": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2556818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Chinese,Malay,Vietnamese": 0.26136363636363635, + "English,Filipino,Spanish,Chinese": 0.18181818181818182, + "English,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Filipino,Spanish,Vietnamese": 0.2215909090909091, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "English,Filipino,Malay,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese,Malay": 0.2897727272727273, + "English,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "English,Spanish,Malay,Vietnamese": 0.3068181818181818, + "English,Chinese,Malay,Vietnamese": 0.2784090909090909, + "Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Filipino,Spanish,Malay,Vietnamese": 0.23295454545454544, + "Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.07386363636363637, - "Indonesian,English,Filipino,Spanish,Malay": 0.11363636363636363, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.08522727272727272, - "Indonesian,English,Filipino,Chinese,Malay": 0.07386363636363637, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.06818181818181818, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Chinese,Malay": 0.14204545454545456, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.125, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.07386363636363637, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.0625, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.07386363636363637, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818, - "English,Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.07386363636363637, - "English,Filipino,Spanish,Malay,Vietnamese": 0.11931818181818182, - "English,Filipino,Chinese,Malay,Vietnamese": 0.08522727272727272, - "English,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07954545454545454 + "Indonesian,English,Filipino,Spanish,Chinese": 0.13636363636363635, + "Indonesian,English,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Malay": 0.1534090909090909, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese,Malay": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091, + "English,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "English,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "English,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.056818181818181816, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.045454545454545456, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.056818181818181816, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.05113636363636364, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.0625 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.03977272727272727 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 } }, - "AC3_2": 0.42526384375950027, - "AC3_3": 0.3078528481204634, - "AC3_4": 0.22301957431657435, - "AC3_5": 0.16148843179162897, - "AC3_6": 0.11325553724705598, - "AC3_7": 0.07248435439752456 + "AC3_2": 0.4675884781770865, + "AC3_3": 0.37093755821450325, + "AC3_4": 0.3050923590515977, + "AC3_5": 0.25911648522336955, + "AC3_6": 0.22394480515648574, + "AC3_7": 0.19404069763967177 }, "prompt_5": { - "overall_acc": 0.41314935064935066, + "overall_acc": 0.4293831168831169, "language_acc": { - "Indonesian": 0.4090909090909091, + "Indonesian": 0.42613636363636365, "English": 0.4943181818181818, - "Filipino": 0.2897727272727273, - "Spanish": 0.44886363636363635, - "Chinese": 0.42045454545454547, - "Malay": 0.4034090909090909, - "Vietnamese": 0.42613636363636365 + "Filipino": 0.29545454545454547, + "Spanish": 0.4431818181818182, + "Chinese": 0.4715909090909091, + "Malay": 0.3977272727272727, + "Vietnamese": 0.4772727272727273 }, - "consistency_score_2": 0.43019480519480513, - "consistency_score_3": 0.24269480519480513, - "consistency_score_4": 0.16006493506493505, - "consistency_score_5": 0.11607142857142858, - "consistency_score_6": 0.08847402597402598, - "consistency_score_7": 0.06818181818181818, + "consistency_score_2": 0.4945887445887445, + "consistency_score_3": 0.30746753246753245, + "consistency_score_4": 0.21883116883116882, + "consistency_score_5": 0.16991341991341993, + "consistency_score_6": 0.13798701298701296, + "consistency_score_7": 0.11363636363636363, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.48295454545454547, - "Indonesian,Filipino": 0.39204545454545453, - "Indonesian,Spanish": 0.4602272727272727, - "Indonesian,Chinese": 0.4318181818181818, - "Indonesian,Malay": 0.4772727272727273, - "Indonesian,Vietnamese": 0.39204545454545453, - "English,Filipino": 0.35795454545454547, - "English,Spanish": 0.4772727272727273, - "English,Chinese": 0.5397727272727273, - "English,Malay": 0.4772727272727273, + "Indonesian,English": 0.5227272727272727, + "Indonesian,Filipino": 0.48295454545454547, + "Indonesian,Spanish": 0.5227272727272727, + "Indonesian,Chinese": 0.5, + "Indonesian,Malay": 0.625, + "Indonesian,Vietnamese": 0.4659090909090909, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.6079545454545454, + "English,Chinese": 0.5056818181818182, + "English,Malay": 0.5511363636363636, "English,Vietnamese": 0.4943181818181818, - "Filipino,Spanish": 0.3409090909090909, - "Filipino,Chinese": 0.3181818181818182, - "Filipino,Malay": 0.4034090909090909, - "Filipino,Vietnamese": 0.39204545454545453, - "Spanish,Chinese": 0.42613636363636365, - "Spanish,Malay": 0.4431818181818182, - "Spanish,Vietnamese": 0.45454545454545453, - "Chinese,Malay": 0.4375, - "Chinese,Vietnamese": 0.42045454545454547, - "Malay,Vietnamese": 0.4147727272727273 + "Filipino,Spanish": 0.4318181818181818, + "Filipino,Chinese": 0.375, + "Filipino,Malay": 0.4715909090909091, + "Filipino,Vietnamese": 0.4034090909090909, + "Spanish,Chinese": 0.4943181818181818, + "Spanish,Malay": 0.5511363636363636, + "Spanish,Vietnamese": 0.5397727272727273, + "Chinese,Malay": 0.45454545454545453, + "Chinese,Vietnamese": 0.5056818181818182, + "Malay,Vietnamese": 0.5 }, "3_combine": { - "Indonesian,English,Filipino": 0.20454545454545456, - "Indonesian,English,Spanish": 0.3068181818181818, - "Indonesian,English,Chinese": 0.29545454545454547, - "Indonesian,English,Malay": 0.2897727272727273, - "Indonesian,English,Vietnamese": 0.2784090909090909, - "Indonesian,Filipino,Spanish": 0.19318181818181818, - "Indonesian,Filipino,Chinese": 0.16477272727272727, - "Indonesian,Filipino,Malay": 0.22727272727272727, - "Indonesian,Filipino,Vietnamese": 0.19886363636363635, - "Indonesian,Spanish,Chinese": 0.25, - "Indonesian,Spanish,Malay": 0.2784090909090909, - "Indonesian,Spanish,Vietnamese": 0.24431818181818182, - "Indonesian,Chinese,Malay": 0.26136363636363635, - "Indonesian,Chinese,Vietnamese": 0.22727272727272727, - "Indonesian,Malay,Vietnamese": 0.26136363636363635, - "English,Filipino,Spanish": 0.19318181818181818, - "English,Filipino,Chinese": 0.20454545454545456, - "English,Filipino,Malay": 0.23863636363636365, - "English,Filipino,Vietnamese": 0.22727272727272727, - "English,Spanish,Chinese": 0.2840909090909091, - "English,Spanish,Malay": 0.2727272727272727, - "English,Spanish,Vietnamese": 0.30113636363636365, - "English,Chinese,Malay": 0.30113636363636365, - "English,Chinese,Vietnamese": 0.30113636363636365, - "English,Malay,Vietnamese": 0.2727272727272727, - "Filipino,Spanish,Chinese": 0.14772727272727273, - "Filipino,Spanish,Malay": 0.2159090909090909, - "Filipino,Spanish,Vietnamese": 0.2159090909090909, - "Filipino,Chinese,Malay": 0.19886363636363635, - "Filipino,Chinese,Vietnamese": 0.1875, - "Filipino,Malay,Vietnamese": 0.22727272727272727, - "Spanish,Chinese,Malay": 0.26136363636363635, - "Spanish,Chinese,Vietnamese": 0.25, - "Spanish,Malay,Vietnamese": 0.2727272727272727, - "Chinese,Malay,Vietnamese": 0.23863636363636365 + "Indonesian,English,Filipino": 0.26136363636363635, + "Indonesian,English,Spanish": 0.375, + "Indonesian,English,Chinese": 0.32954545454545453, + "Indonesian,English,Malay": 0.39204545454545453, + "Indonesian,English,Vietnamese": 0.3181818181818182, + "Indonesian,Filipino,Spanish": 0.2897727272727273, + "Indonesian,Filipino,Chinese": 0.25, + "Indonesian,Filipino,Malay": 0.3352272727272727, + "Indonesian,Filipino,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Chinese": 0.3125, + "Indonesian,Spanish,Malay": 0.39204545454545453, + "Indonesian,Spanish,Vietnamese": 0.32386363636363635, + "Indonesian,Chinese,Malay": 0.3465909090909091, + "Indonesian,Chinese,Vietnamese": 0.32386363636363635, + "Indonesian,Malay,Vietnamese": 0.3465909090909091, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Chinese": 0.2215909090909091, + "English,Filipino,Malay": 0.2727272727272727, + "English,Filipino,Vietnamese": 0.2215909090909091, + "English,Spanish,Chinese": 0.3522727272727273, + "English,Spanish,Malay": 0.39204545454545453, + "English,Spanish,Vietnamese": 0.36363636363636365, + "English,Chinese,Malay": 0.32386363636363635, + "English,Chinese,Vietnamese": 0.3068181818181818, + "English,Malay,Vietnamese": 0.3409090909090909, + "Filipino,Spanish,Chinese": 0.24431818181818182, + "Filipino,Spanish,Malay": 0.2840909090909091, + "Filipino,Spanish,Vietnamese": 0.25, + "Filipino,Chinese,Malay": 0.23863636363636365, + "Filipino,Chinese,Vietnamese": 0.24431818181818182, + "Filipino,Malay,Vietnamese": 0.26704545454545453, + "Spanish,Chinese,Malay": 0.3068181818181818, + "Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Spanish,Malay,Vietnamese": 0.35795454545454547, + "Chinese,Malay,Vietnamese": 0.3068181818181818 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.14204545454545456, - "Indonesian,English,Filipino,Chinese": 0.11931818181818182, - "Indonesian,English,Filipino,Malay": 0.1534090909090909, - "Indonesian,English,Filipino,Vietnamese": 0.1534090909090909, - "Indonesian,English,Spanish,Chinese": 0.18181818181818182, - "Indonesian,English,Spanish,Malay": 0.19886363636363635, - "Indonesian,English,Spanish,Vietnamese": 0.19886363636363635, - "Indonesian,English,Chinese,Malay": 0.19318181818181818, - "Indonesian,English,Chinese,Vietnamese": 0.18181818181818182, - "Indonesian,English,Malay,Vietnamese": 0.19318181818181818, - "Indonesian,Filipino,Spanish,Chinese": 0.10795454545454546, - "Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.125, - "Indonesian,Filipino,Chinese,Vietnamese": 0.11931818181818182, - "Indonesian,Filipino,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,Spanish,Chinese,Malay": 0.18181818181818182, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Malay,Vietnamese": 0.18181818181818182, - "Indonesian,Chinese,Malay,Vietnamese": 0.1875, - "English,Filipino,Spanish,Chinese": 0.11363636363636363, - "English,Filipino,Spanish,Malay": 0.1590909090909091, - "English,Filipino,Spanish,Vietnamese": 0.14772727272727273, - "English,Filipino,Chinese,Malay": 0.1590909090909091, - "English,Filipino,Chinese,Vietnamese": 0.14772727272727273, - "English,Filipino,Malay,Vietnamese": 0.17613636363636365, - "English,Spanish,Chinese,Malay": 0.1875, - "English,Spanish,Chinese,Vietnamese": 0.19886363636363635, - "English,Spanish,Malay,Vietnamese": 0.18181818181818182, - "English,Chinese,Malay,Vietnamese": 0.19318181818181818, - "Filipino,Spanish,Chinese,Malay": 0.11931818181818182, - "Filipino,Spanish,Chinese,Vietnamese": 0.11363636363636363, - "Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182 + "Indonesian,English,Filipino,Spanish": 0.20454545454545456, + "Indonesian,English,Filipino,Chinese": 0.17613636363636365, + "Indonesian,English,Filipino,Malay": 0.2159090909090909, + "Indonesian,English,Filipino,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese": 0.26704545454545453, + "Indonesian,English,Spanish,Malay": 0.29545454545454547, + "Indonesian,English,Spanish,Vietnamese": 0.26136363636363635, + "Indonesian,English,Chinese,Malay": 0.26136363636363635, + "Indonesian,English,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Chinese": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Chinese,Malay": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Spanish,Chinese,Malay": 0.23863636363636365, + "Indonesian,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.24431818181818182, + "English,Filipino,Spanish,Chinese": 0.1875, + "English,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Filipino,Spanish,Vietnamese": 0.18181818181818182, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Vietnamese": 0.17045454545454544, + "English,Filipino,Malay,Vietnamese": 0.1875, + "English,Spanish,Chinese,Malay": 0.2556818181818182, + "English,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "English,Spanish,Malay,Vietnamese": 0.2784090909090909, + "English,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, + "Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.07954545454545454, - "Indonesian,English,Filipino,Spanish,Malay": 0.11931818181818182, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.10795454545454546, - "Indonesian,English,Filipino,Chinese,Malay": 0.10227272727272728, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.125, - "Indonesian,English,Spanish,Chinese,Malay": 0.13636363636363635, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.125, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.08522727272727272, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.11931818181818182, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "English,Filipino,Spanish,Chinese,Malay": 0.10227272727272728, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.09659090909090909, - "English,Filipino,Spanish,Malay,Vietnamese": 0.125, - "English,Filipino,Chinese,Malay,Vietnamese": 0.125, - "English,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 + "Indonesian,English,Filipino,Spanish,Chinese": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.17045454545454544, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese,Malay": 0.1534090909090909, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,English,Spanish,Chinese,Malay": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.07386363636363637, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.06818181818181818, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.08522727272727272, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.06818181818181818 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363 } }, - "AC3_2": 0.421499818706817, - "AC3_3": 0.30577142531019874, - "AC3_4": 0.23073648239546468, - "AC3_5": 0.18122808936102297, - "AC3_6": 0.1457387676757337, - "AC3_7": 0.11704737081731985 + "AC3_2": 0.45968511717562976, + "AC3_3": 0.35833955640725035, + "AC3_4": 0.2899115659750307, + "AC3_5": 0.24347864321228224, + "AC3_6": 0.2088558794199266, + "AC3_7": 0.17971191734698388 } }, "sg_eval": { "prompt_1": { - "accuracy": 0.5922330097087378 + "accuracy": 0.6213592233009708 }, "prompt_2": { - "accuracy": 0.6116504854368932 + "accuracy": 0.6407766990291263 }, "prompt_3": { - "accuracy": 0.6407766990291263 + "accuracy": 0.6601941747572816 }, "prompt_4": { - "accuracy": 0.6116504854368932 + "accuracy": 0.6310679611650486 }, "prompt_5": { - "accuracy": 0.6213592233009708 + "accuracy": 0.6019417475728155 } }, "cn_eval": { "prompt_1": { - "accuracy": 0.3142857142857143 + "accuracy": 0.3238095238095238 }, "prompt_2": { - "accuracy": 0.38095238095238093 + "accuracy": 0.26666666666666666 }, "prompt_3": { - "accuracy": 0.3619047619047619 + "accuracy": 0.3238095238095238 }, "prompt_4": { - "accuracy": 0.2857142857142857 + "accuracy": 0.29523809523809524 }, "prompt_5": { - "accuracy": 0.29523809523809524 + "accuracy": 0.2761904761904762 } }, "us_eval": { "prompt_1": { - "accuracy": 0.6635514018691588 + "accuracy": 0.6915887850467289 }, "prompt_2": { - "accuracy": 0.7102803738317757 + "accuracy": 0.7663551401869159 }, "prompt_3": { - "accuracy": 0.7102803738317757 + "accuracy": 0.7383177570093458 }, "prompt_4": { - "accuracy": 0.6915887850467289 + "accuracy": 0.7102803738317757 }, "prompt_5": { - "accuracy": 0.719626168224299 + "accuracy": 0.7009345794392523 } }, "ph_eval": { "prompt_1": { - "accuracy": 0.51, + "accuracy": 0.52, "category_acc": { - "brand": 0.4, - "demographics": 0.2, - "biology": 0.4, + "brand": 0.5, + "demographics": 0.4, + "biology": 0.5, "history": 0.5333333333333333, "literature": 0.6, "politics": 0.8, - "culture": 0.7, - "film": 0.5, + "culture": 0.6, + "film": 0.4, "law": 0.3, "geography": 0.5 } @@ -95700,693 +95700,1508 @@ "brand": 0.4, "demographics": 0.4, "biology": 0.5, - "history": 0.5333333333333333, + "history": 0.4, "literature": 0.5, - "politics": 0.8, - "culture": 0.6, + "politics": 0.7, + "culture": 0.7, "film": 0.5, "law": 0.4, - "geography": 0.4 + "geography": 0.6 } }, "prompt_3": { - "accuracy": 0.5, + "accuracy": 0.53, "category_acc": { - "brand": 0.6, + "brand": 0.5, "demographics": 0.6, - "biology": 0.6, - "history": 0.26666666666666666, + "biology": 0.5, + "history": 0.4, "literature": 0.4, - "politics": 0.6, - "culture": 0.6, + "politics": 0.7, + "culture": 0.8, "film": 0.4, "law": 0.5, "geography": 0.6 } }, "prompt_4": { - "accuracy": 0.46, + "accuracy": 0.55, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.6, + "history": 0.4666666666666667, + "literature": 0.7, + "politics": 0.8, + "culture": 0.8, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.5, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.6, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.2396434382915585 + }, + "prompt_2": { + "bleu_score": 0.2657607783185625 + }, + "prompt_3": { + "bleu_score": 0.1991577032131693 + }, + "prompt_4": { + "bleu_score": 0.22222624038742203 + }, + "prompt_5": { + "bleu_score": 0.21913847724220636 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.48220842512851325, + "category_acc": { + "History": 0.4859437751004016, + "Geography": 0.44285714285714284, + "Lampungic": 0.30612244897959184, + "Social science": 0.674457429048414, + "Balinese": 0.29723991507430997, + "Makassarese": 0.2903225806451613, + "Banjarese": 0.3888888888888889, + "Chemistry": 0.3284671532846715, + "Biology": 0.4662721893491124, + "Science": 0.5954592363261094, + "Christian religion": 0.5970149253731343, + "Art": 0.5790349417637272, + "Islam religion": 0.5860597439544808, + "Hindu religion": 0.52, + "Madurese": 0.33559322033898303, + "Sport": 0.5067567567567568, + "Indonesian language": 0.541095890410959, + "Physics": 0.3696969696969697, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.27522935779816515, + "Sociology": 0.4899193548387097, + "Economy": 0.4385245901639344, + "Sundanese": 0.4044943820224719, + "Javanese": 0.3860887096774194, + "Civic education": 0.580829756795422 + } + }, + "prompt_2": { + "accuracy": 0.48815007677415045, + "category_acc": { + "History": 0.4819277108433735, + "Geography": 0.46122448979591835, + "Lampungic": 0.3741496598639456, + "Social science": 0.7045075125208681, + "Balinese": 0.3205944798301486, + "Makassarese": 0.3602150537634409, + "Banjarese": 0.3958333333333333, + "Chemistry": 0.2948905109489051, + "Biology": 0.4686390532544379, + "Science": 0.5799793601651186, + "Christian religion": 0.5920398009950248, + "Art": 0.5657237936772047, + "Islam religion": 0.5974395448079659, + "Hindu religion": 0.48, + "Madurese": 0.3423728813559322, + "Sport": 0.527027027027027, + "Indonesian language": 0.5473225404732254, + "Physics": 0.3797979797979798, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.3394495412844037, + "Sociology": 0.4879032258064516, + "Economy": 0.4672131147540984, + "Sundanese": 0.4140017286084702, + "Javanese": 0.38911290322580644, + "Civic education": 0.592274678111588 + } + }, + "prompt_3": { + "accuracy": 0.3732558915815475, + "category_acc": { + "History": 0.3614457831325301, + "Geography": 0.3489795918367347, + "Lampungic": 0.2789115646258503, + "Social science": 0.4757929883138564, + "Balinese": 0.2781316348195329, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.24379562043795622, + "Biology": 0.3337278106508876, + "Science": 0.3942208462332301, + "Christian religion": 0.34328358208955223, + "Art": 0.4176372712146423, + "Islam religion": 0.39829302987197723, + "Hindu religion": 0.3933333333333333, + "Madurese": 0.2745762711864407, + "Sport": 0.46621621621621623, + "Indonesian language": 0.42714819427148193, + "Physics": 0.36363636363636365, + "Minangkabau culture": 0.3065326633165829, + "Dayak language": 0.27522935779816515, + "Sociology": 0.38911290322580644, + "Economy": 0.33811475409836067, + "Sundanese": 0.35522904062229904, + "Javanese": 0.3326612903225806, + "Civic education": 0.413447782546495 + } + }, + "prompt_4": { + "accuracy": 0.41778489885840175, + "category_acc": { + "History": 0.3654618473895582, + "Geography": 0.3979591836734694, + "Lampungic": 0.3877551020408163, + "Social science": 0.5525876460767947, + "Balinese": 0.2929936305732484, + "Makassarese": 0.3118279569892473, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.29635036496350364, + "Biology": 0.38224852071005916, + "Science": 0.4840041279669763, + "Christian religion": 0.4925373134328358, + "Art": 0.5008319467554077, + "Islam religion": 0.4850640113798009, + "Hindu religion": 0.5, + "Madurese": 0.3220338983050847, + "Sport": 0.4391891891891892, + "Indonesian language": 0.47447073474470736, + "Physics": 0.32323232323232326, + "Minangkabau culture": 0.34673366834170855, + "Dayak language": 0.2018348623853211, + "Sociology": 0.39314516129032256, + "Economy": 0.375, + "Sundanese": 0.3656006914433881, + "Javanese": 0.3497983870967742, + "Civic education": 0.4978540772532189 + } + }, + "prompt_5": { + "accuracy": 0.42132318579344413, + "category_acc": { + "History": 0.40562248995983935, + "Geography": 0.42653061224489797, + "Lampungic": 0.3673469387755102, + "Social science": 0.5893155258764607, + "Balinese": 0.2781316348195329, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3263888888888889, + "Chemistry": 0.27883211678832115, + "Biology": 0.38698224852071006, + "Science": 0.49122807017543857, + "Christian religion": 0.5422885572139303, + "Art": 0.47254575707154745, + "Islam religion": 0.5049786628733998, + "Hindu religion": 0.44666666666666666, + "Madurese": 0.31186440677966104, + "Sport": 0.4594594594594595, + "Indonesian language": 0.4813200498132005, + "Physics": 0.3212121212121212, + "Minangkabau culture": 0.32663316582914576, + "Dayak language": 0.27522935779816515, + "Sociology": 0.4092741935483871, + "Economy": 0.3668032786885246, + "Sundanese": 0.3716508210890233, + "Javanese": 0.35181451612903225, + "Civic education": 0.4663805436337625 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.34862757099739766 + }, + "prompt_2": { + "bleu_score": 0.3530147157703795 + }, + "prompt_3": { + "bleu_score": 0.35953966781160585 + }, + "prompt_4": { + "bleu_score": 0.33177166236852557 + }, + "prompt_5": { + "bleu_score": 0.33548492537137203 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.28443833413071995 + }, + "prompt_2": { + "bleu_score": 0.2907746744644467 + }, + "prompt_3": { + "bleu_score": 0.29090264975427593 + }, + "prompt_4": { + "bleu_score": 0.2779363781260474 + }, + "prompt_5": { + "bleu_score": 0.27992746635172344 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.2002151122788486 + }, + "prompt_2": { + "bleu_score": 0.20884450972688828 + }, + "prompt_3": { + "bleu_score": 0.21481801019464242 + }, + "prompt_4": { + "bleu_score": 0.19222350540209923 + }, + "prompt_5": { + "bleu_score": 0.20272935145984003 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.3402735018440144 + }, + "prompt_2": { + "bleu_score": 0.34725259353122995 + }, + "prompt_3": { + "bleu_score": 0.3553594382109361 + }, + "prompt_4": { + "bleu_score": 0.3218818358090323 + }, + "prompt_5": { + "bleu_score": 0.32800134077077914 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.5997666277712952 + }, + "prompt_2": { + "accuracy": 0.48191365227537925 + }, + "prompt_3": { + "accuracy": 0.5950991831971996 + }, + "prompt_4": { + "accuracy": 0.6137689614935823 + }, + "prompt_5": { + "accuracy": 0.5950991831971996 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.578190918841616, + "category_acc": { + "high_school_european_history": 0.676829268292683, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.6628787878787878, + "medical_genetics": 0.7373737373737373, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.37333333333333335, + "high_school_world_history": 0.8177966101694916, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6455696202531646, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.7119741100323624, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.4626334519572954, + "philosophy": 0.6129032258064516, + "professional_medicine": 0.6494464944649446, + "nutrition": 0.6754098360655738, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36036036036036034, + "security_studies": 0.7008196721311475, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.6284779050736498, + "prehistory": 0.6934984520123839, + "anatomy": 0.6194029850746269, + "human_sexuality": 0.676923076923077, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.8125, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.700507614213198, + "elementary_mathematics": 0.3925729442970822, + "human_aging": 0.6441441441441441, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7867647058823529, + "formal_logic": 0.408, + "high_school_statistics": 0.46511627906976744, + "international_law": 0.7, + "high_school_mathematics": 0.30855018587360594, + "high_school_computer_science": 0.6464646464646465, + "conceptual_physics": 0.5256410256410257, + "miscellaneous": 0.7838874680306905, + "high_school_chemistry": 0.4801980198019802, + "marketing": 0.8240343347639485, + "professional_law": 0.41682974559686886, + "management": 0.7843137254901961, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.6635514018691588, + "world_religions": 0.8, + "sociology": 0.76, + "us_foreign_policy": 0.7878787878787878, + "high_school_macroeconomics": 0.5784061696658098, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.25838926174496646, + "moral_disputes": 0.6, + "electrical_engineering": 0.5902777777777778, + "astronomy": 0.6357615894039735, + "college_biology": 0.7202797202797203 + } + }, + "prompt_2": { + "accuracy": 0.45505899177690384, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4583333333333333, + "medical_genetics": 0.36363636363636365, + "high_school_us_history": 0.7339901477832512, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.7923728813559322, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.39662447257383965, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.47474747474747475, + "high_school_biology": 0.5080906148867314, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.4483985765124555, + "philosophy": 0.4258064516129032, + "professional_medicine": 0.4907749077490775, + "nutrition": 0.4852459016393443, + "global_facts": 0.32323232323232326, + "machine_learning": 0.32432432432432434, + "security_studies": 0.6967213114754098, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.4877250409165303, + "prehistory": 0.43034055727554177, + "anatomy": 0.3805970149253731, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.38953488372093026, + "high_school_government_and_politics": 0.6145833333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.5380710659898477, + "elementary_mathematics": 0.35543766578249336, + "human_aging": 0.42342342342342343, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.5588235294117647, + "formal_logic": 0.392, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.32342007434944237, + "high_school_computer_science": 0.5555555555555556, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.4884910485933504, + "high_school_chemistry": 0.3564356435643564, + "marketing": 0.6266094420600858, + "professional_law": 0.43574690150032613, + "management": 0.5490196078431373, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5327102803738317, + "world_religions": 0.48823529411764705, + "sociology": 0.575, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.40102827763496146, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.48405797101449277, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.4503311258278146, + "college_biology": 0.5174825174825175 + } + }, + "prompt_3": { + "accuracy": 0.5891312120128709, + "category_acc": { + "high_school_european_history": 0.7012195121951219, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.678030303030303, + "medical_genetics": 0.7171717171717171, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.36666666666666664, + "high_school_world_history": 0.8305084745762712, + "virology": 0.503030303030303, + "high_school_microeconomics": 0.679324894514768, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.5252525252525253, + "high_school_biology": 0.7443365695792881, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.46619217081850534, + "philosophy": 0.6483870967741936, + "professional_medicine": 0.6715867158671587, + "nutrition": 0.6721311475409836, + "global_facts": 0.35353535353535354, + "machine_learning": 0.4144144144144144, + "security_studies": 0.7172131147540983, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.6333878887070377, + "prehistory": 0.6501547987616099, + "anatomy": 0.6417910447761194, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.8229166666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6790123456790124, + "high_school_geography": 0.766497461928934, + "elementary_mathematics": 0.3819628647214854, + "human_aging": 0.6216216216216216, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7922794117647058, + "formal_logic": 0.464, + "high_school_statistics": 0.48372093023255813, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.44871794871794873, + "miscellaneous": 0.7928388746803069, + "high_school_chemistry": 0.4900990099009901, + "marketing": 0.8197424892703863, + "professional_law": 0.43966079582517936, + "management": 0.803921568627451, + "college_physics": 0.36633663366336633, + "jurisprudence": 0.7009345794392523, + "world_religions": 0.7941176470588235, + "sociology": 0.78, + "us_foreign_policy": 0.8484848484848485, + "high_school_macroeconomics": 0.5912596401028277, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.30089485458612975, + "moral_disputes": 0.6086956521739131, + "electrical_engineering": 0.5, + "astronomy": 0.6622516556291391, + "college_biology": 0.7412587412587412 + } + }, + "prompt_4": { + "accuracy": 0.5799785484447623, "category_acc": { - "brand": 0.6, - "demographics": 0.2, - "biology": 0.5, - "history": 0.4, - "literature": 0.4, - "politics": 0.8, - "culture": 0.7, - "film": 0.4, - "law": 0.2, - "geography": 0.3 + "high_school_european_history": 0.676829268292683, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.6553030303030303, + "medical_genetics": 0.7373737373737373, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.36666666666666664, + "high_school_world_history": 0.8008474576271186, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.6751054852320675, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7216828478964401, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.46619217081850534, + "philosophy": 0.6225806451612903, + "professional_medicine": 0.6531365313653137, + "nutrition": 0.6688524590163935, + "global_facts": 0.32323232323232326, + "machine_learning": 0.34234234234234234, + "security_studies": 0.7008196721311475, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.6219312602291326, + "prehistory": 0.6904024767801857, + "anatomy": 0.6268656716417911, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5581395348837209, + "high_school_government_and_politics": 0.8177083333333334, + "college_chemistry": 0.47474747474747475, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.7258883248730964, + "elementary_mathematics": 0.38992042440318303, + "human_aging": 0.6261261261261262, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.7904411764705882, + "formal_logic": 0.424, + "high_school_statistics": 0.44651162790697674, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.789002557544757, + "high_school_chemistry": 0.5, + "marketing": 0.8369098712446352, + "professional_law": 0.42857142857142855, + "management": 0.803921568627451, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.6728971962616822, + "world_religions": 0.8058823529411765, + "sociology": 0.78, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5629820051413882, + "computer_security": 0.7272727272727273, + "moral_scenarios": 0.2639821029082774, + "moral_disputes": 0.6057971014492753, + "electrical_engineering": 0.5763888888888888, + "astronomy": 0.6158940397350994, + "college_biology": 0.7202797202797203 } }, "prompt_5": { - "accuracy": 0.51, + "accuracy": 0.5732570611369324, "category_acc": { - "brand": 0.4, - "demographics": 0.2, - "biology": 0.5, - "history": 0.6, - "literature": 0.6, - "politics": 0.7, - "culture": 0.7, - "film": 0.4, - "law": 0.2, - "geography": 0.6 + "high_school_european_history": 0.6524390243902439, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.6287878787878788, + "medical_genetics": 0.7373737373737373, + "high_school_us_history": 0.7142857142857143, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.8177966101694916, + "virology": 0.503030303030303, + "high_school_microeconomics": 0.6413502109704642, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.7216828478964401, + "abstract_algebra": 0.21212121212121213, + "professional_accounting": 0.45195729537366547, + "philosophy": 0.6193548387096774, + "professional_medicine": 0.6494464944649446, + "nutrition": 0.6721311475409836, + "global_facts": 0.31313131313131315, + "machine_learning": 0.36936936936936937, + "security_studies": 0.6926229508196722, + "public_relations": 0.5321100917431193, + "professional_psychology": 0.6153846153846154, + "prehistory": 0.6749226006191951, + "anatomy": 0.6119402985074627, + "human_sexuality": 0.6923076923076923, + "college_medicine": 0.5348837209302325, + "high_school_government_and_politics": 0.8177083333333334, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.6481481481481481, + "high_school_geography": 0.6954314720812182, + "elementary_mathematics": 0.3448275862068966, + "human_aging": 0.6441441441441441, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7904411764705882, + "formal_logic": 0.416, + "high_school_statistics": 0.42790697674418604, + "international_law": 0.675, + "high_school_mathematics": 0.32342007434944237, + "high_school_computer_science": 0.5858585858585859, + "conceptual_physics": 0.4829059829059829, + "miscellaneous": 0.7659846547314578, + "high_school_chemistry": 0.46534653465346537, + "marketing": 0.8369098712446352, + "professional_law": 0.43313763861709065, + "management": 0.8137254901960784, + "college_physics": 0.4158415841584158, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.8, + "sociology": 0.79, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5552699228791774, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.6086956521739131, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.609271523178808, + "college_biology": 0.7272727272727273 } } }, - "sing2eng": { - "prompt_1": { - "bleu_score": 0.1638723051197444 - }, - "prompt_2": { - "bleu_score": 0.1970782611449239 - }, - "prompt_3": { - "bleu_score": 0.14916080706976964 - }, - "prompt_4": { - "bleu_score": 0.1530339727333596 - }, - "prompt_5": { - "bleu_score": 0.16518371246302418 - } - }, - "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "flores_ind2eng": { - "prompt_1": { - "bleu_score": 0.26704916114083965 - }, - "prompt_2": { - "bleu_score": 0.28221684718897694 - }, - "prompt_3": { - "bleu_score": 0.2873617369849033 - }, - "prompt_4": { - "bleu_score": 0.24256789651139357 - }, - "prompt_5": { - "bleu_score": 0.26337284077685374 - } - }, - "flores_vie2eng": { - "prompt_1": { - "bleu_score": 0.22066396747375716 - }, - "prompt_2": { - "bleu_score": 0.23341111449709245 - }, - "prompt_3": { - "bleu_score": 0.2916205096218084 - }, - "prompt_4": { - "bleu_score": 0.19199823018621098 - }, - "prompt_5": { - "bleu_score": 0.22112923539912738 - } - }, - "flores_zho2eng": { - "prompt_1": { - "bleu_score": 0.15262094208532923 - }, - "prompt_2": { - "bleu_score": 0.1649457410232309 - }, - "prompt_3": { - "bleu_score": 0.17160870065442554 - }, - "prompt_4": { - "bleu_score": 0.14266131254798284 - }, - "prompt_5": { - "bleu_score": 0.15887213080828538 - } - }, - "flores_zsm2eng": { - "prompt_1": { - "bleu_score": 0.25048215081289615 - }, - "prompt_2": { - "bleu_score": 0.275143144601852 - }, - "prompt_3": { - "bleu_score": 0.2886163282748519 - }, - "prompt_4": { - "bleu_score": 0.22802777936343308 - }, - "prompt_5": { - "bleu_score": 0.257985598532049 - } - }, - "mmlu": { - "prompt_1": { - "accuracy": 0.5717619603267211 - }, - "prompt_2": { - "accuracy": 0.5682613768961493 - }, - "prompt_3": { - "accuracy": 0.5554259043173863 - }, - "prompt_4": { - "accuracy": 0.5717619603267211 - }, - "prompt_5": { - "accuracy": 0.5635939323220537 - } - }, - "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, "c_eval": { "prompt_1": { - "accuracy": 0.4264487369985141 + "accuracy": 0.30534918276374445 }, "prompt_2": { - "accuracy": 0.4442793462109955 + "accuracy": 0.29420505200594355 }, "prompt_3": { - "accuracy": 0.43164933135215455 + "accuracy": 0.3439821693907875 }, "prompt_4": { - "accuracy": 0.39301634472511143 + "accuracy": 0.3588410104011887 }, "prompt_5": { - "accuracy": 0.4078751857355126 + "accuracy": 0.3060921248142645 } }, "c_eval_full": { "prompt_1": { - "accuracy": 0.43773349937733497, + "accuracy": 0.31693648816936487, "category_acc": { - "computer_network": 0.2916666666666667, - "operating_system": 0.5833333333333334, - "computer_architecture": 0.4230769230769231, - "college_programming": 0.5, - "college_physics": 0.375, - "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.5, - "probability_and_statistics": 0.2608695652173913, - "discrete_mathematics": 0.2857142857142857, - "electrical_engineer": 0.35714285714285715, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.2916666666666667, - "high_school_biology": 0.3333333333333333, + "computer_network": 0.25, + "operating_system": 0.375, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.30952380952380953, + "college_physics": 0.4583333333333333, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.25, + "high_school_chemistry": 0.5, + "high_school_biology": 0.20833333333333334, "middle_school_mathematics": 0.25, - "middle_school_biology": 0.6153846153846154, - "middle_school_physics": 0.375, - "middle_school_chemistry": 0.52, - "veterinary_medicine": 0.5, - "college_economics": 0.38333333333333336, - "business_administration": 0.47368421052631576, - "marxism": 0.5416666666666666, - "mao_zedong_thought": 0.5517241379310345, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.5510204081632653, - "high_school_politics": 0.4583333333333333, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.4230769230769231, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.39285714285714285, - "ideological_and_moral_cultivation": 0.5, - "logic": 0.48148148148148145, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.5, - "art_studies": 0.5789473684210527, - "professional_tour_guide": 0.4117647058823529, - "legal_professional": 0.5, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.6, - "middle_school_history": 0.6666666666666666, - "civil_servant": 0.4230769230769231, - "sports_science": 0.5416666666666666, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.16666666666666666, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.49019607843137253, - "accountant": 0.5, - "fire_engineer": 0.3611111111111111, - "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.35185185185185186, - "physician": 0.48148148148148145 + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.35, + "business_administration": 0.2631578947368421, + "marxism": 0.375, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.375, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.5925925925925926, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.36538461538461536, + "sports_science": 0.2916666666666667, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.35185185185185186, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.3333333333333333, + "physician": 0.24074074074074073 } }, "prompt_2": { - "accuracy": 0.4364881693648817, + "accuracy": 0.3163138231631382, "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.6666666666666666, - "computer_architecture": 0.38461538461538464, - "college_programming": 0.35714285714285715, - "college_physics": 0.375, - "college_chemistry": 0.3448275862068966, + "computer_network": 0.20833333333333334, + "operating_system": 0.041666666666666664, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.30952380952380953, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.2413793103448276, "advanced_mathematics": 0.3333333333333333, - "probability_and_statistics": 0.2608695652173913, - "discrete_mathematics": 0.14285714285714285, - "electrical_engineer": 0.35714285714285715, - "metrology_engineer": 0.4827586206896552, - "high_school_mathematics": 0.043478260869565216, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.2916666666666667, - "high_school_biology": 0.375, - "middle_school_mathematics": 0.3333333333333333, - "middle_school_biology": 0.6153846153846154, - "middle_school_physics": 0.5833333333333334, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.38333333333333336, - "business_administration": 0.3684210526315789, - "marxism": 0.5416666666666666, - "mao_zedong_thought": 0.5517241379310345, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.6326530612244898, - "high_school_politics": 0.5833333333333334, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.46153846153846156, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.4642857142857143, - "ideological_and_moral_cultivation": 0.5833333333333334, - "logic": 0.7037037037037037, - "law": 0.41379310344827586, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.47368421052631576, - "professional_tour_guide": 0.3235294117647059, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.2, + "business_administration": 0.2894736842105263, + "marxism": 0.25, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.5, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.5555555555555556, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.25, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.29411764705882354, "legal_professional": 0.39285714285714285, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.6, - "middle_school_history": 0.5555555555555556, - "civil_servant": 0.5, - "sports_science": 0.4583333333333333, - "plant_protection": 0.5185185185185185, - "basic_medicine": 0.16666666666666666, - "clinical_medicine": 0.4444444444444444, - "urban_and_rural_planner": 0.5098039215686274, - "accountant": 0.4074074074074074, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.36, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.4807692307692308, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.375, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.37037037037037035, "fire_engineer": 0.25, - "environmental_impact_assessment_engineer": 0.5277777777777778, - "tax_accountant": 0.4444444444444444, - "physician": 0.48148148148148145 + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.3333333333333333, + "physician": 0.2037037037037037 } }, "prompt_3": { - "accuracy": 0.4613947696139477, + "accuracy": 0.3318804483188045, "category_acc": { - "computer_network": 0.375, - "operating_system": 0.6666666666666666, - "computer_architecture": 0.5384615384615384, - "college_programming": 0.38095238095238093, - "college_physics": 0.375, - "college_chemistry": 0.2413793103448276, + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.3333333333333333, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3448275862068966, "advanced_mathematics": 0.375, - "probability_and_statistics": 0.391304347826087, - "discrete_mathematics": 0.23809523809523808, - "electrical_engineer": 0.47619047619047616, - "metrology_engineer": 0.4482758620689655, - "high_school_mathematics": 0.13043478260869565, - "high_school_physics": 0.25, - "high_school_chemistry": 0.2916666666666667, - "high_school_biology": 0.4583333333333333, - "middle_school_mathematics": 0.4166666666666667, - "middle_school_biology": 0.7307692307692307, - "middle_school_physics": 0.5416666666666666, - "middle_school_chemistry": 0.56, - "veterinary_medicine": 0.5714285714285714, - "college_economics": 0.5, - "business_administration": 0.5263157894736842, - "marxism": 0.5, - "mao_zedong_thought": 0.6206896551724138, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.6122448979591837, - "high_school_politics": 0.5, - "high_school_geography": 0.5416666666666666, - "middle_school_politics": 0.5, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.35714285714285715, - "ideological_and_moral_cultivation": 0.5416666666666666, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.3333333333333333, + "business_administration": 0.18421052631578946, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.125, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.25, "logic": 0.5925925925925926, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.5263157894736842, - "professional_tour_guide": 0.4117647058823529, - "legal_professional": 0.42857142857142855, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.39285714285714285, "high_school_chinese": 0.25, - "high_school_history": 0.68, - "middle_school_history": 0.48148148148148145, - "civil_servant": 0.5192307692307693, + "high_school_history": 0.44, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.4423076923076923, "sports_science": 0.2916666666666667, - "plant_protection": 0.5185185185185185, + "plant_protection": 0.3333333333333333, "basic_medicine": 0.16666666666666666, - "clinical_medicine": 0.5555555555555556, - "urban_and_rural_planner": 0.5686274509803921, - "accountant": 0.3888888888888889, - "fire_engineer": 0.3888888888888889, - "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.3888888888888889, - "physician": 0.48148148148148145 + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.35185185185185186, + "physician": 0.2037037037037037 } }, "prompt_4": { - "accuracy": 0.41033623910336237, + "accuracy": 0.3835616438356164, "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.625, - "computer_architecture": 0.5769230769230769, + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.4230769230769231, "college_programming": 0.40476190476190477, - "college_physics": 0.25, - "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.25, - "probability_and_statistics": 0.30434782608695654, - "discrete_mathematics": 0.19047619047619047, - "electrical_engineer": 0.40476190476190477, - "metrology_engineer": 0.4482758620689655, - "high_school_mathematics": 0.30434782608695654, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.375, - "high_school_biology": 0.25, + "college_physics": 0.375, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.3333333333333333, "middle_school_mathematics": 0.20833333333333334, - "middle_school_biology": 0.5769230769230769, - "middle_school_physics": 0.4583333333333333, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.38333333333333336, - "business_administration": 0.42105263157894735, - "marxism": 0.5, - "mao_zedong_thought": 0.6206896551724138, - "education_science": 0.5, - "teacher_qualification": 0.4897959183673469, - "high_school_politics": 0.4583333333333333, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.46153846153846156, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.14285714285714285, - "ideological_and_moral_cultivation": 0.4583333333333333, - "logic": 0.37037037037037035, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.5, - "art_studies": 0.5263157894736842, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.35, + "business_administration": 0.3684210526315789, + "marxism": 0.25, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.375, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.5925925925925926, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.42105263157894735, "professional_tour_guide": 0.2647058823529412, - "legal_professional": 0.21428571428571427, - "high_school_chinese": 0.041666666666666664, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.125, "high_school_history": 0.64, - "middle_school_history": 0.6296296296296297, - "civil_servant": 0.4423076923076923, - "sports_science": 0.25, - "plant_protection": 0.5925925925925926, - "basic_medicine": 0.16666666666666666, - "clinical_medicine": 0.4444444444444444, - "urban_and_rural_planner": 0.5294117647058824, - "accountant": 0.37037037037037035, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.38461538461538464, + "sports_science": 0.2916666666666667, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.42592592592592593, "fire_engineer": 0.4444444444444444, - "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.3333333333333333, - "physician": 0.46296296296296297 + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.2777777777777778, + "physician": 0.3333333333333333 } }, "prompt_5": { - "accuracy": 0.4327521793275218, + "accuracy": 0.31320049813200496, "category_acc": { "computer_network": 0.5, - "operating_system": 0.5416666666666666, - "computer_architecture": 0.34615384615384615, - "college_programming": 0.4523809523809524, - "college_physics": 0.4583333333333333, - "college_chemistry": 0.1724137931034483, - "advanced_mathematics": 0.125, - "probability_and_statistics": 0.17391304347826086, + "operating_system": 0.375, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.35714285714285715, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, "discrete_mathematics": 0.19047619047619047, - "electrical_engineer": 0.30952380952380953, - "metrology_engineer": 0.5517241379310345, - "high_school_mathematics": 0.08695652173913043, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.4583333333333333, - "high_school_biology": 0.3333333333333333, - "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.5, - "middle_school_physics": 0.625, - "middle_school_chemistry": 0.52, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.44, "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.4166666666666667, - "business_administration": 0.34210526315789475, - "marxism": 0.5416666666666666, - "mao_zedong_thought": 0.5517241379310345, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.5714285714285714, - "high_school_politics": 0.5416666666666666, - "high_school_geography": 0.4166666666666667, - "middle_school_politics": 0.5384615384615384, - "middle_school_geography": 0.47058823529411764, + "college_economics": 0.3, + "business_administration": 0.3684210526315789, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.10344827586206896, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.625, + "high_school_geography": 0.25, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.5882352941176471, "modern_chinese_history": 0.21428571428571427, - "ideological_and_moral_cultivation": 0.5833333333333334, + "ideological_and_moral_cultivation": 0.16666666666666666, "logic": 0.5925925925925926, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.42857142857142855, - "art_studies": 0.5263157894736842, - "professional_tour_guide": 0.4117647058823529, - "legal_professional": 0.4642857142857143, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.64, - "middle_school_history": 0.5555555555555556, - "civil_servant": 0.46153846153846156, - "sports_science": 0.625, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.375, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.47058823529411764, - "accountant": 0.46296296296296297, - "fire_engineer": 0.4166666666666667, - "environmental_impact_assessment_engineer": 0.5833333333333334, - "tax_accountant": 0.3148148148148148, - "physician": 0.4444444444444444 + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.4230769230769231, + "sports_science": 0.16666666666666666, + "plant_protection": 0.07407407407407407, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2962962962962963, + "physician": 0.3148148148148148 } } }, "cmmlu": { "prompt_1": { - "accuracy": 0.46236559139784944 + "accuracy": 0.2939068100358423 }, "prompt_2": { - "accuracy": 0.45161290322580644 + "accuracy": 0.25806451612903225 }, "prompt_3": { - "accuracy": 0.3655913978494624 + "accuracy": 0.25448028673835127 }, "prompt_4": { - "accuracy": 0.43727598566308246 + "accuracy": 0.4050179211469534 }, "prompt_5": { - "accuracy": 0.46236559139784944 + "accuracy": 0.33691756272401435 } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.28501122431359005, + "category_acc": { + "agronomy": 0.30177514792899407, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.2926829268292683, + "arts": 0.18125, + "astronomy": 0.2545454545454545, + "business_ethics": 0.24401913875598086, + "chinese_civil_service_exam": 0.38125, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.411214953271028, + "chinese_history": 0.4613003095975232, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.35195530726256985, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.19811320754716982, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.2673992673992674, + "computer_science": 0.23529411764705882, + "computer_security": 0.3391812865497076, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.23741007194244604, + "economics": 0.2830188679245283, + "education": 0.2331288343558282, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.21428571428571427, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.28991596638655465, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.21481481481481482, + "food_science": 0.26573426573426573, + "genetics": 0.25, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.3560606060606061, + "high_school_geography": 0.3898305084745763, + "high_school_mathematics": 0.17682926829268292, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.3776223776223776, + "human_sexuality": 0.2698412698412698, + "international_law": 0.31351351351351353, + "journalism": 0.27325581395348836, + "jurisprudence": 0.29683698296836986, + "legal_and_moral_basis": 0.35514018691588783, + "logical": 0.2764227642276423, + "machine_learning": 0.4180327868852459, + "management": 0.3142857142857143, + "marketing": 0.2777777777777778, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.2482758620689655, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.26857142857142857, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.27393617021276595, + "professional_psychology": 0.2801724137931034, + "public_relations": 0.2988505747126437, + "security_study": 0.2740740740740741, + "sociology": 0.26991150442477874, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.24324324324324326, + "virology": 0.2485207100591716, + "world_history": 0.38509316770186336, + "world_religions": 0.2375 + } + }, + "prompt_2": { + "accuracy": 0.27784493179070974, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.25, + "arts": 0.23125, + "astronomy": 0.24848484848484848, + "business_ethics": 0.3062200956937799, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.22900763358778625, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.39009287925696595, + "chinese_literature": 0.19607843137254902, + "chinese_teacher_qualification": 0.3016759776536313, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.25, + "college_mathematics": 0.34285714285714286, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.304029304029304, + "computer_science": 0.2549019607843137, + "computer_security": 0.30409356725146197, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.26618705035971224, + "economics": 0.3333333333333333, + "education": 0.3128834355828221, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.3111111111111111, + "food_science": 0.2097902097902098, + "genetics": 0.30113636363636365, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2702702702702703, + "journalism": 0.20930232558139536, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.3089430894308943, + "machine_learning": 0.28688524590163933, + "management": 0.22857142857142856, + "marketing": 0.26666666666666666, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2482758620689655, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.25, + "professional_psychology": 0.2672413793103448, + "public_relations": 0.2988505747126437, + "security_study": 0.34814814814814815, + "sociology": 0.2610619469026549, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.1893491124260355, + "world_history": 0.35403726708074534, + "world_religions": 0.225 + } + }, + "prompt_3": { + "accuracy": 0.28829217751683645, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.27439024390243905, + "arts": 0.24375, + "astronomy": 0.30303030303030304, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.3875, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.4205607476635514, + "chinese_history": 0.49226006191950467, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.36312849162011174, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.2523364485981308, + "college_engineering_hydrology": 0.2169811320754717, + "college_law": 0.25925925925925924, + "college_mathematics": 0.34285714285714286, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.27106227106227104, + "computer_science": 0.29411764705882354, + "computer_security": 0.391812865497076, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.30935251798561153, + "economics": 0.3584905660377358, + "education": 0.20245398773006135, + "electrical_engineering": 0.2441860465116279, + "elementary_chinese": 0.2896825396825397, + "elementary_commonsense": 0.19696969696969696, + "elementary_information_and_technology": 0.3025210084033613, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.28888888888888886, + "food_science": 0.2937062937062937, + "genetics": 0.24431818181818182, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.3609467455621302, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2648648648648649, + "journalism": 0.29069767441860467, + "jurisprudence": 0.3284671532846715, + "legal_and_moral_basis": 0.3364485981308411, + "logical": 0.21951219512195122, + "machine_learning": 0.36065573770491804, + "management": 0.23333333333333334, + "marketing": 0.2611111111111111, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.3103448275862069, + "philosophy": 0.20952380952380953, + "professional_accounting": 0.29714285714285715, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.2047872340425532, + "professional_psychology": 0.25, + "public_relations": 0.2988505747126437, + "security_study": 0.26666666666666666, + "sociology": 0.2168141592920354, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.27218934911242604, + "world_history": 0.391304347826087, + "world_religions": 0.2375 + } + }, + "prompt_4": { + "accuracy": 0.36988430322914867, + "category_acc": { + "agronomy": 0.3668639053254438, + "anatomy": 0.32432432432432434, + "ancient_chinese": 0.29878048780487804, + "arts": 0.4125, + "astronomy": 0.2727272727272727, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.45038167938931295, + "chinese_food_culture": 0.4117647058823529, + "chinese_foreign_policy": 0.48598130841121495, + "chinese_history": 0.48606811145510836, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.4692737430167598, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.4392523364485981, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.3425925925925926, + "college_mathematics": 0.19047619047619047, + "college_medical_statistics": 0.3867924528301887, + "college_medicine": 0.29304029304029305, + "computer_science": 0.25980392156862747, + "computer_security": 0.5321637426900585, + "conceptual_physics": 0.43537414965986393, + "construction_project_management": 0.35251798561151076, + "economics": 0.44654088050314467, + "education": 0.3619631901840491, + "electrical_engineering": 0.42441860465116277, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.4957983193277311, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.3111111111111111, + "food_science": 0.40559440559440557, + "genetics": 0.3693181818181818, + "global_facts": 0.436241610738255, + "high_school_biology": 0.3727810650887574, + "high_school_chemistry": 0.32575757575757575, + "high_school_geography": 0.3898305084745763, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.36507936507936506, + "international_law": 0.33513513513513515, + "journalism": 0.3081395348837209, + "jurisprudence": 0.40145985401459855, + "legal_and_moral_basis": 0.5514018691588785, + "logical": 0.3252032520325203, + "machine_learning": 0.4262295081967213, + "management": 0.3904761904761905, + "marketing": 0.4222222222222222, + "marxist_theory": 0.35978835978835977, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.3931034482758621, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.36, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.324468085106383, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.3275862068965517, + "security_study": 0.4444444444444444, + "sociology": 0.3938053097345133, + "sports_science": 0.3333333333333333, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.38461538461538464, + "world_history": 0.42857142857142855, + "world_religions": 0.39375 + } + }, + "prompt_5": { + "accuracy": 0.2940770160594025, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.25609756097560976, + "arts": 0.23125, + "astronomy": 0.23030303030303031, + "business_ethics": 0.31100478468899523, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.42990654205607476, + "chinese_history": 0.4520123839009288, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.329608938547486, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.32710280373831774, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.35185185185185186, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.39622641509433965, + "college_medicine": 0.2673992673992674, + "computer_science": 0.25, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.40816326530612246, + "construction_project_management": 0.302158273381295, + "economics": 0.3018867924528302, + "education": 0.3006134969325153, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.3487394957983193, + "elementary_mathematics": 0.20869565217391303, + "ethnology": 0.21481481481481482, + "food_science": 0.2937062937062937, + "genetics": 0.2784090909090909, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.1951219512195122, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3356643356643357, + "human_sexuality": 0.2777777777777778, + "international_law": 0.3081081081081081, + "journalism": 0.23255813953488372, + "jurisprudence": 0.3381995133819951, + "legal_and_moral_basis": 0.4252336448598131, + "logical": 0.3008130081300813, + "machine_learning": 0.32786885245901637, + "management": 0.2904761904761905, + "marketing": 0.25, + "marxist_theory": 0.32275132275132273, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.2689655172413793, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.21808510638297873, + "professional_psychology": 0.30603448275862066, + "public_relations": 0.29310344827586204, + "security_study": 0.2814814814814815, + "sociology": 0.26991150442477874, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.28402366863905326, + "world_history": 0.39751552795031053, + "world_religions": 0.28125 + } + } }, "zbench": { "prompt_1": { - "accuracy": 0.3939393939393939 + "accuracy": 0.24242424242424243 }, "prompt_2": { - "accuracy": 0.3333333333333333 + "accuracy": 0.12121212121212122 }, "prompt_3": { - "accuracy": 0.18181818181818182 + "accuracy": 0.15151515151515152 }, "prompt_4": { - "accuracy": 0.24242424242424243 + "accuracy": 0.30303030303030304 }, "prompt_5": { - "accuracy": 0.24242424242424243 + "accuracy": 0.18181818181818182 } }, "ind_emotion": { "prompt_1": { - "accuracy": 0.5659090909090909 + "accuracy": 0.625 }, "prompt_2": { - "accuracy": 0.5545454545454546 + "accuracy": 0.6386363636363637 }, "prompt_3": { - "accuracy": 0.5454545454545454 + "accuracy": 0.6136363636363636 }, "prompt_4": { - "accuracy": 0.5045454545454545 + "accuracy": 0.5409090909090909 }, "prompt_5": { - "accuracy": 0.525 + "accuracy": 0.6204545454545455 } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46915254237288134 + }, + "prompt_2": { + "accuracy": 0.45389830508474577 + }, + "prompt_3": { + "accuracy": 0.4908474576271186 + }, + "prompt_4": { + "accuracy": 0.41559322033898305 + }, + "prompt_5": { + "accuracy": 0.4216949152542373 + } }, "c3": { "prompt_1": { - "accuracy": 0.7722513089005235 + "accuracy": 0.7816005983545251 }, "prompt_2": { - "accuracy": 0.7700074794315632 + "accuracy": 0.5523560209424084 }, "prompt_3": { - "accuracy": 0.7524308152580403 + "accuracy": 0.8006731488406881 }, "prompt_4": { - "accuracy": 0.7763649962602842 + "accuracy": 0.6851159311892296 }, "prompt_5": { - "accuracy": 0.7677636499626028 + "accuracy": 0.7916978309648467 } }, "dream": { "prompt_1": { - "accuracy": 0.8201861832435081 + "accuracy": 0.8809407153356198 }, "prompt_2": { - "accuracy": 0.850563449289564 + "accuracy": 0.8892699657030867 }, "prompt_3": { - "accuracy": 0.7951984321411073 + "accuracy": 0.8784909358157765 }, "prompt_4": { - "accuracy": 0.8113669769720725 + "accuracy": 0.8794708476237139 }, "prompt_5": { - "accuracy": 0.8260656540911318 + "accuracy": 0.8755512003919648 } }, "samsum": { "prompt_1": { - "rouge1": 0.3898256503481187, - "rouge2": 0.15306940203158081, - "rougeL": 0.3011744289589988, - "avg_rouge": 0.2813564937795661 + "rouge1": 0.3992689686335395, + "rouge2": 0.1649072829138515, + "rougeL": 0.31251190740179147, + "avg_rouge": 0.29222938631639417 }, "prompt_2": { - "rouge1": 0.406130302607862, - "rouge2": 0.15800781301638936, - "rougeL": 0.3096910196450411, - "avg_rouge": 0.29127637842309745 + "rouge1": 0.4149958909508306, + "rouge2": 0.17332643372666745, + "rougeL": 0.3219141360236752, + "avg_rouge": 0.30341215356705775 }, "prompt_3": { - "rouge1": 0.38409266698887473, - "rouge2": 0.14798264146118462, - "rougeL": 0.29370600998592633, - "avg_rouge": 0.2752604394786619 + "rouge1": 0.3922099270623417, + "rouge2": 0.15755329731140477, + "rougeL": 0.30426789092009127, + "avg_rouge": 0.2846770384312793 }, "prompt_4": { - "rouge1": 0.38168641649733276, - "rouge2": 0.14359103655775662, - "rougeL": 0.29018365722911116, - "avg_rouge": 0.27182037009473353 + "rouge1": 0.3875422557422527, + "rouge2": 0.15520209191882986, + "rougeL": 0.29887378320084235, + "avg_rouge": 0.28053937695397496 }, "prompt_5": { - "rouge1": 0.40327733025555995, - "rouge2": 0.14602691601227763, - "rougeL": 0.3115648821754325, - "avg_rouge": 0.2869563761477567 + "rouge1": 0.41424382573544144, + "rouge2": 0.16273788460008867, + "rougeL": 0.3231591977429508, + "avg_rouge": 0.30004696935949365 } }, "dialogsum": { "prompt_1": { - "rouge1": 0.3441544129554577, - "rouge2": 0.12416434521540233, - "rougeL": 0.2639185356495055, - "avg_rouge": 0.24407909794012184 + "rouge1": 0.3471676316551315, + "rouge2": 0.12804934249301983, + "rougeL": 0.26693426071297793, + "avg_rouge": 0.24738374495370974 }, "prompt_2": { - "rouge1": 0.3535027948060569, - "rouge2": 0.12435379690458442, - "rougeL": 0.2673734873874034, - "avg_rouge": 0.24841002636601492 + "rouge1": 0.3536462719638873, + "rouge2": 0.12849295704954358, + "rougeL": 0.27132344139721254, + "avg_rouge": 0.2511542234702145 }, "prompt_3": { - "rouge1": 0.34898757078815607, - "rouge2": 0.12631055775356542, - "rougeL": 0.266283553995538, - "avg_rouge": 0.24719389417908647 + "rouge1": 0.3486625789053131, + "rouge2": 0.1312013920008153, + "rougeL": 0.26913111205422957, + "avg_rouge": 0.24966502765345266 }, "prompt_4": { - "rouge1": 0.33875794104002055, - "rouge2": 0.11643146803543704, - "rougeL": 0.25730453513515394, - "avg_rouge": 0.23749798140353717 + "rouge1": 0.34210585871053295, + "rouge2": 0.1238699268921856, + "rougeL": 0.2625347098391768, + "avg_rouge": 0.24283683181396512 }, "prompt_5": { - "rouge1": 0.3653246607541355, - "rouge2": 0.12170542663447692, - "rougeL": 0.27838259186096737, - "avg_rouge": 0.2551375597498599 + "rouge1": 0.3683250571772577, + "rouge2": 0.1288145261489626, + "rougeL": 0.28486316877157086, + "avg_rouge": 0.2606675840325971 } }, "sst2": { "prompt_1": { - "accuracy": 0.856651376146789 + "accuracy": 0.908256880733945 }, "prompt_2": { - "accuracy": 0.841743119266055 + "accuracy": 0.908256880733945 }, "prompt_3": { - "accuracy": 0.9002293577981652 + "accuracy": 0.9185779816513762 }, "prompt_4": { - "accuracy": 0.8314220183486238 + "accuracy": 0.9105504587155964 }, "prompt_5": { - "accuracy": 0.9036697247706422 + "accuracy": 0.6032110091743119 } }, "cola": { "prompt_1": { - "accuracy": 0.7948226270373921 + "accuracy": 0.8092042186001918 }, "prompt_2": { - "accuracy": 0.785234899328859 + "accuracy": 0.8034515819750719 }, "prompt_3": { - "accuracy": 0.7890699904122723 + "accuracy": 0.8139980824544583 }, "prompt_4": { - "accuracy": 0.7708533077660594 + "accuracy": 0.8293384467881112 }, "prompt_5": { - "accuracy": 0.7718120805369127 + "accuracy": 0.8082454458293384 } }, "qqp": { "prompt_1": { - "accuracy": 0.7065 + "accuracy": 0.725 }, "prompt_2": { - "accuracy": 0.8085 + "accuracy": 0.814 }, "prompt_3": { - "accuracy": 0.793 + "accuracy": 0.815 }, "prompt_4": { - "accuracy": 0.742 + "accuracy": 0.7515 }, "prompt_5": { - "accuracy": 0.7675 + "accuracy": 0.7795 } }, "mnli": { @@ -96397,7 +97212,7 @@ "accuracy": 0.6 }, "prompt_3": { - "accuracy": 0.7 + "accuracy": 0.6 }, "prompt_4": { "accuracy": 0.6 @@ -96408,7 +97223,7 @@ }, "qnli": { "prompt_1": { - "accuracy": 0.9 + "accuracy": 0.8 }, "prompt_2": { "accuracy": 0.9 @@ -96417,18 +97232,18 @@ "accuracy": 0.8 }, "prompt_4": { - "accuracy": 0.4 + "accuracy": 0.6 }, "prompt_5": { - "accuracy": 0.7 + "accuracy": 0.8 } }, "wnli": { "prompt_1": { - "accuracy": 0.4 + "accuracy": 0.7 }, "prompt_2": { - "accuracy": 1.0 + "accuracy": 0.9 }, "prompt_3": { "accuracy": 0.7 @@ -96437,7 +97252,7 @@ "accuracy": 0.4 }, "prompt_5": { - "accuracy": 0.5 + "accuracy": 0.7 } }, "rte": { @@ -96445,10 +97260,10 @@ "accuracy": 0.9 }, "prompt_2": { - "accuracy": 0.6 + "accuracy": 0.9 }, "prompt_3": { - "accuracy": 0.8 + "accuracy": 0.9 }, "prompt_4": { "accuracy": 0.8 @@ -96465,7 +97280,7 @@ "accuracy": 0.9 }, "prompt_3": { - "accuracy": 0.8 + "accuracy": 0.9 }, "prompt_4": { "accuracy": 0.9