diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -30224,576 +30224,576 @@ "zero_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.926470588235294, + "overall_acc": 0.9254201680672268, "language_acc": { "Spanish": 0.9294117647058824, - "English": 0.9436974789915966, - "Chinese": 0.9176470588235294, - "Vietnamese": 0.915126050420168 + "English": 0.9420168067226891, + "Chinese": 0.9184873949579831, + "Vietnamese": 0.9117647058823529 }, - "consistency_score_2": 0.9120448179271708, - "consistency_score_3": 0.8716386554621849, - "consistency_score_4": 0.8436974789915966, + "consistency_score_2": 0.9126050420168067, + "consistency_score_3": 0.8728991596638656, + "consistency_score_4": 0.8453781512605042, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.926890756302521, - "Spanish,Chinese": 0.9016806722689076, - "Spanish,Vietnamese": 0.9134453781512605, - "English,Chinese": 0.9226890756302522, - "English,Vietnamese": 0.9176470588235294, + "Spanish,English": 0.9285714285714286, + "Spanish,Chinese": 0.9042016806722689, + "Spanish,Vietnamese": 0.9126050420168067, + "English,Chinese": 0.9235294117647059, + "English,Vietnamese": 0.9168067226890756, "Chinese,Vietnamese": 0.8899159663865546 }, "3_combine": { - "Spanish,English,Chinese": 0.8798319327731092, - "Spanish,English,Vietnamese": 0.8815126050420168, - "Spanish,Chinese,Vietnamese": 0.8563025210084033, + "Spanish,English,Chinese": 0.8823529411764706, + "Spanish,English,Vietnamese": 0.8823529411764706, + "Spanish,Chinese,Vietnamese": 0.8579831932773109, "English,Chinese,Vietnamese": 0.8689075630252101 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8436974789915966 + "Spanish,English,Chinese,Vietnamese": 0.8453781512605042 } }, - "AC3_2": 0.9192011076803658, - "AC3_3": 0.8982185933811098, - "AC3_4": 0.8831487970906128 + "AC3_2": 0.9189679300183607, + "AC3_3": 0.898392709837736, + "AC3_4": 0.8835901663524087 }, "prompt_2": { - "overall_acc": 0.8930672268907563, + "overall_acc": 0.894327731092437, "language_acc": { "Spanish": 0.9235294117647059, - "English": 0.9142857142857143, - "Chinese": 0.8739495798319328, - "Vietnamese": 0.8605042016806723 + "English": 0.9117647058823529, + "Chinese": 0.8789915966386554, + "Vietnamese": 0.8630252100840337 }, - "consistency_score_2": 0.8663865546218488, - "consistency_score_3": 0.8067226890756303, - "consistency_score_4": 0.7680672268907563, + "consistency_score_2": 0.8677871148459384, + "consistency_score_3": 0.8088235294117647, + "consistency_score_4": 0.7705882352941177, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8983193277310925, - "Spanish,Chinese": 0.8596638655462185, - "Spanish,Vietnamese": 0.8563025210084033, + "Spanish,English": 0.8991596638655462, + "Spanish,Chinese": 0.8638655462184874, + "Spanish,Vietnamese": 0.8546218487394958, "English,Chinese": 0.8672268907563025, - "English,Vietnamese": 0.8663865546218488, - "Chinese,Vietnamese": 0.8504201680672269 + "English,Vietnamese": 0.8647058823529412, + "Chinese,Vietnamese": 0.8571428571428571 }, "3_combine": { - "Spanish,English,Chinese": 0.819327731092437, - "Spanish,English,Vietnamese": 0.8168067226890756, - "Spanish,Chinese,Vietnamese": 0.7907563025210084, - "English,Chinese,Vietnamese": 0.8 + "Spanish,English,Chinese": 0.8210084033613445, + "Spanish,English,Vietnamese": 0.8142857142857143, + "Spanish,Chinese,Vietnamese": 0.7966386554621848, + "English,Chinese,Vietnamese": 0.8033613445378152 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7680672268907563 + "Spanish,English,Chinese,Vietnamese": 0.7705882352941177 } }, - "AC3_2": 0.8795245954598574, - "AC3_3": 0.8477019283299347, - "AC3_4": 0.8258641156737193 + "AC3_2": 0.8808575482310176, + "AC3_3": 0.8494293239185193, + "AC3_4": 0.8278597141829724 }, "prompt_3": { - "overall_acc": 0.9176470588235294, + "overall_acc": 0.9184873949579831, "language_acc": { - "Spanish": 0.9201680672268907, - "English": 0.9394957983193277, - "Chinese": 0.9109243697478991, - "Vietnamese": 0.9 + "Spanish": 0.9210084033613445, + "English": 0.9403361344537815, + "Chinese": 0.9117647058823529, + "Vietnamese": 0.9008403361344538 }, - "consistency_score_2": 0.9019607843137255, - "consistency_score_3": 0.857983193277311, - "consistency_score_4": 0.826890756302521, + "consistency_score_2": 0.9015406162464986, + "consistency_score_3": 0.8565126050420169, + "consistency_score_4": 0.8243697478991596, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9252100840336135, + "Spanish,English": 0.9218487394957983, "Spanish,Chinese": 0.8941176470588236, - "Spanish,Vietnamese": 0.892436974789916, - "English,Chinese": 0.9100840336134454, - "English,Vietnamese": 0.907563025210084, + "Spanish,Vietnamese": 0.8907563025210085, + "English,Chinese": 0.9109243697478991, + "English,Vietnamese": 0.9092436974789916, "Chinese,Vietnamese": 0.8823529411764706 }, "3_combine": { - "Spanish,English,Chinese": 0.8689075630252101, - "Spanish,English,Vietnamese": 0.865546218487395, - "Spanish,Chinese,Vietnamese": 0.8411764705882353, + "Spanish,English,Chinese": 0.8672268907563025, + "Spanish,English,Vietnamese": 0.8630252100840337, + "Spanish,Chinese,Vietnamese": 0.8394957983193277, "English,Chinese,Vietnamese": 0.8563025210084033 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.826890756302521 + "Spanish,English,Chinese,Vietnamese": 0.8243697478991596 } }, - "AC3_2": 0.9097363082664338, - "AC3_3": 0.8868127278807935, - "AC3_4": 0.8699081944418048 + "AC3_2": 0.9099351075059162, + "AC3_3": 0.8864180634472081, + "AC3_4": 0.868887304148585 }, "prompt_4": { - "overall_acc": 0.915126050420168, + "overall_acc": 0.9144957983193277, "language_acc": { - "Spanish": 0.9218487394957983, - "English": 0.926890756302521, - "Chinese": 0.9008403361344538, - "Vietnamese": 0.9109243697478991 + "Spanish": 0.9235294117647059, + "English": 0.9252100840336135, + "Chinese": 0.8991596638655462, + "Vietnamese": 0.9100840336134454 }, - "consistency_score_2": 0.8948179271708684, - "consistency_score_3": 0.8510504201680673, - "consistency_score_4": 0.8218487394957983, + "consistency_score_2": 0.8931372549019608, + "consistency_score_3": 0.8481092436974791, + "consistency_score_4": 0.8176470588235294, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9184873949579831, + "Spanish,English": 0.9159663865546218, "Spanish,Chinese": 0.8857142857142857, - "Spanish,Vietnamese": 0.9016806722689076, - "English,Chinese": 0.8899159663865546, - "English,Vietnamese": 0.907563025210084, - "Chinese,Vietnamese": 0.865546218487395 + "Spanish,Vietnamese": 0.8991596638655462, + "English,Chinese": 0.888235294117647, + "English,Vietnamese": 0.9058823529411765, + "Chinese,Vietnamese": 0.8638655462184874 }, "3_combine": { - "Spanish,English,Chinese": 0.8563025210084033, - "Spanish,English,Vietnamese": 0.8714285714285714, - "Spanish,Chinese,Vietnamese": 0.8344537815126051, - "English,Chinese,Vietnamese": 0.8420168067226891 + "Spanish,English,Chinese": 0.853781512605042, + "Spanish,English,Vietnamese": 0.8680672268907563, + "Spanish,Chinese,Vietnamese": 0.8319327731092437, + "English,Chinese,Vietnamese": 0.838655462184874 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8218487394957983 + "Spanish,English,Chinese,Vietnamese": 0.8176470588235294 } }, - "AC3_2": 0.9048580570782975, - "AC3_3": 0.8819259260240758, - "AC3_4": 0.8659828517265586 + "AC3_2": 0.9036903429372387, + "AC3_3": 0.8800523331598746, + "AC3_4": 0.863363892505359 }, "prompt_5": { - "overall_acc": 0.8102941176470588, + "overall_acc": 0.8159663865546218, "language_acc": { - "Spanish": 0.8848739495798319, - "English": 0.8142857142857143, - "Chinese": 0.7630252100840336, - "Vietnamese": 0.7789915966386555 - }, - "consistency_score_2": 0.7879551820728291, - "consistency_score_3": 0.7092436974789916, - "consistency_score_4": 0.6655462184873949, + "Spanish": 0.8899159663865546, + "English": 0.8151260504201681, + "Chinese": 0.7747899159663866, + "Vietnamese": 0.7840336134453781 + }, + "consistency_score_2": 0.7901960784313725, + "consistency_score_3": 0.7140756302521009, + "consistency_score_4": 0.6697478991596638, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8092436974789916, - "Spanish,Chinese": 0.7605042016806722, - "Spanish,Vietnamese": 0.7764705882352941, - "English,Chinese": 0.7890756302521008, - "English,Vietnamese": 0.8109243697478992, - "Chinese,Vietnamese": 0.7815126050420168 + "Spanish,English": 0.807563025210084, + "Spanish,Chinese": 0.780672268907563, + "Spanish,Vietnamese": 0.7848739495798319, + "English,Chinese": 0.7815126050420168, + "English,Vietnamese": 0.8218487394957983, + "Chinese,Vietnamese": 0.7647058823529411 }, "3_combine": { - "Spanish,English,Chinese": 0.7109243697478992, - "Spanish,English,Vietnamese": 0.7201680672268908, - "Spanish,Chinese,Vietnamese": 0.6882352941176471, - "English,Chinese,Vietnamese": 0.7176470588235294 + "Spanish,English,Chinese": 0.7159663865546219, + "Spanish,English,Vietnamese": 0.7294117647058823, + "Spanish,Chinese,Vietnamese": 0.6974789915966386, + "English,Chinese,Vietnamese": 0.7134453781512605 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.6655462184873949 + "Spanish,English,Chinese,Vietnamese": 0.6697478991596638 } }, - "AC3_2": 0.7989685327252546, - "AC3_3": 0.7564089426231012, - "AC3_4": 0.7308218546710168 + "AC3_2": 0.8028744947078043, + "AC3_3": 0.7616283805349149, + "AC3_4": 0.7356620023079882 } }, "cross_mmlu": { "prompt_1": { - "overall_acc": 0.5857142857142856, + "overall_acc": 0.5866666666666667, "language_acc": { "Filipino": 0.5333333333333333, "Vietnamese": 0.6, - "Chinese": 0.6333333333333333, - "Spanish": 0.6133333333333333, - "Malay": 0.52, - "Indonesian": 0.52, + "Chinese": 0.6266666666666667, + "Spanish": 0.6066666666666667, + "Malay": 0.5266666666666666, + "Indonesian": 0.5333333333333333, "English": 0.68 }, - "consistency_score_2": 0.6180952380952381, - "consistency_score_3": 0.4647619047619048, - "consistency_score_4": 0.37885714285714284, - "consistency_score_5": 0.32317460317460317, - "consistency_score_6": 0.28380952380952384, - "consistency_score_7": 0.25333333333333335, + "consistency_score_2": 0.6174603174603175, + "consistency_score_3": 0.4632380952380953, + "consistency_score_4": 0.37504761904761913, + "consistency_score_5": 0.31619047619047624, + "consistency_score_6": 0.2733333333333333, + "consistency_score_7": 0.24, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5866666666666667, - "Filipino,Chinese": 0.6066666666666667, - "Filipino,Spanish": 0.5933333333333334, + "Filipino,Vietnamese": 0.58, + "Filipino,Chinese": 0.5933333333333334, + "Filipino,Spanish": 0.6066666666666667, "Filipino,Malay": 0.56, - "Filipino,Indonesian": 0.5933333333333334, - "Filipino,English": 0.5866666666666667, + "Filipino,Indonesian": 0.6, + "Filipino,English": 0.58, "Vietnamese,Chinese": 0.6866666666666666, - "Vietnamese,Spanish": 0.6266666666666667, - "Vietnamese,Malay": 0.6, - "Vietnamese,Indonesian": 0.66, - "Vietnamese,English": 0.6333333333333333, + "Vietnamese,Spanish": 0.64, + "Vietnamese,Malay": 0.6133333333333333, + "Vietnamese,Indonesian": 0.6733333333333333, + "Vietnamese,English": 0.64, "Chinese,Spanish": 0.66, "Chinese,Malay": 0.6333333333333333, - "Chinese,Indonesian": 0.5866666666666667, + "Chinese,Indonesian": 0.5933333333333334, "Chinese,English": 0.64, - "Spanish,Malay": 0.56, + "Spanish,Malay": 0.54, "Spanish,Indonesian": 0.6066666666666667, - "Spanish,English": 0.7066666666666667, - "Malay,Indonesian": 0.6533333333333333, - "Malay,English": 0.62, - "Indonesian,English": 0.58 + "Spanish,English": 0.7, + "Malay,Indonesian": 0.64, + "Malay,English": 0.5933333333333334, + "Indonesian,English": 0.5866666666666667 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.4666666666666667, - "Filipino,Vietnamese,Spanish": 0.43333333333333335, + "Filipino,Vietnamese,Chinese": 0.4533333333333333, + "Filipino,Vietnamese,Spanish": 0.44, "Filipino,Vietnamese,Malay": 0.41333333333333333, - "Filipino,Vietnamese,Indonesian": 0.46, + "Filipino,Vietnamese,Indonesian": 0.4666666666666667, "Filipino,Vietnamese,English": 0.43333333333333335, - "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Spanish": 0.46, "Filipino,Chinese,Malay": 0.44, "Filipino,Chinese,Indonesian": 0.4266666666666667, "Filipino,Chinese,English": 0.44666666666666666, "Filipino,Spanish,Malay": 0.4, - "Filipino,Spanish,Indonesian": 0.4266666666666667, - "Filipino,Spanish,English": 0.47333333333333333, + "Filipino,Spanish,Indonesian": 0.43333333333333335, + "Filipino,Spanish,English": 0.4666666666666667, "Filipino,Malay,Indonesian": 0.43333333333333335, - "Filipino,Malay,English": 0.43333333333333335, + "Filipino,Malay,English": 0.42, "Filipino,Indonesian,English": 0.43333333333333335, - "Vietnamese,Chinese,Spanish": 0.5133333333333333, - "Vietnamese,Chinese,Malay": 0.49333333333333335, - "Vietnamese,Chinese,Indonesian": 0.5133333333333333, - "Vietnamese,Chinese,English": 0.52, + "Vietnamese,Chinese,Spanish": 0.5266666666666666, + "Vietnamese,Chinese,Malay": 0.5, + "Vietnamese,Chinese,Indonesian": 0.52, + "Vietnamese,Chinese,English": 0.5266666666666666, "Vietnamese,Spanish,Malay": 0.44, - "Vietnamese,Spanish,Indonesian": 0.48, + "Vietnamese,Spanish,Indonesian": 0.4866666666666667, "Vietnamese,Spanish,English": 0.5066666666666667, "Vietnamese,Malay,Indonesian": 0.5066666666666667, - "Vietnamese,Malay,English": 0.4666666666666667, - "Vietnamese,Indonesian,English": 0.48, - "Chinese,Spanish,Malay": 0.4666666666666667, - "Chinese,Spanish,Indonesian": 0.46, - "Chinese,Spanish,English": 0.5333333333333333, + "Vietnamese,Malay,English": 0.46, + "Vietnamese,Indonesian,English": 0.4866666666666667, + "Chinese,Spanish,Malay": 0.46, + "Chinese,Spanish,Indonesian": 0.4666666666666667, + "Chinese,Spanish,English": 0.5266666666666666, "Chinese,Malay,Indonesian": 0.47333333333333333, - "Chinese,Malay,English": 0.4866666666666667, - "Chinese,Indonesian,English": 0.4533333333333333, - "Spanish,Malay,Indonesian": 0.44666666666666666, - "Spanish,Malay,English": 0.4866666666666667, - "Spanish,Indonesian,English": 0.48, - "Malay,Indonesian,English": 0.4866666666666667 + "Chinese,Malay,English": 0.47333333333333333, + "Chinese,Indonesian,English": 0.46, + "Spanish,Malay,Indonesian": 0.4266666666666667, + "Spanish,Malay,English": 0.4533333333333333, + "Spanish,Indonesian,English": 0.47333333333333333, + "Malay,Indonesian,English": 0.47333333333333333 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Spanish": 0.37333333333333335, "Filipino,Vietnamese,Chinese,Malay": 0.36666666666666664, "Filipino,Vietnamese,Chinese,Indonesian": 0.38666666666666666, "Filipino,Vietnamese,Chinese,English": 0.36666666666666664, "Filipino,Vietnamese,Spanish,Malay": 0.3333333333333333, "Filipino,Vietnamese,Spanish,Indonesian": 0.3466666666666667, - "Filipino,Vietnamese,Spanish,English": 0.38, + "Filipino,Vietnamese,Spanish,English": 0.37333333333333335, "Filipino,Vietnamese,Malay,Indonesian": 0.36666666666666664, - "Filipino,Vietnamese,Malay,English": 0.34, + "Filipino,Vietnamese,Malay,English": 0.3333333333333333, "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, "Filipino,Chinese,Spanish,Malay": 0.35333333333333333, "Filipino,Chinese,Spanish,Indonesian": 0.35333333333333333, - "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Spanish,English": 0.38, "Filipino,Chinese,Malay,Indonesian": 0.35333333333333333, - "Filipino,Chinese,Malay,English": 0.36, + "Filipino,Chinese,Malay,English": 0.35333333333333333, "Filipino,Chinese,Indonesian,English": 0.36, - "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, - "Filipino,Spanish,Malay,English": 0.3466666666666667, - "Filipino,Spanish,Indonesian,English": 0.38, - "Filipino,Malay,Indonesian,English": 0.35333333333333333, + "Filipino,Spanish,Malay,Indonesian": 0.32666666666666666, + "Filipino,Spanish,Malay,English": 0.3333333333333333, + "Filipino,Spanish,Indonesian,English": 0.37333333333333335, + "Filipino,Malay,Indonesian,English": 0.3466666666666667, "Vietnamese,Chinese,Spanish,Malay": 0.3933333333333333, - "Vietnamese,Chinese,Spanish,Indonesian": 0.41333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.42, "Vietnamese,Chinese,Spanish,English": 0.44, "Vietnamese,Chinese,Malay,Indonesian": 0.43333333333333335, - "Vietnamese,Chinese,Malay,English": 0.4066666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.42, - "Vietnamese,Spanish,Malay,Indonesian": 0.3933333333333333, - "Vietnamese,Spanish,Malay,English": 0.38, + "Vietnamese,Chinese,Malay,English": 0.4, + "Vietnamese,Chinese,Indonesian,English": 0.4266666666666667, + "Vietnamese,Spanish,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Spanish,Malay,English": 0.36666666666666664, "Vietnamese,Spanish,Indonesian,English": 0.4, - "Vietnamese,Malay,Indonesian,English": 0.42, - "Chinese,Spanish,Malay,Indonesian": 0.37333333333333335, - "Chinese,Spanish,Malay,English": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,English": 0.41333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.36666666666666664, + "Chinese,Spanish,Malay,English": 0.38666666666666666, "Chinese,Spanish,Indonesian,English": 0.3933333333333333, - "Chinese,Malay,Indonesian,English": 0.38666666666666666, - "Spanish,Malay,Indonesian,English": 0.4 + "Chinese,Malay,Indonesian,English": 0.38, + "Spanish,Malay,Indonesian,English": 0.37333333333333335 }, "5_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.32, "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32666666666666666, - "Filipino,Vietnamese,Chinese,Malay,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Malay,English": 0.3, "Filipino,Vietnamese,Chinese,Indonesian,English": 0.32666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3, - "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.32, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.31333333333333335, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.3, - "Filipino,Chinese,Spanish,Malay,English": 0.30666666666666664, - "Filipino,Chinese,Spanish,Indonesian,English": 0.32666666666666666, - "Filipino,Chinese,Malay,Indonesian,English": 0.3, - "Filipino,Spanish,Malay,Indonesian,English": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.35333333333333333, - "Vietnamese,Chinese,Spanish,Malay,English": 0.3466666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.28, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.29333333333333333, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32, + "Filipino,Chinese,Malay,Indonesian,English": 0.29333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3333333333333333, "Vietnamese,Chinese,Spanish,Indonesian,English": 0.36, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.36666666666666664, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.3466666666666667, - "Chinese,Spanish,Malay,Indonesian,English": 0.3333333333333333 + "Vietnamese,Chinese,Malay,Indonesian,English": 0.36, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.32 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.29333333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.28, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.31333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.26, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.26, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24 } }, - "AC3_2": 0.6014692585395479, - "AC3_3": 0.5182748348166127, - "AC3_4": 0.4601049424032136, - "AC3_5": 0.4165261214051973, - "AC3_6": 0.38235017989656905, - "AC3_7": 0.3536889897421823 + "AC3_2": 0.6016697424581617, + "AC3_3": 0.517697145572365, + "AC3_4": 0.45757443713470136, + "AC3_5": 0.41091420529907247, + "AC3_6": 0.3729198965974641, + "AC3_7": 0.34064516124911554 }, "prompt_2": { - "overall_acc": 0.5628571428571428, + "overall_acc": 0.5647619047619047, "language_acc": { "Filipino": 0.4866666666666667, - "Vietnamese": 0.5933333333333334, - "Chinese": 0.56, - "Spanish": 0.5933333333333334, + "Vietnamese": 0.6, + "Chinese": 0.5733333333333334, + "Spanish": 0.6, "Malay": 0.46, - "Indonesian": 0.56, + "Indonesian": 0.5466666666666666, "English": 0.6866666666666666 }, - "consistency_score_2": 0.6282539682539682, - "consistency_score_3": 0.48400000000000004, - "consistency_score_4": 0.40514285714285714, - "consistency_score_5": 0.35365079365079366, - "consistency_score_6": 0.3161904761904762, + "consistency_score_2": 0.6276190476190476, + "consistency_score_3": 0.48514285714285704, + "consistency_score_4": 0.4072380952380951, + "consistency_score_5": 0.35619047619047617, + "consistency_score_6": 0.3180952380952381, "consistency_score_7": 0.2866666666666667, "detailed_consistency_score": { "2_combine": { "Filipino,Vietnamese": 0.58, - "Filipino,Chinese": 0.5933333333333334, + "Filipino,Chinese": 0.6, "Filipino,Spanish": 0.5933333333333334, - "Filipino,Malay": 0.5533333333333333, - "Filipino,Indonesian": 0.5933333333333334, - "Filipino,English": 0.5733333333333334, - "Vietnamese,Chinese": 0.6733333333333333, - "Vietnamese,Spanish": 0.6533333333333333, - "Vietnamese,Malay": 0.6333333333333333, - "Vietnamese,Indonesian": 0.6733333333333333, - "Vietnamese,English": 0.6666666666666666, - "Chinese,Spanish": 0.6666666666666666, - "Chinese,Malay": 0.6266666666666667, - "Chinese,Indonesian": 0.62, - "Chinese,English": 0.6466666666666666, - "Spanish,Malay": 0.6, + "Filipino,Malay": 0.5466666666666666, + "Filipino,Indonesian": 0.5733333333333334, + "Filipino,English": 0.5666666666666667, + "Vietnamese,Chinese": 0.68, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,Malay": 0.64, + "Vietnamese,Indonesian": 0.6533333333333333, + "Vietnamese,English": 0.68, + "Chinese,Spanish": 0.68, + "Chinese,Malay": 0.6133333333333333, + "Chinese,Indonesian": 0.5933333333333334, + "Chinese,English": 0.66, + "Spanish,Malay": 0.6066666666666667, "Spanish,Indonesian": 0.62, - "Spanish,English": 0.7, - "Malay,Indonesian": 0.68, - "Malay,English": 0.5933333333333334, - "Indonesian,English": 0.6533333333333333 + "Spanish,English": 0.7066666666666667, + "Malay,Indonesian": 0.6733333333333333, + "Malay,English": 0.6, + "Indonesian,English": 0.6466666666666666 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.4666666666666667, + "Filipino,Vietnamese,Chinese": 0.47333333333333333, "Filipino,Vietnamese,Spanish": 0.4533333333333333, "Filipino,Vietnamese,Malay": 0.44, - "Filipino,Vietnamese,Indonesian": 0.4666666666666667, + "Filipino,Vietnamese,Indonesian": 0.44666666666666666, "Filipino,Vietnamese,English": 0.4533333333333333, - "Filipino,Chinese,Spanish": 0.4533333333333333, + "Filipino,Chinese,Spanish": 0.46, "Filipino,Chinese,Malay": 0.44666666666666666, - "Filipino,Chinese,Indonesian": 0.46, - "Filipino,Chinese,English": 0.4533333333333333, + "Filipino,Chinese,Indonesian": 0.44666666666666666, + "Filipino,Chinese,English": 0.46, "Filipino,Spanish,Malay": 0.4266666666666667, "Filipino,Spanish,Indonesian": 0.4533333333333333, "Filipino,Spanish,English": 0.4666666666666667, - "Filipino,Malay,Indonesian": 0.4533333333333333, - "Filipino,Malay,English": 0.42, - "Filipino,Indonesian,English": 0.4533333333333333, - "Vietnamese,Chinese,Spanish": 0.5333333333333333, - "Vietnamese,Chinese,Malay": 0.49333333333333335, - "Vietnamese,Chinese,Indonesian": 0.5266666666666666, - "Vietnamese,Chinese,English": 0.54, - "Vietnamese,Spanish,Malay": 0.5, - "Vietnamese,Spanish,Indonesian": 0.5133333333333333, - "Vietnamese,Spanish,English": 0.5266666666666666, + "Filipino,Malay,Indonesian": 0.44, + "Filipino,Malay,English": 0.41333333333333333, + "Filipino,Indonesian,English": 0.44, + "Vietnamese,Chinese,Spanish": 0.5466666666666666, + "Vietnamese,Chinese,Malay": 0.5, + "Vietnamese,Chinese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese,English": 0.5533333333333333, + "Vietnamese,Spanish,Malay": 0.5066666666666667, + "Vietnamese,Spanish,Indonesian": 0.5066666666666667, + "Vietnamese,Spanish,English": 0.54, "Vietnamese,Malay,Indonesian": 0.52, - "Vietnamese,Malay,English": 0.4866666666666667, - "Vietnamese,Indonesian,English": 0.5333333333333333, - "Chinese,Spanish,Malay": 0.5, - "Chinese,Spanish,Indonesian": 0.5, - "Chinese,Spanish,English": 0.5266666666666666, - "Chinese,Malay,Indonesian": 0.49333333333333335, - "Chinese,Malay,English": 0.4666666666666667, - "Chinese,Indonesian,English": 0.5066666666666667, - "Spanish,Malay,Indonesian": 0.5, - "Spanish,Malay,English": 0.4866666666666667, - "Spanish,Indonesian,English": 0.5133333333333333, - "Malay,Indonesian,English": 0.5066666666666667 + "Vietnamese,Malay,English": 0.5, + "Vietnamese,Indonesian,English": 0.5266666666666666, + "Chinese,Spanish,Malay": 0.5066666666666667, + "Chinese,Spanish,Indonesian": 0.49333333333333335, + "Chinese,Spanish,English": 0.5466666666666666, + "Chinese,Malay,Indonesian": 0.4866666666666667, + "Chinese,Malay,English": 0.47333333333333333, + "Chinese,Indonesian,English": 0.5, + "Spanish,Malay,Indonesian": 0.5066666666666667, + "Spanish,Malay,English": 0.5, + "Spanish,Indonesian,English": 0.52, + "Malay,Indonesian,English": 0.5133333333333333 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.38666666666666666, - "Filipino,Vietnamese,Chinese,Malay": 0.3933333333333333, - "Filipino,Vietnamese,Chinese,Indonesian": 0.3933333333333333, - "Filipino,Vietnamese,Chinese,English": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.4, + "Filipino,Vietnamese,Chinese,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Chinese,English": 0.4, "Filipino,Vietnamese,Spanish,Malay": 0.38, - "Filipino,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38, "Filipino,Vietnamese,Spanish,English": 0.3933333333333333, - "Filipino,Vietnamese,Malay,Indonesian": 0.38666666666666666, + "Filipino,Vietnamese,Malay,Indonesian": 0.38, "Filipino,Vietnamese,Malay,English": 0.37333333333333335, - "Filipino,Vietnamese,Indonesian,English": 0.3933333333333333, - "Filipino,Chinese,Spanish,Malay": 0.38666666666666666, + "Filipino,Vietnamese,Indonesian,English": 0.38, + "Filipino,Chinese,Spanish,Malay": 0.3933333333333333, "Filipino,Chinese,Spanish,Indonesian": 0.38666666666666666, - "Filipino,Chinese,Spanish,English": 0.38, - "Filipino,Chinese,Malay,Indonesian": 0.3933333333333333, - "Filipino,Chinese,Malay,English": 0.36, - "Filipino,Chinese,Indonesian,English": 0.4, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.38666666666666666, + "Filipino,Chinese,Malay,English": 0.36666666666666664, + "Filipino,Chinese,Indonesian,English": 0.3933333333333333, "Filipino,Spanish,Malay,Indonesian": 0.37333333333333335, "Filipino,Spanish,Malay,English": 0.36666666666666664, "Filipino,Spanish,Indonesian,English": 0.38, - "Filipino,Malay,Indonesian,English": 0.37333333333333335, - "Vietnamese,Chinese,Spanish,Malay": 0.44, - "Vietnamese,Chinese,Spanish,Indonesian": 0.4533333333333333, - "Vietnamese,Chinese,Spanish,English": 0.4533333333333333, + "Filipino,Malay,Indonesian,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,Malay": 0.44666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian": 0.44666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.4666666666666667, "Vietnamese,Chinese,Malay,Indonesian": 0.44, - "Vietnamese,Chinese,Malay,English": 0.41333333333333333, - "Vietnamese,Chinese,Indonesian,English": 0.46, - "Vietnamese,Spanish,Malay,Indonesian": 0.43333333333333335, - "Vietnamese,Spanish,Malay,English": 0.41333333333333333, - "Vietnamese,Spanish,Indonesian,English": 0.44, - "Vietnamese,Malay,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Malay,English": 0.4266666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.4533333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.44, + "Vietnamese,Spanish,Malay,English": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.44666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.4533333333333333, "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, - "Chinese,Spanish,Malay,English": 0.41333333333333333, + "Chinese,Spanish,Malay,English": 0.4266666666666667, "Chinese,Spanish,Indonesian,English": 0.43333333333333335, - "Chinese,Malay,Indonesian,English": 0.4066666666666667, - "Spanish,Malay,Indonesian,English": 0.4266666666666667 + "Chinese,Malay,Indonesian,English": 0.41333333333333333, + "Spanish,Malay,Indonesian,English": 0.44 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.36, "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.34, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.3466666666666667, "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.34, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.36, + "Filipino,Vietnamese,Chinese,Malay,English": 0.3466666666666667, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.35333333333333333, "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.34, "Filipino,Vietnamese,Spanish,Malay,English": 0.3333333333333333, "Filipino,Vietnamese,Spanish,Indonesian,English": 0.34, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.3466666666666667, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.34, "Filipino,Chinese,Spanish,Malay,Indonesian": 0.3466666666666667, - "Filipino,Chinese,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Chinese,Spanish,Malay,English": 0.3333333333333333, "Filipino,Chinese,Spanish,Indonesian,English": 0.34, "Filipino,Chinese,Malay,Indonesian,English": 0.3333333333333333, "Filipino,Spanish,Malay,Indonesian,English": 0.32666666666666666, "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, - "Vietnamese,Chinese,Spanish,Malay,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay,English": 0.38666666666666666, "Vietnamese,Chinese,Spanish,Indonesian,English": 0.4, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.38666666666666666, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.38, - "Chinese,Spanish,Malay,Indonesian,English": 0.36666666666666664 + "Vietnamese,Chinese,Malay,Indonesian,English": 0.3933333333333333, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3933333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.37333333333333335 }, "6_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.31333333333333335, "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.31333333333333335, "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.32, "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.3, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "7_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 } }, - "AC3_2": 0.5937602801816463, - "AC3_3": 0.5204585152341265, - "AC3_4": 0.47115196486952593, - "AC3_5": 0.434376762781048, - "AC3_6": 0.40491564768648614, - "AC3_7": 0.37986547080730276 + "AC3_2": 0.5945336984135499, + "AC3_3": 0.5219334438648634, + "AC3_4": 0.47323572496834326, + "AC3_5": 0.4368582261799737, + "AC3_6": 0.4069697436226759, + "AC3_7": 0.3802982848173773 }, "prompt_3": { - "overall_acc": 0.5771428571428572, + "overall_acc": 0.5742857142857144, "language_acc": { - "Filipino": 0.52, - "Vietnamese": 0.58, + "Filipino": 0.5066666666666667, + "Vietnamese": 0.5933333333333334, "Chinese": 0.5733333333333334, - "Spanish": 0.5933333333333334, - "Malay": 0.5, + "Spanish": 0.6, + "Malay": 0.4866666666666667, "Indonesian": 0.56, - "English": 0.7133333333333334 + "English": 0.7 }, - "consistency_score_2": 0.6301587301587301, - "consistency_score_3": 0.4872380952380952, - "consistency_score_4": 0.40628571428571425, - "consistency_score_5": 0.35238095238095235, - "consistency_score_6": 0.31238095238095237, + "consistency_score_2": 0.6276190476190476, + "consistency_score_3": 0.48514285714285704, + "consistency_score_4": 0.4043809523809524, + "consistency_score_5": 0.3507936507936507, + "consistency_score_6": 0.3114285714285714, "consistency_score_7": 0.28, "detailed_consistency_score": { "2_combine": { "Filipino,Vietnamese": 0.6133333333333333, - "Filipino,Chinese": 0.5733333333333334, - "Filipino,Spanish": 0.5866666666666667, - "Filipino,Malay": 0.5666666666666667, - "Filipino,Indonesian": 0.5866666666666667, - "Filipino,English": 0.5866666666666667, - "Vietnamese,Chinese": 0.6666666666666666, - "Vietnamese,Spanish": 0.6666666666666666, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.5933333333333334, + "Filipino,Malay": 0.56, + "Filipino,Indonesian": 0.5733333333333334, + "Filipino,English": 0.58, + "Vietnamese,Chinese": 0.68, + "Vietnamese,Spanish": 0.66, "Vietnamese,Malay": 0.6066666666666667, - "Vietnamese,Indonesian": 0.68, - "Vietnamese,English": 0.6666666666666666, - "Chinese,Spanish": 0.68, + "Vietnamese,Indonesian": 0.6666666666666666, + "Vietnamese,English": 0.6866666666666666, + "Chinese,Spanish": 0.6733333333333333, "Chinese,Malay": 0.6066666666666667, "Chinese,Indonesian": 0.6266666666666667, "Chinese,English": 0.6466666666666666, - "Spanish,Malay": 0.6, - "Spanish,Indonesian": 0.6266666666666667, - "Spanish,English": 0.7333333333333333, - "Malay,Indonesian": 0.6933333333333334, - "Malay,English": 0.5866666666666667, - "Indonesian,English": 0.6333333333333333 + "Spanish,Malay": 0.5933333333333334, + "Spanish,Indonesian": 0.6066666666666667, + "Spanish,English": 0.7466666666666667, + "Malay,Indonesian": 0.68, + "Malay,English": 0.58, + "Indonesian,English": 0.62 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.4666666666666667, + "Filipino,Vietnamese,Chinese": 0.48, "Filipino,Vietnamese,Spanish": 0.47333333333333333, "Filipino,Vietnamese,Malay": 0.44, - "Filipino,Vietnamese,Indonesian": 0.46, - "Filipino,Vietnamese,English": 0.4533333333333333, + "Filipino,Vietnamese,Indonesian": 0.4533333333333333, + "Filipino,Vietnamese,English": 0.46, "Filipino,Chinese,Spanish": 0.44666666666666666, "Filipino,Chinese,Malay": 0.44666666666666666, "Filipino,Chinese,Indonesian": 0.4533333333333333, "Filipino,Chinese,English": 0.44, "Filipino,Spanish,Malay": 0.44666666666666666, - "Filipino,Spanish,Indonesian": 0.4533333333333333, - "Filipino,Spanish,English": 0.4866666666666667, - "Filipino,Malay,Indonesian": 0.4533333333333333, - "Filipino,Malay,English": 0.43333333333333335, - "Filipino,Indonesian,English": 0.44666666666666666, - "Vietnamese,Chinese,Spanish": 0.5466666666666666, + "Filipino,Spanish,Indonesian": 0.44666666666666666, + "Filipino,Spanish,English": 0.49333333333333335, + "Filipino,Malay,Indonesian": 0.44666666666666666, + "Filipino,Malay,English": 0.4266666666666667, + "Filipino,Indonesian,English": 0.44, + "Vietnamese,Chinese,Spanish": 0.54, "Vietnamese,Chinese,Malay": 0.48, "Vietnamese,Chinese,Indonesian": 0.5266666666666666, - "Vietnamese,Chinese,English": 0.5333333333333333, - "Vietnamese,Spanish,Malay": 0.4866666666666667, - "Vietnamese,Spanish,Indonesian": 0.5133333333333333, - "Vietnamese,Spanish,English": 0.5533333333333333, - "Vietnamese,Malay,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese,English": 0.54, + "Vietnamese,Spanish,Malay": 0.48, + "Vietnamese,Spanish,Indonesian": 0.5066666666666667, + "Vietnamese,Spanish,English": 0.56, + "Vietnamese,Malay,Indonesian": 0.5066666666666667, "Vietnamese,Malay,English": 0.47333333333333333, - "Vietnamese,Indonesian,English": 0.5333333333333333, - "Chinese,Spanish,Malay": 0.5066666666666667, - "Chinese,Spanish,Indonesian": 0.5266666666666666, + "Vietnamese,Indonesian,English": 0.5266666666666666, + "Chinese,Spanish,Malay": 0.5, + "Chinese,Spanish,Indonesian": 0.52, "Chinese,Spanish,English": 0.5533333333333333, - "Chinese,Malay,Indonesian": 0.5133333333333333, + "Chinese,Malay,Indonesian": 0.5066666666666667, "Chinese,Malay,English": 0.4666666666666667, "Chinese,Indonesian,English": 0.5066666666666667, - "Spanish,Malay,Indonesian": 0.5, + "Spanish,Malay,Indonesian": 0.4866666666666667, "Spanish,Malay,English": 0.49333333333333335, "Spanish,Indonesian,English": 0.52, - "Malay,Indonesian,English": 0.5066666666666667 + "Malay,Indonesian,English": 0.49333333333333335 }, "4_combine": { "Filipino,Vietnamese,Chinese,Spanish": 0.3933333333333333, "Filipino,Vietnamese,Chinese,Malay": 0.38, "Filipino,Vietnamese,Chinese,Indonesian": 0.38, - "Filipino,Vietnamese,Chinese,English": 0.38, + "Filipino,Vietnamese,Chinese,English": 0.38666666666666666, "Filipino,Vietnamese,Spanish,Malay": 0.38666666666666666, "Filipino,Vietnamese,Spanish,Indonesian": 0.38, "Filipino,Vietnamese,Spanish,English": 0.4066666666666667, @@ -30809,22 +30809,22 @@ "Filipino,Spanish,Malay,Indonesian": 0.38, "Filipino,Spanish,Malay,English": 0.38, "Filipino,Spanish,Indonesian,English": 0.3933333333333333, - "Filipino,Malay,Indonesian,English": 0.38, - "Vietnamese,Chinese,Spanish,Malay": 0.4266666666666667, - "Vietnamese,Chinese,Spanish,Indonesian": 0.4666666666666667, + "Filipino,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Malay": 0.42, + "Vietnamese,Chinese,Spanish,Indonesian": 0.46, "Vietnamese,Chinese,Spanish,English": 0.4666666666666667, - "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.42, "Vietnamese,Chinese,Malay,English": 0.4, "Vietnamese,Chinese,Indonesian,English": 0.4533333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish,Malay,Indonesian": 0.41333333333333333, "Vietnamese,Spanish,Malay,English": 0.41333333333333333, "Vietnamese,Spanish,Indonesian,English": 0.44666666666666666, - "Vietnamese,Malay,Indonesian,English": 0.43333333333333335, - "Chinese,Spanish,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.4266666666666667, + "Chinese,Spanish,Malay,Indonesian": 0.43333333333333335, "Chinese,Spanish,Malay,English": 0.4266666666666667, "Chinese,Spanish,Indonesian,English": 0.4533333333333333, - "Chinese,Malay,Indonesian,English": 0.4266666666666667, - "Spanish,Malay,Indonesian,English": 0.43333333333333335 + "Chinese,Malay,Indonesian,English": 0.42, + "Spanish,Malay,Indonesian,English": 0.4266666666666667 }, "5_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3466666666666667, @@ -30842,12 +30842,12 @@ "Filipino,Chinese,Spanish,Indonesian,English": 0.35333333333333333, "Filipino,Chinese,Malay,Indonesian,English": 0.34, "Filipino,Spanish,Malay,Indonesian,English": 0.34, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.38, "Vietnamese,Chinese,Spanish,Malay,English": 0.36666666666666664, "Vietnamese,Chinese,Spanish,Indonesian,English": 0.4066666666666667, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.38, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.38, - "Chinese,Spanish,Malay,Indonesian,English": 0.38666666666666666 + "Vietnamese,Chinese,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.38 }, "6_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, @@ -30856,137 +30856,137 @@ "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.3, "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.3, "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.31333333333333335, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.34 }, "7_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.28 } }, - "AC3_2": 0.6024867585241545, - "AC3_3": 0.5283934962178687, - "AC3_4": 0.47687225030429414, - "AC3_5": 0.43758782196697493, - "AC3_6": 0.4053594370911686, - "AC3_7": 0.3770666666226756 + "AC3_2": 0.599769073982247, + "AC3_3": 0.5259639389240017, + "AC3_4": 0.47458488567691437, + "AC3_5": 0.4355427002176501, + "AC3_6": 0.4038525345166157, + "AC3_7": 0.3764548494542612 }, "prompt_4": { - "overall_acc": 0.5866666666666667, + "overall_acc": 0.5876190476190477, "language_acc": { "Filipino": 0.52, - "Vietnamese": 0.6133333333333333, + "Vietnamese": 0.6, "Chinese": 0.62, - "Spanish": 0.62, - "Malay": 0.49333333333333335, + "Spanish": 0.6266666666666667, + "Malay": 0.4866666666666667, "Indonesian": 0.56, - "English": 0.68 + "English": 0.7 }, - "consistency_score_2": 0.6234920634920633, - "consistency_score_3": 0.47085714285714286, - "consistency_score_4": 0.3826666666666666, - "consistency_score_5": 0.32253968253968257, + "consistency_score_2": 0.6253968253968255, + "consistency_score_3": 0.47352380952380957, + "consistency_score_4": 0.38476190476190475, + "consistency_score_5": 0.3234920634920635, "consistency_score_6": 0.27714285714285714, "consistency_score_7": 0.24, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5733333333333334, - "Filipino,Chinese": 0.5866666666666667, - "Filipino,Spanish": 0.5933333333333334, + "Filipino,Vietnamese": 0.5666666666666667, + "Filipino,Chinese": 0.5933333333333334, + "Filipino,Spanish": 0.6066666666666667, "Filipino,Malay": 0.56, - "Filipino,Indonesian": 0.6, - "Filipino,English": 0.5733333333333334, - "Vietnamese,Chinese": 0.6733333333333333, - "Vietnamese,Spanish": 0.6466666666666666, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,English": 0.5666666666666667, + "Vietnamese,Chinese": 0.6666666666666666, + "Vietnamese,Spanish": 0.66, "Vietnamese,Malay": 0.6266666666666667, "Vietnamese,Indonesian": 0.66, - "Vietnamese,English": 0.62, - "Chinese,Spanish": 0.68, + "Vietnamese,English": 0.6333333333333333, + "Chinese,Spanish": 0.6866666666666666, "Chinese,Malay": 0.6133333333333333, "Chinese,Indonesian": 0.62, - "Chinese,English": 0.6333333333333333, - "Spanish,Malay": 0.6466666666666666, - "Spanish,Indonesian": 0.6533333333333333, - "Spanish,English": 0.7, - "Malay,Indonesian": 0.6533333333333333, + "Chinese,English": 0.64, + "Spanish,Malay": 0.64, + "Spanish,Indonesian": 0.66, + "Spanish,English": 0.72, + "Malay,Indonesian": 0.64, "Malay,English": 0.58, "Indonesian,English": 0.6 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.44666666666666666, - "Filipino,Vietnamese,Spanish": 0.43333333333333335, - "Filipino,Vietnamese,Malay": 0.4266666666666667, + "Filipino,Vietnamese,Chinese": 0.44, + "Filipino,Vietnamese,Spanish": 0.44666666666666666, + "Filipino,Vietnamese,Malay": 0.42, "Filipino,Vietnamese,Indonesian": 0.4533333333333333, "Filipino,Vietnamese,English": 0.41333333333333333, - "Filipino,Chinese,Spanish": 0.4533333333333333, - "Filipino,Chinese,Malay": 0.42, + "Filipino,Chinese,Spanish": 0.46, + "Filipino,Chinese,Malay": 0.4266666666666667, "Filipino,Chinese,Indonesian": 0.44666666666666666, - "Filipino,Chinese,English": 0.43333333333333335, + "Filipino,Chinese,English": 0.44, "Filipino,Spanish,Malay": 0.43333333333333335, - "Filipino,Spanish,Indonesian": 0.44, - "Filipino,Spanish,English": 0.46, - "Filipino,Malay,Indonesian": 0.44666666666666666, + "Filipino,Spanish,Indonesian": 0.4533333333333333, + "Filipino,Spanish,English": 0.4666666666666667, + "Filipino,Malay,Indonesian": 0.44, "Filipino,Malay,English": 0.3933333333333333, - "Filipino,Indonesian,English": 0.4266666666666667, - "Vietnamese,Chinese,Spanish": 0.5266666666666666, + "Filipino,Indonesian,English": 0.42, + "Vietnamese,Chinese,Spanish": 0.5333333333333333, "Vietnamese,Chinese,Malay": 0.48, "Vietnamese,Chinese,Indonesian": 0.52, - "Vietnamese,Chinese,English": 0.5, + "Vietnamese,Chinese,English": 0.5133333333333333, "Vietnamese,Spanish,Malay": 0.5, - "Vietnamese,Spanish,Indonesian": 0.52, - "Vietnamese,Spanish,English": 0.5, - "Vietnamese,Malay,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,Indonesian": 0.5333333333333333, + "Vietnamese,Spanish,English": 0.5133333333333333, + "Vietnamese,Malay,Indonesian": 0.5, "Vietnamese,Malay,English": 0.4533333333333333, - "Vietnamese,Indonesian,English": 0.49333333333333335, - "Chinese,Spanish,Malay": 0.5066666666666667, - "Chinese,Spanish,Indonesian": 0.5266666666666666, - "Chinese,Spanish,English": 0.5333333333333333, + "Vietnamese,Indonesian,English": 0.5, + "Chinese,Spanish,Malay": 0.5, + "Chinese,Spanish,Indonesian": 0.5333333333333333, + "Chinese,Spanish,English": 0.5466666666666666, "Chinese,Malay,Indonesian": 0.48, - "Chinese,Malay,English": 0.4533333333333333, - "Chinese,Indonesian,English": 0.47333333333333333, - "Spanish,Malay,Indonesian": 0.5133333333333333, - "Spanish,Malay,English": 0.4866666666666667, - "Spanish,Indonesian,English": 0.5133333333333333, + "Chinese,Malay,English": 0.44666666666666666, + "Chinese,Indonesian,English": 0.48, + "Spanish,Malay,Indonesian": 0.5066666666666667, + "Spanish,Malay,English": 0.49333333333333335, + "Spanish,Indonesian,English": 0.5266666666666666, "Malay,Indonesian,English": 0.46 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Filipino,Vietnamese,Chinese,Spanish": 0.37333333333333335, "Filipino,Vietnamese,Chinese,Malay": 0.3466666666666667, "Filipino,Vietnamese,Chinese,Indonesian": 0.38666666666666666, - "Filipino,Vietnamese,Chinese,English": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,English": 0.36, "Filipino,Vietnamese,Spanish,Malay": 0.36, - "Filipino,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,Indonesian": 0.38, "Filipino,Vietnamese,Spanish,English": 0.35333333333333333, - "Filipino,Vietnamese,Malay,Indonesian": 0.37333333333333335, + "Filipino,Vietnamese,Malay,Indonesian": 0.36666666666666664, "Filipino,Vietnamese,Malay,English": 0.32, - "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, + "Filipino,Vietnamese,Indonesian,English": 0.36, "Filipino,Chinese,Spanish,Malay": 0.36, - "Filipino,Chinese,Spanish,Indonesian": 0.38666666666666666, - "Filipino,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Chinese,Spanish,Indonesian": 0.3933333333333333, + "Filipino,Chinese,Spanish,English": 0.38, "Filipino,Chinese,Malay,Indonesian": 0.35333333333333333, "Filipino,Chinese,Malay,English": 0.32666666666666666, "Filipino,Chinese,Indonesian,English": 0.36, "Filipino,Spanish,Malay,Indonesian": 0.36666666666666664, "Filipino,Spanish,Malay,English": 0.34, - "Filipino,Spanish,Indonesian,English": 0.36666666666666664, + "Filipino,Spanish,Indonesian,English": 0.37333333333333335, "Filipino,Malay,Indonesian,English": 0.3333333333333333, "Vietnamese,Chinese,Spanish,Malay": 0.42, - "Vietnamese,Chinese,Spanish,Indonesian": 0.4533333333333333, - "Vietnamese,Chinese,Spanish,English": 0.44, + "Vietnamese,Chinese,Spanish,Indonesian": 0.46, + "Vietnamese,Chinese,Spanish,English": 0.44666666666666666, "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, "Vietnamese,Chinese,Malay,English": 0.38, - "Vietnamese,Chinese,Indonesian,English": 0.41333333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.44, + "Vietnamese,Chinese,Indonesian,English": 0.42, + "Vietnamese,Spanish,Malay,Indonesian": 0.43333333333333335, "Vietnamese,Spanish,Malay,English": 0.38666666666666666, - "Vietnamese,Spanish,Indonesian,English": 0.42, + "Vietnamese,Spanish,Indonesian,English": 0.43333333333333335, "Vietnamese,Malay,Indonesian,English": 0.4, "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, "Chinese,Spanish,Malay,English": 0.41333333333333333, - "Chinese,Spanish,Indonesian,English": 0.4266666666666667, + "Chinese,Spanish,Indonesian,English": 0.44, "Chinese,Malay,Indonesian,English": 0.37333333333333335, "Spanish,Malay,Indonesian,English": 0.41333333333333333 }, "5_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, "Filipino,Vietnamese,Chinese,Spanish,English": 0.31333333333333335, "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32, "Filipino,Vietnamese,Chinese,Malay,English": 0.28, @@ -30997,12 +30997,12 @@ "Filipino,Vietnamese,Malay,Indonesian,English": 0.3, "Filipino,Chinese,Spanish,Malay,Indonesian": 0.32, "Filipino,Chinese,Spanish,Malay,English": 0.3, - "Filipino,Chinese,Spanish,Indonesian,English": 0.32, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32666666666666666, "Filipino,Chinese,Malay,Indonesian,English": 0.28, "Filipino,Spanish,Malay,Indonesian,English": 0.3, "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.38666666666666666, "Vietnamese,Chinese,Spanish,Malay,English": 0.3466666666666667, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.37333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.38, "Vietnamese,Chinese,Malay,Indonesian,English": 0.34, "Vietnamese,Spanish,Malay,Indonesian,English": 0.36, "Chinese,Spanish,Malay,Indonesian,English": 0.35333333333333333 @@ -31020,178 +31020,178 @@ "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24 } }, - "AC3_2": 0.6045190625593502, - "AC3_3": 0.5224207492301385, - "AC3_4": 0.4632003667564327, - "AC3_5": 0.41623836122051383, - "AC3_6": 0.3764498345760449, - "AC3_7": 0.34064516124911554 + "AC3_2": 0.6059196669532015, + "AC3_3": 0.5244376062621747, + "AC3_4": 0.4650305489004528, + "AC3_5": 0.4172709473575536, + "AC3_6": 0.3766456890690947, + "AC3_7": 0.3408055235491547 }, "prompt_5": { - "overall_acc": 0.560952380952381, + "overall_acc": 0.5466666666666666, "language_acc": { - "Filipino": 0.5133333333333333, - "Vietnamese": 0.5666666666666667, - "Chinese": 0.5666666666666667, + "Filipino": 0.5066666666666667, + "Vietnamese": 0.5533333333333333, + "Chinese": 0.5333333333333333, "Spanish": 0.5866666666666667, - "Malay": 0.52, - "Indonesian": 0.56, - "English": 0.6133333333333333 + "Malay": 0.5133333333333333, + "Indonesian": 0.5333333333333333, + "English": 0.6 }, - "consistency_score_2": 0.6244444444444445, - "consistency_score_3": 0.4790476190476191, - "consistency_score_4": 0.3958095238095237, - "consistency_score_5": 0.33873015873015866, - "consistency_score_6": 0.29523809523809524, + "consistency_score_2": 0.6266666666666667, + "consistency_score_3": 0.48361904761904767, + "consistency_score_4": 0.3992380952380952, + "consistency_score_5": 0.34063492063492057, + "consistency_score_6": 0.29619047619047617, "consistency_score_7": 0.26, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5533333333333333, - "Filipino,Chinese": 0.5933333333333334, - "Filipino,Spanish": 0.5666666666666667, - "Filipino,Malay": 0.5866666666666667, - "Filipino,Indonesian": 0.58, - "Filipino,English": 0.58, - "Vietnamese,Chinese": 0.6333333333333333, - "Vietnamese,Spanish": 0.6466666666666666, - "Vietnamese,Malay": 0.5933333333333334, - "Vietnamese,Indonesian": 0.5933333333333334, - "Vietnamese,English": 0.6266666666666667, - "Chinese,Spanish": 0.6533333333333333, - "Chinese,Malay": 0.64, - "Chinese,Indonesian": 0.6533333333333333, - "Chinese,English": 0.6066666666666667, - "Spanish,Malay": 0.6466666666666666, - "Spanish,Indonesian": 0.6466666666666666, - "Spanish,English": 0.6933333333333334, - "Malay,Indonesian": 0.74, - "Malay,English": 0.6333333333333333, - "Indonesian,English": 0.6466666666666666 + "Filipino,Vietnamese": 0.56, + "Filipino,Chinese": 0.6, + "Filipino,Spanish": 0.5733333333333334, + "Filipino,Malay": 0.5733333333333334, + "Filipino,Indonesian": 0.5733333333333334, + "Filipino,English": 0.5333333333333333, + "Vietnamese,Chinese": 0.62, + "Vietnamese,Spanish": 0.7066666666666667, + "Vietnamese,Malay": 0.6333333333333333, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,English": 0.62, + "Chinese,Spanish": 0.6866666666666666, + "Chinese,Malay": 0.68, + "Chinese,Indonesian": 0.62, + "Chinese,English": 0.6, + "Spanish,Malay": 0.6866666666666666, + "Spanish,Indonesian": 0.64, + "Spanish,English": 0.6733333333333333, + "Malay,Indonesian": 0.7466666666666667, + "Malay,English": 0.6133333333333333, + "Indonesian,English": 0.6333333333333333 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.43333333333333335, - "Filipino,Vietnamese,Spanish": 0.43333333333333335, - "Filipino,Vietnamese,Malay": 0.4066666666666667, - "Filipino,Vietnamese,Indonesian": 0.42, - "Filipino,Vietnamese,English": 0.41333333333333333, - "Filipino,Chinese,Spanish": 0.4533333333333333, - "Filipino,Chinese,Malay": 0.4533333333333333, - "Filipino,Chinese,Indonesian": 0.48, - "Filipino,Chinese,English": 0.4266666666666667, - "Filipino,Spanish,Malay": 0.44666666666666666, - "Filipino,Spanish,Indonesian": 0.4533333333333333, - "Filipino,Spanish,English": 0.4533333333333333, - "Filipino,Malay,Indonesian": 0.49333333333333335, - "Filipino,Malay,English": 0.44, - "Filipino,Indonesian,English": 0.4533333333333333, - "Vietnamese,Chinese,Spanish": 0.5133333333333333, - "Vietnamese,Chinese,Malay": 0.48, - "Vietnamese,Chinese,Indonesian": 0.4866666666666667, + "Filipino,Vietnamese,Chinese": 0.4266666666666667, + "Filipino,Vietnamese,Spanish": 0.44, + "Filipino,Vietnamese,Malay": 0.42, + "Filipino,Vietnamese,Indonesian": 0.4266666666666667, + "Filipino,Vietnamese,English": 0.4066666666666667, + "Filipino,Chinese,Spanish": 0.4666666666666667, + "Filipino,Chinese,Malay": 0.46, + "Filipino,Chinese,Indonesian": 0.46, + "Filipino,Chinese,English": 0.4066666666666667, + "Filipino,Spanish,Malay": 0.4533333333333333, + "Filipino,Spanish,Indonesian": 0.44666666666666666, + "Filipino,Spanish,English": 0.44, + "Filipino,Malay,Indonesian": 0.5066666666666667, + "Filipino,Malay,English": 0.42, + "Filipino,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.54, + "Vietnamese,Chinese,Malay": 0.5133333333333333, + "Vietnamese,Chinese,Indonesian": 0.47333333333333333, "Vietnamese,Chinese,English": 0.47333333333333333, - "Vietnamese,Spanish,Malay": 0.4866666666666667, - "Vietnamese,Spanish,Indonesian": 0.4866666666666667, - "Vietnamese,Spanish,English": 0.5133333333333333, - "Vietnamese,Malay,Indonesian": 0.4866666666666667, - "Vietnamese,Malay,English": 0.46, - "Vietnamese,Indonesian,English": 0.47333333333333333, - "Chinese,Spanish,Malay": 0.5266666666666666, - "Chinese,Spanish,Indonesian": 0.52, - "Chinese,Spanish,English": 0.5133333333333333, + "Vietnamese,Spanish,Malay": 0.54, + "Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,English": 0.5266666666666666, + "Vietnamese,Malay,Indonesian": 0.52, + "Vietnamese,Malay,English": 0.47333333333333333, + "Vietnamese,Indonesian,English": 0.4666666666666667, + "Chinese,Spanish,Malay": 0.5666666666666667, + "Chinese,Spanish,Indonesian": 0.5133333333333333, + "Chinese,Spanish,English": 0.5266666666666666, "Chinese,Malay,Indonesian": 0.5533333333333333, - "Chinese,Malay,English": 0.49333333333333335, - "Chinese,Indonesian,English": 0.5, - "Spanish,Malay,Indonesian": 0.56, + "Chinese,Malay,English": 0.5, + "Chinese,Indonesian,English": 0.4866666666666667, + "Spanish,Malay,Indonesian": 0.5733333333333334, "Spanish,Malay,English": 0.5266666666666666, - "Spanish,Indonesian,English": 0.52, - "Malay,Indonesian,English": 0.5333333333333333 + "Spanish,Indonesian,English": 0.5133333333333333, + "Malay,Indonesian,English": 0.52 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Filipino,Vietnamese,Chinese,Spanish": 0.38, "Filipino,Vietnamese,Chinese,Malay": 0.36, - "Filipino,Vietnamese,Chinese,Indonesian": 0.37333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.34, - "Filipino,Vietnamese,Spanish,Malay": 0.36, - "Filipino,Vietnamese,Spanish,Indonesian": 0.36, + "Filipino,Vietnamese,Chinese,Indonesian": 0.36, + "Filipino,Vietnamese,Chinese,English": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Spanish,Indonesian": 0.36666666666666664, "Filipino,Vietnamese,Spanish,English": 0.35333333333333333, - "Filipino,Vietnamese,Malay,Indonesian": 0.36, + "Filipino,Vietnamese,Malay,Indonesian": 0.37333333333333335, "Filipino,Vietnamese,Malay,English": 0.3333333333333333, "Filipino,Vietnamese,Indonesian,English": 0.3466666666666667, - "Filipino,Chinese,Spanish,Malay": 0.38666666666666666, - "Filipino,Chinese,Spanish,Indonesian": 0.4, + "Filipino,Chinese,Spanish,Malay": 0.4, + "Filipino,Chinese,Spanish,Indonesian": 0.3933333333333333, "Filipino,Chinese,Spanish,English": 0.36666666666666664, - "Filipino,Chinese,Malay,Indonesian": 0.42, - "Filipino,Chinese,Malay,English": 0.36, - "Filipino,Chinese,Indonesian,English": 0.37333333333333335, + "Filipino,Chinese,Malay,Indonesian": 0.41333333333333333, + "Filipino,Chinese,Malay,English": 0.35333333333333333, + "Filipino,Chinese,Indonesian,English": 0.36, "Filipino,Spanish,Malay,Indonesian": 0.4066666666666667, - "Filipino,Spanish,Malay,English": 0.37333333333333335, - "Filipino,Spanish,Indonesian,English": 0.38666666666666666, - "Filipino,Malay,Indonesian,English": 0.3933333333333333, - "Vietnamese,Chinese,Spanish,Malay": 0.43333333333333335, - "Vietnamese,Chinese,Spanish,Indonesian": 0.4266666666666667, - "Vietnamese,Chinese,Spanish,English": 0.4266666666666667, - "Vietnamese,Chinese,Malay,Indonesian": 0.4266666666666667, - "Vietnamese,Chinese,Malay,English": 0.4, + "Filipino,Spanish,Malay,English": 0.36666666666666664, + "Filipino,Spanish,Indonesian,English": 0.38, + "Filipino,Malay,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish,Malay": 0.4666666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.44, + "Vietnamese,Chinese,Spanish,English": 0.43333333333333335, + "Vietnamese,Chinese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,Malay,English": 0.41333333333333333, "Vietnamese,Chinese,Indonesian,English": 0.3933333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.43333333333333335, - "Vietnamese,Spanish,Malay,English": 0.4, - "Vietnamese,Spanish,Indonesian,English": 0.41333333333333333, - "Vietnamese,Malay,Indonesian,English": 0.4066666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.4666666666666667, - "Chinese,Spanish,Malay,English": 0.44666666666666666, + "Vietnamese,Spanish,Malay,Indonesian": 0.4666666666666667, + "Vietnamese,Spanish,Malay,English": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.4266666666666667, + "Vietnamese,Malay,Indonesian,English": 0.41333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.47333333333333333, + "Chinese,Spanish,Malay,English": 0.46, "Chinese,Spanish,Indonesian,English": 0.44, - "Chinese,Malay,Indonesian,English": 0.44, - "Spanish,Malay,Indonesian,English": 0.47333333333333333 + "Chinese,Malay,Indonesian,English": 0.43333333333333335, + "Spanish,Malay,Indonesian,English": 0.4666666666666667 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.3333333333333333, "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3333333333333333, - "Filipino,Vietnamese,Chinese,Malay,English": 0.3, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.32666666666666666, - "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.3, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.3, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.3, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.31333333333333335, "Filipino,Vietnamese,Malay,Indonesian,English": 0.30666666666666664, "Filipino,Chinese,Spanish,Malay,Indonesian": 0.36, "Filipino,Chinese,Spanish,Malay,English": 0.32666666666666666, "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.3333333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.35333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3933333333333333, - "Vietnamese,Chinese,Spanish,Malay,English": 0.36666666666666664, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.36666666666666664, + "Filipino,Chinese,Malay,Indonesian,English": 0.32666666666666666, + "Filipino,Spanish,Malay,Indonesian,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.4066666666666667, + "Vietnamese,Chinese,Spanish,Malay,English": 0.38666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.37333333333333335, "Vietnamese,Chinese,Malay,Indonesian,English": 0.36, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.38666666666666666, "Chinese,Spanish,Malay,Indonesian,English": 0.4066666666666667 }, "6_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.28, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.28, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2866666666666667, "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.34 + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 }, "7_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 } }, - "AC3_2": 0.5909980360151319, - "AC3_3": 0.5167748124394084, - "AC3_4": 0.4641286273132446, - "AC3_5": 0.42239674693695334, - "AC3_6": 0.3868637109564578, - "AC3_7": 0.355313225014724 + "AC3_2": 0.5839393938896263, + "AC3_3": 0.5132137794534842, + "AC3_4": 0.4614632836134512, + "AC3_5": 0.419730471032004, + "AC3_6": 0.3842109227416096, + "AC3_7": 0.3523966941711905 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.5081168831168831, + "overall_acc": 0.5089285714285714, "language_acc": { "Indonesian": 0.4772727272727273, - "English": 0.5852272727272727, + "English": 0.5909090909090909, "Filipino": 0.4147727272727273, "Spanish": 0.5454545454545454, "Chinese": 0.5795454545454546, @@ -31199,29 +31199,29 @@ "Vietnamese": 0.48863636363636365 }, "consistency_score_2": 0.6263528138528139, - "consistency_score_3": 0.4767857142857143, + "consistency_score_3": 0.47678571428571437, "consistency_score_4": 0.3964285714285714, "consistency_score_5": 0.3444264069264069, "consistency_score_6": 0.30681818181818177, "consistency_score_7": 0.2784090909090909, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5852272727272727, + "Indonesian,English": 0.5795454545454546, "Indonesian,Filipino": 0.5909090909090909, "Indonesian,Spanish": 0.6477272727272727, "Indonesian,Chinese": 0.5625, "Indonesian,Malay": 0.7329545454545454, "Indonesian,Vietnamese": 0.6420454545454546, - "English,Filipino": 0.5511363636363636, - "English,Spanish": 0.75, - "English,Chinese": 0.6647727272727273, - "English,Malay": 0.5625, - "English,Vietnamese": 0.6931818181818182, + "English,Filipino": 0.5625, + "English,Spanish": 0.7556818181818182, + "English,Chinese": 0.6704545454545454, + "English,Malay": 0.5568181818181818, + "English,Vietnamese": 0.6875, "Filipino,Spanish": 0.5738636363636364, "Filipino,Chinese": 0.5170454545454546, "Filipino,Malay": 0.6875, "Filipino,Vietnamese": 0.5681818181818182, - "Spanish,Chinese": 0.6647727272727273, + "Spanish,Chinese": 0.6590909090909091, "Spanish,Malay": 0.6477272727272727, "Spanish,Vietnamese": 0.6875, "Chinese,Malay": 0.5795454545454546, @@ -31232,8 +31232,8 @@ "Indonesian,English,Filipino": 0.42045454545454547, "Indonesian,English,Spanish": 0.5170454545454546, "Indonesian,English,Chinese": 0.4431818181818182, - "Indonesian,English,Malay": 0.48863636363636365, - "Indonesian,English,Vietnamese": 0.48863636363636365, + "Indonesian,English,Malay": 0.48295454545454547, + "Indonesian,English,Vietnamese": 0.48295454545454547, "Indonesian,Filipino,Spanish": 0.4431818181818182, "Indonesian,Filipino,Chinese": 0.375, "Indonesian,Filipino,Malay": 0.5340909090909091, @@ -31244,16 +31244,16 @@ "Indonesian,Chinese,Malay": 0.4715909090909091, "Indonesian,Chinese,Vietnamese": 0.44886363636363635, "Indonesian,Malay,Vietnamese": 0.5454545454545454, - "English,Filipino,Spanish": 0.4772727272727273, - "English,Filipino,Chinese": 0.42045454545454547, + "English,Filipino,Spanish": 0.48295454545454547, + "English,Filipino,Chinese": 0.42613636363636365, "English,Filipino,Malay": 0.44886363636363635, "English,Filipino,Vietnamese": 0.4602272727272727, - "English,Spanish,Chinese": 0.5511363636363636, + "English,Spanish,Chinese": 0.5568181818181818, "English,Spanish,Malay": 0.5, "English,Spanish,Vietnamese": 0.5852272727272727, "English,Chinese,Malay": 0.4431818181818182, "English,Chinese,Vietnamese": 0.5, - "English,Malay,Vietnamese": 0.48863636363636365, + "English,Malay,Vietnamese": 0.48295454545454547, "Filipino,Spanish,Chinese": 0.42613636363636365, "Filipino,Spanish,Malay": 0.4943181818181818, "Filipino,Spanish,Vietnamese": 0.4602272727272727, @@ -31275,7 +31275,7 @@ "Indonesian,English,Spanish,Vietnamese": 0.4431818181818182, "Indonesian,English,Chinese,Malay": 0.39204545454545453, "Indonesian,English,Chinese,Vietnamese": 0.3977272727272727, - "Indonesian,English,Malay,Vietnamese": 0.4318181818181818, + "Indonesian,English,Malay,Vietnamese": 0.42613636363636365, "Indonesian,Filipino,Spanish,Chinese": 0.3352272727272727, "Indonesian,Filipino,Spanish,Malay": 0.4318181818181818, "Indonesian,Filipino,Spanish,Vietnamese": 0.38636363636363635, @@ -31286,7 +31286,7 @@ "Indonesian,Spanish,Chinese,Vietnamese": 0.4090909090909091, "Indonesian,Spanish,Malay,Vietnamese": 0.45454545454545453, "Indonesian,Chinese,Malay,Vietnamese": 0.38636363636363635, - "English,Filipino,Spanish,Chinese": 0.38636363636363635, + "English,Filipino,Spanish,Chinese": 0.39204545454545453, "English,Filipino,Spanish,Malay": 0.4034090909090909, "English,Filipino,Spanish,Vietnamese": 0.4034090909090909, "English,Filipino,Chinese,Malay": 0.36363636363636365, @@ -31338,53 +31338,53 @@ "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 } }, - "AC3_2": 0.5610734959751482, - "AC3_3": 0.49195295386938, - "AC3_4": 0.44537739714079405, - "AC3_5": 0.41055715146424704, - "AC3_6": 0.38260593982990687, - "AC3_7": 0.35971948583474134 + "AC3_2": 0.5615679898729099, + "AC3_3": 0.4923330744842147, + "AC3_4": 0.44568892640892926, + "AC3_5": 0.4108218588158855, + "AC3_6": 0.3828358208485916, + "AC3_7": 0.3599226803666572 }, "prompt_2": { - "overall_acc": 0.5032467532467533, + "overall_acc": 0.502435064935065, "language_acc": { "Indonesian": 0.5056818181818182, "English": 0.5738636363636364, "Filipino": 0.4147727272727273, "Spanish": 0.5397727272727273, "Chinese": 0.5511363636363636, - "Malay": 0.5, + "Malay": 0.4943181818181818, "Vietnamese": 0.4375 }, - "consistency_score_2": 0.6236471861471862, - "consistency_score_3": 0.47727272727272724, - "consistency_score_4": 0.3987012987012987, - "consistency_score_5": 0.3492965367965367, + "consistency_score_2": 0.6222943722943723, + "consistency_score_3": 0.4761363636363635, + "consistency_score_4": 0.39805194805194805, + "consistency_score_5": 0.349025974025974, "consistency_score_6": 0.31655844155844154, "consistency_score_7": 0.29545454545454547, "detailed_consistency_score": { "2_combine": { "Indonesian,English": 0.6079545454545454, "Indonesian,Filipino": 0.625, - "Indonesian,Spanish": 0.6590909090909091, + "Indonesian,Spanish": 0.6534090909090909, "Indonesian,Chinese": 0.5852272727272727, - "Indonesian,Malay": 0.7386363636363636, + "Indonesian,Malay": 0.7329545454545454, "Indonesian,Vietnamese": 0.6477272727272727, - "English,Filipino": 0.5454545454545454, + "English,Filipino": 0.5511363636363636, "English,Spanish": 0.75, "English,Chinese": 0.6761363636363636, "English,Malay": 0.5681818181818182, "English,Vietnamese": 0.6306818181818182, - "Filipino,Spanish": 0.5681818181818182, + "Filipino,Spanish": 0.5625, "Filipino,Chinese": 0.5170454545454546, "Filipino,Malay": 0.6590909090909091, - "Filipino,Vietnamese": 0.5625, - "Spanish,Chinese": 0.6420454545454546, + "Filipino,Vietnamese": 0.5568181818181818, + "Spanish,Chinese": 0.6363636363636364, "Spanish,Malay": 0.625, - "Spanish,Vietnamese": 0.6704545454545454, - "Chinese,Malay": 0.5965909090909091, + "Spanish,Vietnamese": 0.6761363636363636, + "Chinese,Malay": 0.5909090909090909, "Chinese,Vietnamese": 0.5852272727272727, - "Malay,Vietnamese": 0.6363636363636364 + "Malay,Vietnamese": 0.6306818181818182 }, "3_combine": { "Indonesian,English,Filipino": 0.4431818181818182, @@ -31396,12 +31396,12 @@ "Indonesian,Filipino,Chinese": 0.3977272727272727, "Indonesian,Filipino,Malay": 0.5511363636363636, "Indonesian,Filipino,Vietnamese": 0.4715909090909091, - "Indonesian,Spanish,Chinese": 0.4772727272727273, - "Indonesian,Spanish,Malay": 0.5454545454545454, + "Indonesian,Spanish,Chinese": 0.4715909090909091, + "Indonesian,Spanish,Malay": 0.5397727272727273, "Indonesian,Spanish,Vietnamese": 0.5284090909090909, - "Indonesian,Chinese,Malay": 0.4943181818181818, + "Indonesian,Chinese,Malay": 0.48863636363636365, "Indonesian,Chinese,Vietnamese": 0.4659090909090909, - "Indonesian,Malay,Vietnamese": 0.5511363636363636, + "Indonesian,Malay,Vietnamese": 0.5454545454545454, "English,Filipino,Spanish": 0.4602272727272727, "English,Filipino,Chinese": 0.42045454545454547, "English,Filipino,Malay": 0.4431818181818182, @@ -31414,14 +31414,14 @@ "English,Malay,Vietnamese": 0.4659090909090909, "Filipino,Spanish,Chinese": 0.4147727272727273, "Filipino,Spanish,Malay": 0.4772727272727273, - "Filipino,Spanish,Vietnamese": 0.44886363636363635, + "Filipino,Spanish,Vietnamese": 0.4431818181818182, "Filipino,Chinese,Malay": 0.42613636363636365, "Filipino,Chinese,Vietnamese": 0.3977272727272727, "Filipino,Malay,Vietnamese": 0.48295454545454547, - "Spanish,Chinese,Malay": 0.4772727272727273, + "Spanish,Chinese,Malay": 0.4715909090909091, "Spanish,Chinese,Vietnamese": 0.48863636363636365, "Spanish,Malay,Vietnamese": 0.5056818181818182, - "Chinese,Malay,Vietnamese": 0.4715909090909091 + "Chinese,Malay,Vietnamese": 0.4659090909090909 }, "4_combine": { "Indonesian,English,Filipino,Spanish": 0.39204545454545453, @@ -31440,10 +31440,10 @@ "Indonesian,Filipino,Chinese,Malay": 0.375, "Indonesian,Filipino,Chinese,Vietnamese": 0.3409090909090909, "Indonesian,Filipino,Malay,Vietnamese": 0.4375, - "Indonesian,Spanish,Chinese,Malay": 0.42045454545454547, + "Indonesian,Spanish,Chinese,Malay": 0.4147727272727273, "Indonesian,Spanish,Chinese,Vietnamese": 0.4090909090909091, - "Indonesian,Spanish,Malay,Vietnamese": 0.4602272727272727, - "Indonesian,Chinese,Malay,Vietnamese": 0.4147727272727273, + "Indonesian,Spanish,Malay,Vietnamese": 0.45454545454545453, + "Indonesian,Chinese,Malay,Vietnamese": 0.4090909090909091, "English,Filipino,Spanish,Chinese": 0.38636363636363635, "English,Filipino,Spanish,Malay": 0.39204545454545453, "English,Filipino,Spanish,Vietnamese": 0.375, @@ -31458,7 +31458,7 @@ "Filipino,Spanish,Chinese,Vietnamese": 0.3522727272727273, "Filipino,Spanish,Malay,Vietnamese": 0.4090909090909091, "Filipino,Chinese,Malay,Vietnamese": 0.3693181818181818, - "Spanish,Chinese,Malay,Vietnamese": 0.4034090909090909 + "Spanish,Chinese,Malay,Vietnamese": 0.3977272727272727 }, "5_combine": { "Indonesian,English,Filipino,Spanish,Chinese": 0.3352272727272727, @@ -31475,7 +31475,7 @@ "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3181818181818182, "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.38636363636363635, "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547, "English,Filipino,Spanish,Chinese,Malay": 0.3465909090909091, "English,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, "English,Filipino,Spanish,Malay,Vietnamese": 0.3409090909090909, @@ -31496,12 +31496,12 @@ "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 } }, - "AC3_2": 0.5570150137482976, - "AC3_3": 0.48991571337568224, - "AC3_4": 0.4449150560968294, - "AC3_5": 0.4123716651001917, - "AC3_6": 0.3886460074104815, - "AC3_7": 0.37232076861561636 + "AC3_2": 0.5559781810189581, + "AC3_3": 0.48893233089049576, + "AC3_4": 0.44419353851954846, + "AC3_5": 0.41191053936454575, + "AC3_6": 0.3884037171471747, + "AC3_7": 0.37209840002734973 }, "prompt_3": { "overall_acc": 0.5064935064935064, @@ -31662,42 +31662,42 @@ "AC3_7": 0.35931183599821936 }, "prompt_4": { - "overall_acc": 0.49107142857142855, + "overall_acc": 0.49188311688311676, "language_acc": { "Indonesian": 0.4659090909090909, "English": 0.5397727272727273, "Filipino": 0.4034090909090909, - "Spanish": 0.5284090909090909, + "Spanish": 0.5340909090909091, "Chinese": 0.5738636363636364, "Malay": 0.44886363636363635, "Vietnamese": 0.4772727272727273 }, - "consistency_score_2": 0.6501623376623377, - "consistency_score_3": 0.5081168831168831, - "consistency_score_4": 0.4254870129870131, - "consistency_score_5": 0.36931818181818177, - "consistency_score_6": 0.3279220779220779, - "consistency_score_7": 0.29545454545454547, + "consistency_score_2": 0.6531385281385281, + "consistency_score_3": 0.5118506493506493, + "consistency_score_4": 0.4295454545454545, + "consistency_score_5": 0.37418831168831174, + "consistency_score_6": 0.3336038961038961, + "consistency_score_7": 0.30113636363636365, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.6363636363636364, + "Indonesian,English": 0.6420454545454546, "Indonesian,Filipino": 0.6079545454545454, - "Indonesian,Spanish": 0.6704545454545454, + "Indonesian,Spanish": 0.6818181818181818, "Indonesian,Chinese": 0.6193181818181818, "Indonesian,Malay": 0.7386363636363636, "Indonesian,Vietnamese": 0.6875, - "English,Filipino": 0.6193181818181818, - "English,Spanish": 0.75, - "English,Chinese": 0.7045454545454546, - "English,Malay": 0.6477272727272727, - "English,Vietnamese": 0.6647727272727273, - "Filipino,Spanish": 0.5795454545454546, + "English,Filipino": 0.6079545454545454, + "English,Spanish": 0.7556818181818182, + "English,Chinese": 0.7102272727272727, + "English,Malay": 0.6306818181818182, + "English,Vietnamese": 0.6818181818181818, + "Filipino,Spanish": 0.5909090909090909, "Filipino,Chinese": 0.5170454545454546, "Filipino,Malay": 0.6818181818181818, "Filipino,Vietnamese": 0.5738636363636364, "Spanish,Chinese": 0.6761363636363636, - "Spanish,Malay": 0.6534090909090909, - "Spanish,Vietnamese": 0.6875, + "Spanish,Malay": 0.6761363636363636, + "Spanish,Vietnamese": 0.6988636363636364, "Chinese,Malay": 0.6193181818181818, "Chinese,Vietnamese": 0.6420454545454546, "Malay,Vietnamese": 0.6761363636363636 @@ -31706,276 +31706,276 @@ "Indonesian,English,Filipino": 0.48295454545454547, "Indonesian,English,Spanish": 0.5511363636363636, "Indonesian,English,Chinese": 0.5, - "Indonesian,English,Malay": 0.5397727272727273, - "Indonesian,English,Vietnamese": 0.5227272727272727, - "Indonesian,Filipino,Spanish": 0.4715909090909091, + "Indonesian,English,Malay": 0.5340909090909091, + "Indonesian,English,Vietnamese": 0.5340909090909091, + "Indonesian,Filipino,Spanish": 0.48295454545454547, "Indonesian,Filipino,Chinese": 0.4147727272727273, "Indonesian,Filipino,Malay": 0.5454545454545454, "Indonesian,Filipino,Vietnamese": 0.4943181818181818, - "Indonesian,Spanish,Chinese": 0.5056818181818182, - "Indonesian,Spanish,Malay": 0.5681818181818182, - "Indonesian,Spanish,Vietnamese": 0.5454545454545454, + "Indonesian,Spanish,Chinese": 0.5113636363636364, + "Indonesian,Spanish,Malay": 0.5795454545454546, + "Indonesian,Spanish,Vietnamese": 0.5568181818181818, "Indonesian,Chinese,Malay": 0.5227272727272727, "Indonesian,Chinese,Vietnamese": 0.5056818181818182, "Indonesian,Malay,Vietnamese": 0.5909090909090909, - "English,Filipino,Spanish": 0.5056818181818182, + "English,Filipino,Spanish": 0.5170454545454546, "English,Filipino,Chinese": 0.4431818181818182, - "English,Filipino,Malay": 0.5113636363636364, + "English,Filipino,Malay": 0.5056818181818182, "English,Filipino,Vietnamese": 0.4772727272727273, "English,Spanish,Chinese": 0.5738636363636364, "English,Spanish,Malay": 0.5397727272727273, - "English,Spanish,Vietnamese": 0.5681818181818182, - "English,Chinese,Malay": 0.5056818181818182, - "English,Chinese,Vietnamese": 0.5284090909090909, - "English,Malay,Vietnamese": 0.5284090909090909, - "Filipino,Spanish,Chinese": 0.4318181818181818, - "Filipino,Spanish,Malay": 0.48863636363636365, - "Filipino,Spanish,Vietnamese": 0.4602272727272727, + "English,Spanish,Vietnamese": 0.5852272727272727, + "English,Chinese,Malay": 0.4943181818181818, + "English,Chinese,Vietnamese": 0.5397727272727273, + "English,Malay,Vietnamese": 0.5340909090909091, + "Filipino,Spanish,Chinese": 0.4375, + "Filipino,Spanish,Malay": 0.5056818181818182, + "Filipino,Spanish,Vietnamese": 0.4715909090909091, "Filipino,Chinese,Malay": 0.45454545454545453, "Filipino,Chinese,Vietnamese": 0.4147727272727273, "Filipino,Malay,Vietnamese": 0.5113636363636364, - "Spanish,Chinese,Malay": 0.5056818181818182, - "Spanish,Chinese,Vietnamese": 0.5284090909090909, - "Spanish,Malay,Vietnamese": 0.5397727272727273, + "Spanish,Chinese,Malay": 0.5113636363636364, + "Spanish,Chinese,Vietnamese": 0.5340909090909091, + "Spanish,Malay,Vietnamese": 0.5511363636363636, "Chinese,Malay,Vietnamese": 0.5056818181818182 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.4147727272727273, + "Indonesian,English,Filipino,Spanish": 0.42613636363636365, "Indonesian,English,Filipino,Chinese": 0.375, "Indonesian,English,Filipino,Malay": 0.44886363636363635, "Indonesian,English,Filipino,Vietnamese": 0.42613636363636365, - "Indonesian,English,Spanish,Chinese": 0.4431818181818182, + "Indonesian,English,Spanish,Chinese": 0.4375, "Indonesian,English,Spanish,Malay": 0.4715909090909091, - "Indonesian,English,Spanish,Vietnamese": 0.4602272727272727, - "Indonesian,English,Chinese,Malay": 0.4431818181818182, - "Indonesian,English,Chinese,Vietnamese": 0.42613636363636365, - "Indonesian,English,Malay,Vietnamese": 0.4659090909090909, - "Indonesian,Filipino,Spanish,Chinese": 0.36363636363636365, - "Indonesian,Filipino,Spanish,Malay": 0.4431818181818182, - "Indonesian,Filipino,Spanish,Vietnamese": 0.42045454545454547, + "Indonesian,English,Spanish,Vietnamese": 0.4715909090909091, + "Indonesian,English,Chinese,Malay": 0.4318181818181818, + "Indonesian,English,Chinese,Vietnamese": 0.4318181818181818, + "Indonesian,English,Malay,Vietnamese": 0.4715909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.45454545454545453, + "Indonesian,Filipino,Spanish,Vietnamese": 0.4318181818181818, "Indonesian,Filipino,Chinese,Malay": 0.3977272727272727, "Indonesian,Filipino,Chinese,Vietnamese": 0.3693181818181818, "Indonesian,Filipino,Malay,Vietnamese": 0.4659090909090909, - "Indonesian,Spanish,Chinese,Malay": 0.4431818181818182, - "Indonesian,Spanish,Chinese,Vietnamese": 0.4375, - "Indonesian,Spanish,Malay,Vietnamese": 0.48295454545454547, + "Indonesian,Spanish,Chinese,Malay": 0.44886363636363635, + "Indonesian,Spanish,Chinese,Vietnamese": 0.4431818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.4943181818181818, "Indonesian,Chinese,Malay,Vietnamese": 0.44886363636363635, - "English,Filipino,Spanish,Chinese": 0.39204545454545453, - "English,Filipino,Spanish,Malay": 0.42613636363636365, - "English,Filipino,Spanish,Vietnamese": 0.4034090909090909, + "English,Filipino,Spanish,Chinese": 0.3977272727272727, + "English,Filipino,Spanish,Malay": 0.4375, + "English,Filipino,Spanish,Vietnamese": 0.4147727272727273, "English,Filipino,Chinese,Malay": 0.3977272727272727, "English,Filipino,Chinese,Vietnamese": 0.375, "English,Filipino,Malay,Vietnamese": 0.42613636363636365, - "English,Spanish,Chinese,Malay": 0.44886363636363635, - "English,Spanish,Chinese,Vietnamese": 0.4659090909090909, - "English,Spanish,Malay,Vietnamese": 0.4659090909090909, + "English,Spanish,Chinese,Malay": 0.4431818181818182, + "English,Spanish,Chinese,Vietnamese": 0.4772727272727273, + "English,Spanish,Malay,Vietnamese": 0.4772727272727273, "English,Chinese,Malay,Vietnamese": 0.4375, - "Filipino,Spanish,Chinese,Malay": 0.38636363636363635, - "Filipino,Spanish,Chinese,Vietnamese": 0.3693181818181818, - "Filipino,Spanish,Malay,Vietnamese": 0.42613636363636365, + "Filipino,Spanish,Chinese,Malay": 0.39204545454545453, + "Filipino,Spanish,Chinese,Vietnamese": 0.375, + "Filipino,Spanish,Malay,Vietnamese": 0.4375, "Filipino,Chinese,Malay,Vietnamese": 0.39204545454545453, - "Spanish,Chinese,Malay,Vietnamese": 0.4318181818181818 + "Spanish,Chinese,Malay,Vietnamese": 0.4375 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.3352272727272727, - "Indonesian,English,Filipino,Spanish,Malay": 0.38636363636363635, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,English,Filipino,Spanish,Chinese": 0.3409090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.3977272727272727, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3806818181818182, "Indonesian,English,Filipino,Chinese,Malay": 0.35795454545454547, "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3409090909090909, "Indonesian,English,Filipino,Malay,Vietnamese": 0.3977272727272727, - "Indonesian,English,Spanish,Chinese,Malay": 0.3977272727272727, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.38636363636363635, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.4147727272727273, + "Indonesian,English,Spanish,Chinese,Malay": 0.39204545454545453, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.39204545454545453, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.42613636363636365, "Indonesian,English,Chinese,Malay,Vietnamese": 0.39204545454545453, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3465909090909091, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3977272727272727, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3522727272727273, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.4090909090909091, "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.35795454545454547, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.39204545454545453, - "English,Filipino,Spanish,Chinese,Malay": 0.3522727272727273, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.3352272727272727, - "English,Filipino,Spanish,Malay,Vietnamese": 0.3693181818181818, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3977272727272727, + "English,Filipino,Spanish,Chinese,Malay": 0.35795454545454547, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.3409090909090909, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3806818181818182, "English,Filipino,Chinese,Malay,Vietnamese": 0.3522727272727273, - "English,Spanish,Chinese,Malay,Vietnamese": 0.39204545454545453, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3465909090909091 + "English,Spanish,Chinese,Malay,Vietnamese": 0.3977272727272727, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3522727272727273 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.3181818181818182, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.3465909090909091, + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.32386363636363635, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.3125, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.35795454545454547, "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.32954545454545453, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.35795454545454547, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32386363636363635, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3125 + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3181818181818182 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365 } }, - "AC3_2": 0.5595280430316463, - "AC3_3": 0.49944876402103694, - "AC3_4": 0.45593277157112544, - "AC3_5": 0.42158018863024654, - "AC3_6": 0.39324649577528864, - "AC3_7": 0.36893704845670483 + "AC3_2": 0.5611558809320143, + "AC3_3": 0.5016682734523544, + "AC3_4": 0.45860561209706713, + "AC3_5": 0.42503864453033646, + "AC3_6": 0.39756924486621587, + "AC3_7": 0.3735693681486683 }, "prompt_5": { - "overall_acc": 0.43262987012987014, + "overall_acc": 0.43181818181818177, "language_acc": { - "Indonesian": 0.42045454545454547, - "English": 0.48863636363636365, - "Filipino": 0.4034090909090909, + "Indonesian": 0.4318181818181818, + "English": 0.48295454545454547, + "Filipino": 0.4090909090909091, "Spanish": 0.4602272727272727, - "Chinese": 0.45454545454545453, - "Malay": 0.42613636363636365, - "Vietnamese": 0.375 + "Chinese": 0.4659090909090909, + "Malay": 0.4034090909090909, + "Vietnamese": 0.3693181818181818 }, - "consistency_score_2": 0.6176948051948054, - "consistency_score_3": 0.465422077922078, - "consistency_score_4": 0.3821428571428571, - "consistency_score_5": 0.3273809523809524, - "consistency_score_6": 0.2873376623376624, - "consistency_score_7": 0.2556818181818182, + "consistency_score_2": 0.6174242424242424, + "consistency_score_3": 0.46493506493506503, + "consistency_score_4": 0.3803571428571429, + "consistency_score_5": 0.32413419913419916, + "consistency_score_6": 0.2824675324675325, + "consistency_score_7": 0.25, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.6306818181818182, - "Indonesian,Filipino": 0.5852272727272727, - "Indonesian,Spanish": 0.6306818181818182, - "Indonesian,Chinese": 0.6079545454545454, - "Indonesian,Malay": 0.6818181818181818, - "Indonesian,Vietnamese": 0.6761363636363636, - "English,Filipino": 0.5568181818181818, - "English,Spanish": 0.6875, - "English,Chinese": 0.6590909090909091, - "English,Malay": 0.5965909090909091, - "English,Vietnamese": 0.6363636363636364, - "Filipino,Spanish": 0.5397727272727273, - "Filipino,Chinese": 0.5, - "Filipino,Malay": 0.6136363636363636, - "Filipino,Vietnamese": 0.5511363636363636, - "Spanish,Chinese": 0.6534090909090909, - "Spanish,Malay": 0.5795454545454546, - "Spanish,Vietnamese": 0.6761363636363636, - "Chinese,Malay": 0.5909090909090909, - "Chinese,Vietnamese": 0.6477272727272727, - "Malay,Vietnamese": 0.6704545454545454 + "Indonesian,English": 0.6193181818181818, + "Indonesian,Filipino": 0.6079545454545454, + "Indonesian,Spanish": 0.625, + "Indonesian,Chinese": 0.625, + "Indonesian,Malay": 0.6590909090909091, + "Indonesian,Vietnamese": 0.6875, + "English,Filipino": 0.5284090909090909, + "English,Spanish": 0.7045454545454546, + "English,Chinese": 0.6647727272727273, + "English,Malay": 0.5852272727272727, + "English,Vietnamese": 0.6477272727272727, + "Filipino,Spanish": 0.5227272727272727, + "Filipino,Chinese": 0.5113636363636364, + "Filipino,Malay": 0.6079545454545454, + "Filipino,Vietnamese": 0.5681818181818182, + "Spanish,Chinese": 0.6647727272727273, + "Spanish,Malay": 0.5738636363636364, + "Spanish,Vietnamese": 0.6647727272727273, + "Chinese,Malay": 0.5852272727272727, + "Chinese,Vietnamese": 0.6306818181818182, + "Malay,Vietnamese": 0.6818181818181818 }, "3_combine": { - "Indonesian,English,Filipino": 0.4147727272727273, - "Indonesian,English,Spanish": 0.5056818181818182, + "Indonesian,English,Filipino": 0.42045454545454547, + "Indonesian,English,Spanish": 0.5170454545454546, "Indonesian,English,Chinese": 0.5, - "Indonesian,English,Malay": 0.5, - "Indonesian,English,Vietnamese": 0.5056818181818182, - "Indonesian,Filipino,Spanish": 0.42045454545454547, - "Indonesian,Filipino,Chinese": 0.3977272727272727, + "Indonesian,English,Malay": 0.4715909090909091, + "Indonesian,English,Vietnamese": 0.5170454545454546, + "Indonesian,Filipino,Spanish": 0.4147727272727273, + "Indonesian,Filipino,Chinese": 0.42045454545454547, "Indonesian,Filipino,Malay": 0.4659090909090909, - "Indonesian,Filipino,Vietnamese": 0.4431818181818182, - "Indonesian,Spanish,Chinese": 0.4772727272727273, - "Indonesian,Spanish,Malay": 0.48863636363636365, - "Indonesian,Spanish,Vietnamese": 0.5284090909090909, + "Indonesian,Filipino,Vietnamese": 0.4602272727272727, + "Indonesian,Spanish,Chinese": 0.48863636363636365, + "Indonesian,Spanish,Malay": 0.48295454545454547, + "Indonesian,Spanish,Vietnamese": 0.5227272727272727, "Indonesian,Chinese,Malay": 0.48295454545454547, "Indonesian,Chinese,Vietnamese": 0.4943181818181818, "Indonesian,Malay,Vietnamese": 0.5454545454545454, - "English,Filipino,Spanish": 0.42613636363636365, - "English,Filipino,Chinese": 0.4090909090909091, - "English,Filipino,Malay": 0.4375, + "English,Filipino,Spanish": 0.4147727272727273, + "English,Filipino,Chinese": 0.4034090909090909, + "English,Filipino,Malay": 0.4147727272727273, "English,Filipino,Vietnamese": 0.4090909090909091, - "English,Spanish,Chinese": 0.5397727272727273, - "English,Spanish,Malay": 0.4659090909090909, - "English,Spanish,Vietnamese": 0.5170454545454546, - "English,Chinese,Malay": 0.4659090909090909, + "English,Spanish,Chinese": 0.5568181818181818, + "English,Spanish,Malay": 0.4602272727272727, + "English,Spanish,Vietnamese": 0.5340909090909091, + "English,Chinese,Malay": 0.45454545454545453, "English,Chinese,Vietnamese": 0.5056818181818182, "English,Malay,Vietnamese": 0.48863636363636365, "Filipino,Spanish,Chinese": 0.3977272727272727, "Filipino,Spanish,Malay": 0.4034090909090909, - "Filipino,Spanish,Vietnamese": 0.4147727272727273, + "Filipino,Spanish,Vietnamese": 0.4034090909090909, "Filipino,Chinese,Malay": 0.4090909090909091, - "Filipino,Chinese,Vietnamese": 0.3977272727272727, - "Filipino,Malay,Vietnamese": 0.45454545454545453, - "Spanish,Chinese,Malay": 0.4602272727272727, - "Spanish,Chinese,Vietnamese": 0.5227272727272727, - "Spanish,Malay,Vietnamese": 0.5056818181818182, - "Chinese,Malay,Vietnamese": 0.48863636363636365 + "Filipino,Chinese,Vietnamese": 0.39204545454545453, + "Filipino,Malay,Vietnamese": 0.4659090909090909, + "Spanish,Chinese,Malay": 0.45454545454545453, + "Spanish,Chinese,Vietnamese": 0.5170454545454546, + "Spanish,Malay,Vietnamese": 0.5, + "Chinese,Malay,Vietnamese": 0.48295454545454547 }, "4_combine": { "Indonesian,English,Filipino,Spanish": 0.3409090909090909, - "Indonesian,English,Filipino,Chinese": 0.3522727272727273, - "Indonesian,English,Filipino,Malay": 0.3693181818181818, - "Indonesian,English,Filipino,Vietnamese": 0.3409090909090909, - "Indonesian,English,Spanish,Chinese": 0.4147727272727273, - "Indonesian,English,Spanish,Malay": 0.4147727272727273, - "Indonesian,English,Spanish,Vietnamese": 0.4375, - "Indonesian,English,Chinese,Malay": 0.4147727272727273, + "Indonesian,English,Filipino,Chinese": 0.35795454545454547, + "Indonesian,English,Filipino,Malay": 0.35795454545454547, + "Indonesian,English,Filipino,Vietnamese": 0.35795454545454547, + "Indonesian,English,Spanish,Chinese": 0.42613636363636365, + "Indonesian,English,Spanish,Malay": 0.4034090909090909, + "Indonesian,English,Spanish,Vietnamese": 0.45454545454545453, + "Indonesian,English,Chinese,Malay": 0.4090909090909091, "Indonesian,English,Chinese,Vietnamese": 0.42613636363636365, "Indonesian,English,Malay,Vietnamese": 0.42045454545454547, - "Indonesian,Filipino,Spanish,Chinese": 0.3409090909090909, + "Indonesian,Filipino,Spanish,Chinese": 0.3465909090909091, "Indonesian,Filipino,Spanish,Malay": 0.3522727272727273, - "Indonesian,Filipino,Spanish,Vietnamese": 0.36363636363636365, - "Indonesian,Filipino,Chinese,Malay": 0.35795454545454547, - "Indonesian,Filipino,Chinese,Vietnamese": 0.3465909090909091, - "Indonesian,Filipino,Malay,Vietnamese": 0.38636363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Chinese,Malay": 0.36363636363636365, + "Indonesian,Filipino,Chinese,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Malay,Vietnamese": 0.39204545454545453, "Indonesian,Spanish,Chinese,Malay": 0.4034090909090909, "Indonesian,Spanish,Chinese,Vietnamese": 0.42613636363636365, - "Indonesian,Spanish,Malay,Vietnamese": 0.4431818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.4375, "Indonesian,Chinese,Malay,Vietnamese": 0.42045454545454547, - "English,Filipino,Spanish,Chinese": 0.3522727272727273, - "English,Filipino,Spanish,Malay": 0.3465909090909091, + "English,Filipino,Spanish,Chinese": 0.3465909090909091, + "English,Filipino,Spanish,Malay": 0.3352272727272727, "English,Filipino,Spanish,Vietnamese": 0.3409090909090909, - "English,Filipino,Chinese,Malay": 0.3522727272727273, - "English,Filipino,Chinese,Vietnamese": 0.3409090909090909, - "English,Filipino,Malay,Vietnamese": 0.3693181818181818, - "English,Spanish,Chinese,Malay": 0.3977272727272727, - "English,Spanish,Chinese,Vietnamese": 0.4431818181818182, + "English,Filipino,Chinese,Malay": 0.3352272727272727, + "English,Filipino,Chinese,Vietnamese": 0.32954545454545453, + "English,Filipino,Malay,Vietnamese": 0.36363636363636365, + "English,Spanish,Chinese,Malay": 0.39204545454545453, + "English,Spanish,Chinese,Vietnamese": 0.44886363636363635, "English,Spanish,Malay,Vietnamese": 0.42613636363636365, - "English,Chinese,Malay,Vietnamese": 0.4090909090909091, - "Filipino,Spanish,Chinese,Malay": 0.3409090909090909, - "Filipino,Spanish,Chinese,Vietnamese": 0.3409090909090909, - "Filipino,Spanish,Malay,Vietnamese": 0.36363636363636365, + "English,Chinese,Malay,Vietnamese": 0.4034090909090909, + "Filipino,Spanish,Chinese,Malay": 0.3352272727272727, + "Filipino,Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Filipino,Spanish,Malay,Vietnamese": 0.35795454545454547, "Filipino,Chinese,Malay,Vietnamese": 0.35795454545454547, - "Spanish,Chinese,Malay,Vietnamese": 0.42045454545454547 + "Spanish,Chinese,Malay,Vietnamese": 0.4090909090909091 }, "5_combine": { "Indonesian,English,Filipino,Spanish,Chinese": 0.30113636363636365, - "Indonesian,English,Filipino,Spanish,Malay": 0.3068181818181818, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.30113636363636365, - "Indonesian,English,Filipino,Chinese,Malay": 0.3181818181818182, + "Indonesian,English,Filipino,Spanish,Malay": 0.30113636363636365, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.3068181818181818, + "Indonesian,English,Filipino,Chinese,Malay": 0.3125, "Indonesian,English,Filipino,Chinese,Vietnamese": 0.3068181818181818, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.3181818181818182, - "Indonesian,English,Spanish,Chinese,Malay": 0.35795454545454547, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.375, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,English,Spanish,Chinese,Malay": 0.3522727272727273, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.3806818181818182, "Indonesian,English,Spanish,Malay,Vietnamese": 0.3806818181818182, "Indonesian,English,Chinese,Malay,Vietnamese": 0.36363636363636365, "Indonesian,Filipino,Spanish,Chinese,Malay": 0.3068181818181818, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.3068181818181818, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.32386363636363635, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.3181818181818182, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.375, - "English,Filipino,Spanish,Chinese,Malay": 0.30113636363636365, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.30113636363636365, - "English,Filipino,Spanish,Malay,Vietnamese": 0.3181818181818182, - "English,Filipino,Chinese,Malay,Vietnamese": 0.3125, - "English,Spanish,Chinese,Malay,Vietnamese": 0.3693181818181818, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.3125 + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.3181818181818182, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.3693181818181818, + "English,Filipino,Spanish,Chinese,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.3125, + "English,Filipino,Chinese,Malay,Vietnamese": 0.30113636363636365, + "English,Spanish,Chinese,Malay,Vietnamese": 0.36363636363636365, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.30113636363636365 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.2727272727272727, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.26704545454545453, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.26704545454545453, "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.2840909090909091, "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.2840909090909091, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.3352272727272727, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2840909090909091, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.26704545454545453 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.25 } }, - "AC3_2": 0.508858317059483, - "AC3_3": 0.44842727314686265, - "AC3_4": 0.4058221614507755, - "AC3_5": 0.3727177947940652, - "AC3_6": 0.3453235039564873, - "AC3_7": 0.3214113421488447 + "AC3_2": 0.508204791549858, + "AC3_3": 0.44776512403669844, + "AC3_4": 0.40445732555483793, + "AC3_5": 0.370306500894595, + "AC3_6": 0.3415289255720207, + "AC3_7": 0.3166666666202222 } }, "sg_eval": { @@ -31986,10 +31986,10 @@ "accuracy": 0.6601941747572816 }, "prompt_3": { - "accuracy": 0.6213592233009708 + "accuracy": 0.6310679611650486 }, "prompt_4": { - "accuracy": 0.6310679611650486 + "accuracy": 0.6601941747572816 }, "prompt_5": { "accuracy": 0.6601941747572816 @@ -31997,33 +31997,33 @@ }, "cn_eval": { "prompt_1": { - "accuracy": 0.49523809523809526 + "accuracy": 0.4666666666666667 }, "prompt_2": { - "accuracy": 0.4380952380952381 + "accuracy": 0.41904761904761906 }, "prompt_3": { "accuracy": 0.5047619047619047 }, "prompt_4": { - "accuracy": 0.4666666666666667 + "accuracy": 0.49523809523809526 }, "prompt_5": { - "accuracy": 0.45714285714285713 + "accuracy": 0.44761904761904764 } }, "us_eval": { "prompt_1": { - "accuracy": 0.6728971962616822 + "accuracy": 0.6822429906542056 }, "prompt_2": { "accuracy": 0.7102803738317757 }, "prompt_3": { - "accuracy": 0.7383177570093458 + "accuracy": 0.719626168224299 }, "prompt_4": { - "accuracy": 0.7383177570093458 + "accuracy": 0.7289719626168224 }, "prompt_5": { "accuracy": 0.7289719626168224 @@ -32046,7 +32046,7 @@ } }, "prompt_2": { - "accuracy": 0.55, + "accuracy": 0.56, "category_acc": { "brand": 0.5, "demographics": 0.2, @@ -32057,7 +32057,7 @@ "culture": 0.6, "film": 0.5, "law": 0.6, - "geography": 0.5 + "geography": 0.6 } }, "prompt_3": { @@ -32076,13 +32076,13 @@ } }, "prompt_4": { - "accuracy": 0.61, + "accuracy": 0.62, "category_acc": { "brand": 0.6, "demographics": 0.4, "biology": 0.7, "history": 0.6666666666666666, - "literature": 0.2, + "literature": 0.3, "politics": 0.9, "culture": 0.7, "film": 0.7, @@ -32125,153 +32125,153 @@ }, "indommlu": { "prompt_1": { - "accuracy": 0.5280726350223647, + "accuracy": 0.5283396755457641, "category_acc": { - "History": 0.5240963855421686, - "Geography": 0.5020408163265306, - "Lampungic": 0.3741496598639456, - "Social science": 0.7495826377295493, - "Balinese": 0.30148619957537154, - "Makassarese": 0.3763440860215054, - "Banjarese": 0.4444444444444444, - "Chemistry": 0.31386861313868614, - "Biology": 0.5029585798816568, - "Science": 0.6780185758513931, - "Christian religion": 0.6616915422885572, - "Art": 0.5940099833610649, - "Islam religion": 0.65149359886202, - "Hindu religion": 0.5466666666666666, - "Madurese": 0.288135593220339, - "Sport": 0.5337837837837838, + "History": 0.5301204819277109, + "Geography": 0.5, + "Lampungic": 0.3469387755102041, + "Social science": 0.7462437395659433, + "Balinese": 0.31422505307855625, + "Makassarese": 0.3924731182795699, + "Banjarese": 0.4583333333333333, + "Chemistry": 0.32262773722627736, + "Biology": 0.4958579881656805, + "Science": 0.675954592363261, + "Christian religion": 0.6666666666666666, + "Art": 0.6039933444259568, + "Islam religion": 0.6429587482219061, + "Hindu religion": 0.54, + "Madurese": 0.2847457627118644, + "Sport": 0.5405405405405406, "Indonesian language": 0.587173100871731, - "Physics": 0.4686868686868687, - "Minangkabau culture": 0.457286432160804, - "Dayak language": 0.3486238532110092, + "Physics": 0.48282828282828283, + "Minangkabau culture": 0.4472361809045226, + "Dayak language": 0.3577981651376147, "Sociology": 0.5463709677419355, - "Economy": 0.5020491803278688, - "Sundanese": 0.4606741573033708, - "Javanese": 0.4092741935483871, - "Civic education": 0.6151645207439199 + "Economy": 0.49385245901639346, + "Sundanese": 0.4589455488331893, + "Javanese": 0.4112903225806452, + "Civic education": 0.6108726752503576 } }, "prompt_2": { - "accuracy": 0.530676280125509, + "accuracy": 0.528873756592563, "category_acc": { - "History": 0.5160642570281124, - "Geography": 0.5, - "Lampungic": 0.35374149659863946, - "Social science": 0.7612687813021702, - "Balinese": 0.28662420382165604, - "Makassarese": 0.3655913978494624, - "Banjarese": 0.4375, - "Chemistry": 0.345985401459854, - "Biology": 0.5076923076923077, - "Science": 0.6904024767801857, - "Christian religion": 0.6567164179104478, - "Art": 0.6073211314475874, + "History": 0.5120481927710844, + "Geography": 0.4959183673469388, + "Lampungic": 0.3197278911564626, + "Social science": 0.7679465776293823, + "Balinese": 0.28450106157112526, + "Makassarese": 0.3548387096774194, + "Banjarese": 0.4722222222222222, + "Chemistry": 0.3197080291970803, + "Biology": 0.506508875739645, + "Science": 0.6924664602683178, + "Christian religion": 0.6517412935323383, + "Art": 0.6089850249584027, "Islam religion": 0.6486486486486487, "Hindu religion": 0.58, - "Madurese": 0.3593220338983051, - "Sport": 0.5337837837837838, - "Indonesian language": 0.5840597758405978, - "Physics": 0.4909090909090909, - "Minangkabau culture": 0.4221105527638191, - "Dayak language": 0.24770642201834864, - "Sociology": 0.5504032258064516, - "Economy": 0.4979508196721312, - "Sundanese": 0.4520311149524633, - "Javanese": 0.42338709677419356, - "Civic education": 0.6065808297567954 + "Madurese": 0.3559322033898305, + "Sport": 0.5472972972972973, + "Indonesian language": 0.5812577833125778, + "Physics": 0.494949494949495, + "Minangkabau culture": 0.4271356783919598, + "Dayak language": 0.27522935779816515, + "Sociology": 0.5483870967741935, + "Economy": 0.5, + "Sundanese": 0.4442523768366465, + "Javanese": 0.4284274193548387, + "Civic education": 0.6051502145922747 } }, "prompt_3": { - "accuracy": 0.5258695507043194, + "accuracy": 0.5278055944989652, "category_acc": { - "History": 0.5140562248995983, - "Geography": 0.5142857142857142, - "Lampungic": 0.3469387755102041, - "Social science": 0.7462437395659433, - "Balinese": 0.29723991507430997, + "History": 0.5160642570281124, + "Geography": 0.5204081632653061, + "Lampungic": 0.36054421768707484, + "Social science": 0.7529215358931552, + "Balinese": 0.3078556263269639, "Makassarese": 0.3602150537634409, - "Banjarese": 0.4097222222222222, + "Banjarese": 0.4166666666666667, "Chemistry": 0.31386861313868614, - "Biology": 0.5183431952662721, - "Science": 0.6790505675954592, - "Christian religion": 0.6517412935323383, - "Art": 0.6073211314475874, - "Islam religion": 0.635846372688478, + "Biology": 0.5159763313609468, + "Science": 0.6769865841073271, + "Christian religion": 0.6567164179104478, + "Art": 0.610648918469218, + "Islam religion": 0.6372688477951636, "Hindu religion": 0.58, - "Madurese": 0.3423728813559322, - "Sport": 0.527027027027027, - "Indonesian language": 0.5874844333748444, - "Physics": 0.4585858585858586, + "Madurese": 0.33559322033898303, + "Sport": 0.5405405405405406, + "Indonesian language": 0.5912204234122043, + "Physics": 0.4444444444444444, "Minangkabau culture": 0.46733668341708545, - "Dayak language": 0.26605504587155965, + "Dayak language": 0.25688073394495414, "Sociology": 0.5362903225806451, - "Economy": 0.48565573770491804, - "Sundanese": 0.44511668107173724, - "Javanese": 0.41330645161290325, - "Civic education": 0.6022889842632332 + "Economy": 0.48770491803278687, + "Sundanese": 0.4511668107173725, + "Javanese": 0.4112903225806452, + "Civic education": 0.6065808297567954 } }, "prompt_4": { - "accuracy": 0.5315441618265572, + "accuracy": 0.5309433206489085, "category_acc": { - "History": 0.5261044176706827, - "Geography": 0.5244897959183673, - "Lampungic": 0.36054421768707484, - "Social science": 0.7479131886477463, - "Balinese": 0.3205944798301486, + "History": 0.5281124497991968, + "Geography": 0.5224489795918368, + "Lampungic": 0.3469387755102041, + "Social science": 0.7545909849749582, + "Balinese": 0.31634819532908703, "Makassarese": 0.4032258064516129, - "Banjarese": 0.4166666666666667, - "Chemistry": 0.3284671532846715, - "Biology": 0.5360946745562131, - "Science": 0.6862745098039216, + "Banjarese": 0.4375, + "Chemistry": 0.32116788321167883, + "Biology": 0.5349112426035503, + "Science": 0.6842105263157895, "Christian religion": 0.6766169154228856, - "Art": 0.6023294509151415, - "Islam religion": 0.6273115220483642, + "Art": 0.6056572379367721, + "Islam religion": 0.6344238975817923, "Hindu religion": 0.56, - "Madurese": 0.3389830508474576, - "Sport": 0.527027027027027, - "Indonesian language": 0.5896637608966376, - "Physics": 0.45252525252525255, - "Minangkabau culture": 0.4623115577889447, - "Dayak language": 0.23853211009174313, + "Madurese": 0.3423728813559322, + "Sport": 0.5472972972972973, + "Indonesian language": 0.5909090909090909, + "Physics": 0.42828282828282827, + "Minangkabau culture": 0.457286432160804, + "Dayak language": 0.27522935779816515, "Sociology": 0.5725806451612904, - "Economy": 0.5122950819672131, - "Sundanese": 0.452895419187554, - "Javanese": 0.39818548387096775, - "Civic education": 0.6051502145922747 + "Economy": 0.5020491803278688, + "Sundanese": 0.44770959377700953, + "Javanese": 0.4012096774193548, + "Civic education": 0.6022889842632332 } }, "prompt_5": { - "accuracy": 0.5029040656919688, + "accuracy": 0.5021697042526203, "category_acc": { - "History": 0.4779116465863454, - "Geography": 0.4714285714285714, - "Lampungic": 0.38095238095238093, - "Social science": 0.6944908180300501, - "Balinese": 0.3057324840764331, - "Makassarese": 0.2903225806451613, - "Banjarese": 0.4027777777777778, - "Chemistry": 0.2934306569343066, - "Biology": 0.46153846153846156, - "Science": 0.6295149638802889, - "Christian religion": 0.6567164179104478, - "Art": 0.589018302828619, - "Islam religion": 0.6443812233285917, - "Hindu religion": 0.52, - "Madurese": 0.30847457627118646, - "Sport": 0.5135135135135135, - "Indonesian language": 0.5716064757160647, - "Physics": 0.4484848484848485, + "History": 0.4919678714859438, + "Geography": 0.4673469387755102, + "Lampungic": 0.3673469387755102, + "Social science": 0.6928213689482471, + "Balinese": 0.28874734607218683, + "Makassarese": 0.27419354838709675, + "Banjarese": 0.3888888888888889, + "Chemistry": 0.3051094890510949, + "Biology": 0.45680473372781066, + "Science": 0.6222910216718266, + "Christian religion": 0.6616915422885572, + "Art": 0.5906821963394343, + "Islam religion": 0.635846372688478, + "Hindu religion": 0.5333333333333333, + "Madurese": 0.288135593220339, + "Sport": 0.5067567567567568, + "Indonesian language": 0.5731631382316313, + "Physics": 0.4585858585858586, "Minangkabau culture": 0.4271356783919598, - "Dayak language": 0.29357798165137616, - "Sociology": 0.48185483870967744, - "Economy": 0.4672131147540984, - "Sundanese": 0.4399308556611927, - "Javanese": 0.39818548387096775, - "Civic education": 0.5793991416309013 + "Dayak language": 0.28440366972477066, + "Sociology": 0.4838709677419355, + "Economy": 0.45901639344262296, + "Sundanese": 0.44338807260155577, + "Javanese": 0.4012096774193548, + "Civic education": 0.5779685264663805 } } }, @@ -32345,359 +32345,359 @@ }, "mmlu": { "prompt_1": { - "accuracy": 0.574095682613769 + "accuracy": 0.5612602100350058 }, "prompt_2": { - "accuracy": 0.5997666277712952 + "accuracy": 0.5950991831971996 }, "prompt_3": { - "accuracy": 0.5997666277712952 + "accuracy": 0.5985997666277713 }, "prompt_4": { - "accuracy": 0.6161026837806302 + "accuracy": 0.6126021003500584 }, "prompt_5": { - "accuracy": 0.5355892648774796 + "accuracy": 0.543757292882147 } }, "mmlu_full": { "prompt_1": { - "accuracy": 0.619449410082231, + "accuracy": 0.5681801930639971, "category_acc": { - "high_school_european_history": 0.7378048780487805, - "business_ethics": 0.5656565656565656, - "clinical_knowledge": 0.6856060606060606, - "medical_genetics": 0.7171717171717171, - "high_school_us_history": 0.7635467980295566, - "high_school_physics": 0.2866666666666667, - "high_school_world_history": 0.7796610169491526, - "virology": 0.4909090909090909, - "high_school_microeconomics": 0.6835443037974683, - "econometrics": 0.4336283185840708, - "college_computer_science": 0.4444444444444444, - "high_school_biology": 0.7216828478964401, - "abstract_algebra": 0.32323232323232326, - "professional_accounting": 0.47330960854092524, - "philosophy": 0.6483870967741936, + "high_school_european_history": 0.7012195121951219, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.6477272727272727, + "medical_genetics": 0.696969696969697, + "high_school_us_history": 0.6748768472906403, + "high_school_physics": 0.37333333333333335, + "high_school_world_history": 0.7584745762711864, + "virology": 0.5272727272727272, + "high_school_microeconomics": 0.6708860759493671, + "econometrics": 0.415929203539823, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.7152103559870551, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.45907473309608543, + "philosophy": 0.6064516129032258, "professional_medicine": 0.7343173431734318, - "nutrition": 0.7475409836065574, - "global_facts": 0.5252525252525253, + "nutrition": 0.7081967213114754, + "global_facts": 0.37373737373737376, "machine_learning": 0.3783783783783784, - "security_studies": 0.6352459016393442, - "public_relations": 0.6146788990825688, - "professional_psychology": 0.6415711947626841, - "prehistory": 0.7058823529411765, - "anatomy": 0.664179104477612, - "human_sexuality": 0.7230769230769231, - "college_medicine": 0.6046511627906976, - "high_school_government_and_politics": 0.8333333333333334, - "college_chemistry": 0.41414141414141414, - "logical_fallacies": 0.7469135802469136, - "high_school_geography": 0.7360406091370558, - "elementary_mathematics": 0.7029177718832891, - "human_aging": 0.6621621621621622, - "college_mathematics": 0.23232323232323232, - "high_school_psychology": 0.8051470588235294, - "formal_logic": 0.456, - "high_school_statistics": 0.4511627906976744, - "international_law": 0.75, - "high_school_mathematics": 0.3048327137546468, - "high_school_computer_science": 0.696969696969697, - "conceptual_physics": 0.5555555555555556, - "miscellaneous": 0.8414322250639387, + "security_studies": 0.6557377049180327, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.5990180032733224, + "prehistory": 0.6160990712074303, + "anatomy": 0.6044776119402985, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5930232558139535, + "high_school_government_and_politics": 0.8125, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.6604938271604939, + "high_school_geography": 0.7106598984771574, + "elementary_mathematics": 0.4562334217506631, + "human_aging": 0.6396396396396397, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7794117647058824, + "formal_logic": 0.376, + "high_school_statistics": 0.4372093023255814, + "international_law": 0.675, + "high_school_mathematics": 0.34572490706319703, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.5085470085470085, + "miscellaneous": 0.7762148337595908, "high_school_chemistry": 0.4801980198019802, - "marketing": 0.8497854077253219, - "professional_law": 0.4592302674494455, - "management": 0.7647058823529411, - "college_physics": 0.3564356435643564, - "jurisprudence": 0.7383177570093458, - "world_religions": 0.8, - "sociology": 0.8, - "us_foreign_policy": 0.8585858585858586, - "high_school_macroeconomics": 0.5989717223650386, - "computer_security": 0.696969696969697, - "moral_scenarios": 0.4004474272930649, - "moral_disputes": 0.6608695652173913, - "electrical_engineering": 0.625, - "astronomy": 0.6887417218543046, - "college_biology": 0.6923076923076923 + "marketing": 0.6137339055793991, + "professional_law": 0.40574037834311805, + "management": 0.7352941176470589, + "college_physics": 0.46534653465346537, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7705882352941177, + "sociology": 0.795, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.5784061696658098, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.5739130434782609, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.6754966887417219, + "college_biology": 0.7202797202797203 } }, "prompt_2": { - "accuracy": 0.6249553092599214, + "accuracy": 0.6002145155523776, "category_acc": { - "high_school_european_history": 0.7439024390243902, - "business_ethics": 0.6464646464646465, - "clinical_knowledge": 0.7424242424242424, - "medical_genetics": 0.7676767676767676, - "high_school_us_history": 0.7783251231527094, - "high_school_physics": 0.4066666666666667, - "high_school_world_history": 0.8220338983050848, - "virology": 0.49696969696969695, - "high_school_microeconomics": 0.7088607594936709, - "econometrics": 0.45132743362831856, - "college_computer_science": 0.47474747474747475, - "high_school_biology": 0.7734627831715211, - "abstract_algebra": 0.25252525252525254, - "professional_accounting": 0.5088967971530249, - "philosophy": 0.6516129032258065, - "professional_medicine": 0.7527675276752768, - "nutrition": 0.7344262295081967, - "global_facts": 0.45454545454545453, - "machine_learning": 0.4864864864864865, - "security_studies": 0.6721311475409836, - "public_relations": 0.6605504587155964, - "professional_psychology": 0.6612111292962357, - "prehistory": 0.7120743034055728, - "anatomy": 0.6417910447761194, + "high_school_european_history": 0.75, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.7045454545454546, + "medical_genetics": 0.7272727272727273, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.7966101694915254, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.70042194092827, + "econometrics": 0.415929203539823, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.7508090614886731, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.501779359430605, + "philosophy": 0.6774193548387096, + "professional_medicine": 0.7158671586715867, + "nutrition": 0.7147540983606557, + "global_facts": 0.3939393939393939, + "machine_learning": 0.38738738738738737, + "security_studies": 0.6270491803278688, + "public_relations": 0.6330275229357798, + "professional_psychology": 0.6382978723404256, + "prehistory": 0.7151702786377709, + "anatomy": 0.6119402985074627, "human_sexuality": 0.7230769230769231, - "college_medicine": 0.6162790697674418, - "high_school_government_and_politics": 0.859375, - "college_chemistry": 0.37373737373737376, + "college_medicine": 0.6104651162790697, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.40404040404040403, "logical_fallacies": 0.7407407407407407, - "high_school_geography": 0.7766497461928934, - "elementary_mathematics": 0.7082228116710876, - "human_aging": 0.6666666666666666, - "college_mathematics": 0.3333333333333333, - "high_school_psychology": 0.8198529411764706, - "formal_logic": 0.504, - "high_school_statistics": 0.5209302325581395, - "international_law": 0.7583333333333333, + "high_school_geography": 0.766497461928934, + "elementary_mathematics": 0.44297082228116713, + "human_aging": 0.6756756756756757, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.8216911764705882, + "formal_logic": 0.44, + "high_school_statistics": 0.4511627906976744, + "international_law": 0.675, "high_school_mathematics": 0.35687732342007433, - "high_school_computer_science": 0.7070707070707071, - "conceptual_physics": 0.5769230769230769, - "miscellaneous": 0.8286445012787724, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5726495726495726, + "miscellaneous": 0.8005115089514067, "high_school_chemistry": 0.5148514851485149, - "marketing": 0.8755364806866953, - "professional_law": 0.4611872146118721, - "management": 0.8137254901960784, - "college_physics": 0.37623762376237624, - "jurisprudence": 0.7383177570093458, - "world_religions": 0.788235294117647, + "marketing": 0.8068669527896996, + "professional_law": 0.4344422700587084, + "management": 0.803921568627451, + "college_physics": 0.42574257425742573, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.7764705882352941, "sociology": 0.795, - "us_foreign_policy": 0.8181818181818182, - "high_school_macroeconomics": 0.5938303341902313, - "computer_security": 0.6868686868686869, - "moral_scenarios": 0.25615212527964204, - "moral_disputes": 0.6840579710144927, - "electrical_engineering": 0.5625, - "astronomy": 0.7152317880794702, - "college_biology": 0.7202797202797203 + "us_foreign_policy": 0.8787878787878788, + "high_school_macroeconomics": 0.622107969151671, + "computer_security": 0.7171717171717171, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.663768115942029, + "electrical_engineering": 0.5, + "astronomy": 0.6821192052980133, + "college_biology": 0.7622377622377622 } }, "prompt_3": { - "accuracy": 0.6251698248122989, + "accuracy": 0.6081515909903468, "category_acc": { - "high_school_european_history": 0.7317073170731707, - "business_ethics": 0.696969696969697, - "clinical_knowledge": 0.7234848484848485, + "high_school_european_history": 0.75, + "business_ethics": 0.6565656565656566, + "clinical_knowledge": 0.696969696969697, "medical_genetics": 0.7474747474747475, - "high_school_us_history": 0.7684729064039408, - "high_school_physics": 0.36666666666666664, - "high_school_world_history": 0.809322033898305, - "virology": 0.503030303030303, - "high_school_microeconomics": 0.7215189873417721, - "econometrics": 0.46017699115044247, - "college_computer_science": 0.43434343434343436, - "high_school_biology": 0.7443365695792881, - "abstract_algebra": 0.2727272727272727, - "professional_accounting": 0.5088967971530249, - "philosophy": 0.6645161290322581, - "professional_medicine": 0.7490774907749077, - "nutrition": 0.7180327868852459, - "global_facts": 0.48484848484848486, - "machine_learning": 0.4864864864864865, - "security_studies": 0.6721311475409836, - "public_relations": 0.6330275229357798, - "professional_psychology": 0.6350245499181669, - "prehistory": 0.7151702786377709, - "anatomy": 0.6567164179104478, - "human_sexuality": 0.7461538461538462, - "college_medicine": 0.5988372093023255, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.37333333333333335, + "high_school_world_history": 0.8220338983050848, + "virology": 0.5272727272727272, + "high_school_microeconomics": 0.70042194092827, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.7411003236245954, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.498220640569395, + "philosophy": 0.667741935483871, + "professional_medicine": 0.7121771217712177, + "nutrition": 0.7245901639344262, + "global_facts": 0.3838383838383838, + "machine_learning": 0.4594594594594595, + "security_studies": 0.680327868852459, + "public_relations": 0.6697247706422018, + "professional_psychology": 0.662847790507365, + "prehistory": 0.7120743034055728, + "anatomy": 0.6492537313432836, + "human_sexuality": 0.7384615384615385, + "college_medicine": 0.6046511627906976, "high_school_government_and_politics": 0.859375, - "college_chemistry": 0.47474747474747475, - "logical_fallacies": 0.7592592592592593, - "high_school_geography": 0.7461928934010152, - "elementary_mathematics": 0.7294429708222812, - "human_aging": 0.6891891891891891, - "college_mathematics": 0.30303030303030304, - "high_school_psychology": 0.8051470588235294, - "formal_logic": 0.536, - "high_school_statistics": 0.5023255813953489, - "international_law": 0.7416666666666667, - "high_school_mathematics": 0.3345724907063197, - "high_school_computer_science": 0.6565656565656566, - "conceptual_physics": 0.6068376068376068, - "miscellaneous": 0.8350383631713555, - "high_school_chemistry": 0.5198019801980198, - "marketing": 0.8540772532188842, - "professional_law": 0.4683626875407697, - "management": 0.7647058823529411, - "college_physics": 0.37623762376237624, - "jurisprudence": 0.719626168224299, - "world_religions": 0.8058823529411765, - "sociology": 0.8, - "us_foreign_policy": 0.8686868686868687, - "high_school_macroeconomics": 0.583547557840617, - "computer_security": 0.6767676767676768, - "moral_scenarios": 0.2897091722595078, - "moral_disputes": 0.6753623188405797, - "electrical_engineering": 0.5763888888888888, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.7716049382716049, + "high_school_geography": 0.751269035532995, + "elementary_mathematics": 0.46153846153846156, + "human_aging": 0.6801801801801802, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.8198529411764706, + "formal_logic": 0.432, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.7833333333333333, + "high_school_mathematics": 0.36059479553903345, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5769230769230769, + "miscellaneous": 0.8043478260869565, + "high_school_chemistry": 0.4900990099009901, + "marketing": 0.7424892703862661, + "professional_law": 0.44422700587084146, + "management": 0.8235294117647058, + "college_physics": 0.42574257425742573, + "jurisprudence": 0.7289719626168224, + "world_religions": 0.8, + "sociology": 0.81, + "us_foreign_policy": 0.8484848484848485, + "high_school_macroeconomics": 0.609254498714653, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.25838926174496646, + "moral_disputes": 0.6579710144927536, + "electrical_engineering": 0.5416666666666666, "astronomy": 0.695364238410596, - "college_biology": 0.7272727272727273 + "college_biology": 0.7762237762237763 } }, "prompt_4": { - "accuracy": 0.6171612441902038, + "accuracy": 0.6057919199141938, "category_acc": { "high_school_european_history": 0.7317073170731707, - "business_ethics": 0.6565656565656566, - "clinical_knowledge": 0.6590909090909091, - "medical_genetics": 0.7575757575757576, - "high_school_us_history": 0.7931034482758621, - "high_school_physics": 0.34, - "high_school_world_history": 0.8008474576271186, - "virology": 0.49696969696969695, - "high_school_microeconomics": 0.7088607594936709, - "econometrics": 0.3893805309734513, - "college_computer_science": 0.3939393939393939, - "high_school_biology": 0.7249190938511327, - "abstract_algebra": 0.30303030303030304, - "professional_accounting": 0.5480427046263345, - "philosophy": 0.6774193548387096, - "professional_medicine": 0.6678966789667896, - "nutrition": 0.7114754098360656, - "global_facts": 0.5050505050505051, - "machine_learning": 0.35135135135135137, - "security_studies": 0.6721311475409836, - "public_relations": 0.6513761467889908, - "professional_psychology": 0.630114566284779, - "prehistory": 0.7120743034055728, - "anatomy": 0.6119402985074627, - "human_sexuality": 0.6923076923076923, - "college_medicine": 0.6046511627906976, - "high_school_government_and_politics": 0.8229166666666666, - "college_chemistry": 0.3939393939393939, - "logical_fallacies": 0.7654320987654321, - "high_school_geography": 0.7309644670050761, - "elementary_mathematics": 0.6816976127320955, - "human_aging": 0.6486486486486487, - "college_mathematics": 0.35353535353535354, - "high_school_psychology": 0.7886029411764706, - "formal_logic": 0.488, - "high_school_statistics": 0.4744186046511628, - "international_law": 0.775, - "high_school_mathematics": 0.38661710037174724, - "high_school_computer_science": 0.6767676767676768, - "conceptual_physics": 0.594017094017094, - "miscellaneous": 0.7877237851662404, - "high_school_chemistry": 0.5, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.696969696969697, + "medical_genetics": 0.7474747474747475, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.4, + "high_school_world_history": 0.7923728813559322, + "virology": 0.5212121212121212, + "high_school_microeconomics": 0.7046413502109705, + "econometrics": 0.4778761061946903, + "college_computer_science": 0.47474747474747475, + "high_school_biology": 0.7702265372168284, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.4875444839857651, + "philosophy": 0.6483870967741936, + "professional_medicine": 0.7380073800738007, + "nutrition": 0.7180327868852459, + "global_facts": 0.37373737373737376, + "machine_learning": 0.40540540540540543, + "security_studies": 0.6516393442622951, + "public_relations": 0.6605504587155964, + "professional_psychology": 0.6366612111292962, + "prehistory": 0.6996904024767802, + "anatomy": 0.6268656716417911, + "human_sexuality": 0.7307692307692307, + "college_medicine": 0.6104651162790697, + "high_school_government_and_politics": 0.8177083333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.7777777777777778, + "high_school_geography": 0.751269035532995, + "elementary_mathematics": 0.47214854111405835, + "human_aging": 0.6621621621621622, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.8327205882352942, + "formal_logic": 0.44, + "high_school_statistics": 0.44651162790697674, + "international_law": 0.7, + "high_school_mathematics": 0.3754646840148699, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5769230769230769, + "miscellaneous": 0.8184143222506394, + "high_school_chemistry": 0.4900990099009901, "marketing": 0.8283261802575107, - "professional_law": 0.47619047619047616, + "professional_law": 0.4435746901500326, "management": 0.803921568627451, - "college_physics": 0.4158415841584158, - "jurisprudence": 0.7476635514018691, - "world_religions": 0.8, - "sociology": 0.8, - "us_foreign_policy": 0.8383838383838383, - "high_school_macroeconomics": 0.5732647814910026, - "computer_security": 0.6161616161616161, - "moral_scenarios": 0.3434004474272931, - "moral_disputes": 0.6695652173913044, - "electrical_engineering": 0.6180555555555556, - "astronomy": 0.7218543046357616, - "college_biology": 0.6503496503496503 + "college_physics": 0.40594059405940597, + "jurisprudence": 0.7009345794392523, + "world_religions": 0.8117647058823529, + "sociology": 0.845, + "us_foreign_policy": 0.8686868686868687, + "high_school_macroeconomics": 0.6015424164524421, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6260869565217392, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.6622516556291391, + "college_biology": 0.7762237762237763 } }, "prompt_5": { - "accuracy": 0.5823382195209152, + "accuracy": 0.5470146585627458, "category_acc": { - "high_school_european_history": 0.7134146341463414, + "high_school_european_history": 0.6158536585365854, "business_ethics": 0.6161616161616161, - "clinical_knowledge": 0.6401515151515151, - "medical_genetics": 0.7373737373737373, - "high_school_us_history": 0.7783251231527094, - "high_school_physics": 0.32666666666666666, - "high_school_world_history": 0.7923728813559322, - "virology": 0.4909090909090909, - "high_school_microeconomics": 0.6118143459915611, - "econometrics": 0.35398230088495575, - "college_computer_science": 0.36363636363636365, - "high_school_biology": 0.6472491909385113, - "abstract_algebra": 0.29292929292929293, - "professional_accounting": 0.4483985765124555, - "philosophy": 0.6580645161290323, - "professional_medicine": 0.6457564575645757, - "nutrition": 0.6786885245901639, - "global_facts": 0.494949494949495, - "machine_learning": 0.36936936936936937, - "security_studies": 0.5983606557377049, - "public_relations": 0.5504587155963303, - "professional_psychology": 0.6104746317512275, - "prehistory": 0.6687306501547987, - "anatomy": 0.5895522388059702, - "human_sexuality": 0.5923076923076923, - "college_medicine": 0.5348837209302325, - "high_school_government_and_politics": 0.796875, - "college_chemistry": 0.31313131313131315, - "logical_fallacies": 0.7345679012345679, - "high_school_geography": 0.6700507614213198, - "elementary_mathematics": 0.726790450928382, - "human_aging": 0.6396396396396397, - "college_mathematics": 0.21212121212121213, - "high_school_psychology": 0.7408088235294118, - "formal_logic": 0.4, - "high_school_statistics": 0.40930232558139534, - "international_law": 0.7083333333333334, - "high_school_mathematics": 0.2936802973977695, - "high_school_computer_science": 0.6666666666666666, - "conceptual_physics": 0.5470085470085471, - "miscellaneous": 0.7902813299232737, - "high_school_chemistry": 0.41089108910891087, - "marketing": 0.8111587982832618, - "professional_law": 0.44879321591650356, - "management": 0.7549019607843137, - "college_physics": 0.3564356435643564, - "jurisprudence": 0.6915887850467289, - "world_religions": 0.8176470588235294, - "sociology": 0.775, - "us_foreign_policy": 0.8282828282828283, - "high_school_macroeconomics": 0.5475578406169666, + "clinical_knowledge": 0.625, + "medical_genetics": 0.6767676767676768, + "high_school_us_history": 0.6059113300492611, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.6059322033898306, + "virology": 0.5272727272727272, + "high_school_microeconomics": 0.5991561181434599, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.6990291262135923, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.43416370106761565, + "philosophy": 0.632258064516129, + "professional_medicine": 0.6383763837638377, + "nutrition": 0.6655737704918033, + "global_facts": 0.41414141414141414, + "machine_learning": 0.38738738738738737, + "security_studies": 0.610655737704918, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.5793780687397708, + "prehistory": 0.6099071207430341, + "anatomy": 0.5447761194029851, + "human_sexuality": 0.6538461538461539, + "college_medicine": 0.5813953488372093, + "high_school_government_and_politics": 0.8072916666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.6790123456790124, + "high_school_geography": 0.7258883248730964, + "elementary_mathematics": 0.41644562334217505, + "human_aging": 0.6261261261261262, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.7610294117647058, + "formal_logic": 0.384, + "high_school_statistics": 0.4, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.35687732342007433, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.5384615384615384, + "miscellaneous": 0.7672634271099744, + "high_school_chemistry": 0.5148514851485149, + "marketing": 0.5493562231759657, + "professional_law": 0.3796477495107632, + "management": 0.7843137254901961, + "college_physics": 0.45544554455445546, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.7352941176470589, + "sociology": 0.75, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.5269922879177378, "computer_security": 0.6666666666666666, - "moral_scenarios": 0.30648769574944074, - "moral_disputes": 0.6347826086956522, - "electrical_engineering": 0.6111111111111112, - "astronomy": 0.6357615894039735, - "college_biology": 0.6013986013986014 + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.5768115942028985, + "electrical_engineering": 0.4722222222222222, + "astronomy": 0.6291390728476821, + "college_biology": 0.6713286713286714 } } }, "c_eval": { "prompt_1": { - "accuracy": 0.4851411589895988 + "accuracy": 0.4814264487369985 }, "prompt_2": { - "accuracy": 0.4160475482912333 + "accuracy": 0.4049034175334324 }, "prompt_3": { - "accuracy": 0.4658246656760773 + "accuracy": 0.4702823179791976 }, "prompt_4": { - "accuracy": 0.4606240713224368 + "accuracy": 0.4554234769687964 }, "prompt_5": { - "accuracy": 0.3848439821693908 + "accuracy": 0.3885586924219911 } }, "c_eval_full": { "prompt_1": { - "accuracy": 0.4900373599003736, + "accuracy": 0.49066002490660027, "category_acc": { "computer_network": 0.4583333333333333, "operating_system": 0.625, "computer_architecture": 0.4230769230769231, "college_programming": 0.5714285714285714, - "college_physics": 0.4166666666666667, + "college_physics": 0.4583333333333333, "college_chemistry": 0.4482758620689655, "advanced_mathematics": 0.2916666666666667, "probability_and_statistics": 0.43478260869565216, @@ -32708,7 +32708,7 @@ "high_school_physics": 0.3333333333333333, "high_school_chemistry": 0.5833333333333334, "high_school_biology": 0.375, - "middle_school_mathematics": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, "middle_school_biology": 0.6538461538461539, "middle_school_physics": 0.5833333333333334, "middle_school_chemistry": 0.44, @@ -32718,7 +32718,7 @@ "marxism": 0.5833333333333334, "mao_zedong_thought": 0.6551724137931034, "education_science": 0.5882352941176471, - "teacher_qualification": 0.7755102040816326, + "teacher_qualification": 0.7346938775510204, "high_school_politics": 0.5833333333333334, "high_school_geography": 0.5416666666666666, "middle_school_politics": 0.6923076923076923, @@ -32730,8 +32730,8 @@ "chinese_language_and_literature": 0.5357142857142857, "art_studies": 0.5789473684210527, "professional_tour_guide": 0.5588235294117647, - "legal_professional": 0.42857142857142855, - "high_school_chinese": 0.25, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.2916666666666667, "high_school_history": 0.36, "middle_school_history": 0.7037037037037037, "civil_servant": 0.4423076923076923, @@ -32744,117 +32744,117 @@ "fire_engineer": 0.4444444444444444, "environmental_impact_assessment_engineer": 0.3333333333333333, "tax_accountant": 0.3333333333333333, - "physician": 0.5555555555555556 + "physician": 0.5740740740740741 } }, "prompt_2": { - "accuracy": 0.4215442092154421, + "accuracy": 0.412826899128269, "category_acc": { - "computer_network": 0.5833333333333334, + "computer_network": 0.4583333333333333, "operating_system": 0.5833333333333334, - "computer_architecture": 0.46153846153846156, + "computer_architecture": 0.5, "college_programming": 0.5238095238095238, "college_physics": 0.3333333333333333, - "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.625, - "probability_and_statistics": 0.43478260869565216, - "discrete_mathematics": 0.2857142857142857, - "electrical_engineer": 0.2857142857142857, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.5416666666666666, + "probability_and_statistics": 0.4782608695652174, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.3333333333333333, "metrology_engineer": 0.3793103448275862, - "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.25, - "high_school_chemistry": 0.4583333333333333, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.3333333333333333, "high_school_biology": 0.2916666666666667, "middle_school_mathematics": 0.375, - "middle_school_biology": 0.38461538461538464, + "middle_school_biology": 0.7307692307692307, "middle_school_physics": 0.5833333333333334, - "middle_school_chemistry": 0.32, - "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.36666666666666664, - "business_administration": 0.4473684210526316, - "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.5862068965517241, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.2833333333333333, + "business_administration": 0.34210526315789475, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5172413793103449, "education_science": 0.5294117647058824, - "teacher_qualification": 0.6530612244897959, - "high_school_politics": 0.5, - "high_school_geography": 0.4583333333333333, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.5, "middle_school_politics": 0.46153846153846156, "middle_school_geography": 0.4117647058823529, - "modern_chinese_history": 0.2857142857142857, + "modern_chinese_history": 0.35714285714285715, "ideological_and_moral_cultivation": 0.5416666666666666, "logic": 0.25925925925925924, "law": 0.3103448275862069, "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.6052631578947368, - "professional_tour_guide": 0.5294117647058824, - "legal_professional": 0.4642857142857143, - "high_school_chinese": 0.3333333333333333, - "high_school_history": 0.56, - "middle_school_history": 0.5925925925925926, + "art_studies": 0.5789473684210527, + "professional_tour_guide": 0.5, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.44, + "middle_school_history": 0.48148148148148145, "civil_servant": 0.3269230769230769, - "sports_science": 0.4583333333333333, + "sports_science": 0.5, "plant_protection": 0.5925925925925926, - "basic_medicine": 0.4166666666666667, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.37254901960784315, - "accountant": 0.2777777777777778, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.3148148148148148, "fire_engineer": 0.3611111111111111, - "environmental_impact_assessment_engineer": 0.2222222222222222, - "tax_accountant": 0.2222222222222222, - "physician": 0.5555555555555556 + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2962962962962963, + "physician": 0.46296296296296297 } }, "prompt_3": { - "accuracy": 0.4738480697384807, + "accuracy": 0.47198007471980075, "category_acc": { - "computer_network": 0.375, + "computer_network": 0.4166666666666667, "operating_system": 0.625, "computer_architecture": 0.46153846153846156, "college_programming": 0.5714285714285714, - "college_physics": 0.4166666666666667, + "college_physics": 0.375, "college_chemistry": 0.41379310344827586, "advanced_mathematics": 0.4583333333333333, "probability_and_statistics": 0.2608695652173913, - "discrete_mathematics": 0.23809523809523808, + "discrete_mathematics": 0.19047619047619047, "electrical_engineer": 0.38095238095238093, - "metrology_engineer": 0.6206896551724138, + "metrology_engineer": 0.6551724137931034, "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.4583333333333333, + "high_school_physics": 0.4166666666666667, "high_school_chemistry": 0.5, - "high_school_biology": 0.375, - "middle_school_mathematics": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, "middle_school_biology": 0.6538461538461539, - "middle_school_physics": 0.5, - "middle_school_chemistry": 0.44, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.4, "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.4166666666666667, + "college_economics": 0.4, "business_administration": 0.3684210526315789, "marxism": 0.5833333333333334, "mao_zedong_thought": 0.5862068965517241, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.7551020408163265, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7142857142857143, "high_school_politics": 0.625, "high_school_geography": 0.5416666666666666, - "middle_school_politics": 0.6538461538461539, + "middle_school_politics": 0.6153846153846154, "middle_school_geography": 0.4117647058823529, - "modern_chinese_history": 0.4642857142857143, + "modern_chinese_history": 0.42857142857142855, "ideological_and_moral_cultivation": 0.5833333333333334, "logic": 0.4444444444444444, "law": 0.3448275862068966, "chinese_language_and_literature": 0.5357142857142857, "art_studies": 0.5, "professional_tour_guide": 0.5588235294117647, - "legal_professional": 0.4642857142857143, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.4, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, "middle_school_history": 0.7037037037037037, - "civil_servant": 0.4423076923076923, + "civil_servant": 0.4807692307692308, "sports_science": 0.4583333333333333, "plant_protection": 0.6666666666666666, "basic_medicine": 0.625, "clinical_medicine": 0.5185185185185185, - "urban_and_rural_planner": 0.45098039215686275, - "accountant": 0.35185185185185186, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.37037037037037035, "fire_engineer": 0.4166666666666667, "environmental_impact_assessment_engineer": 0.3888888888888889, "tax_accountant": 0.3333333333333333, @@ -32862,502 +32862,502 @@ } }, "prompt_4": { - "accuracy": 0.4726027397260274, + "accuracy": 0.4713574097135741, "category_acc": { "computer_network": 0.4166666666666667, - "operating_system": 0.625, - "computer_architecture": 0.3076923076923077, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.34615384615384615, "college_programming": 0.5952380952380952, - "college_physics": 0.3333333333333333, - "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.20833333333333334, - "probability_and_statistics": 0.391304347826087, - "discrete_mathematics": 0.2857142857142857, + "college_physics": 0.375, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.23809523809523808, "electrical_engineer": 0.42857142857142855, - "metrology_engineer": 0.5862068965517241, + "metrology_engineer": 0.5517241379310345, "high_school_mathematics": 0.08695652173913043, - "high_school_physics": 0.4166666666666667, + "high_school_physics": 0.375, "high_school_chemistry": 0.4583333333333333, "high_school_biology": 0.4166666666666667, - "middle_school_mathematics": 0.3333333333333333, - "middle_school_biology": 0.6923076923076923, - "middle_school_physics": 0.5416666666666666, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.7307692307692307, + "middle_school_physics": 0.5, "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.42857142857142855, + "veterinary_medicine": 0.39285714285714285, "college_economics": 0.38333333333333336, "business_administration": 0.42105263157894735, - "marxism": 0.5833333333333334, - "mao_zedong_thought": 0.6206896551724138, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.673469387755102, - "high_school_politics": 0.625, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.5833333333333334, "high_school_geography": 0.5833333333333334, - "middle_school_politics": 0.6538461538461539, + "middle_school_politics": 0.6923076923076923, "middle_school_geography": 0.47058823529411764, "modern_chinese_history": 0.5357142857142857, "ideological_and_moral_cultivation": 0.6666666666666666, - "logic": 0.4444444444444444, + "logic": 0.5185185185185185, "law": 0.41379310344827586, - "chinese_language_and_literature": 0.4642857142857143, + "chinese_language_and_literature": 0.42857142857142855, "art_studies": 0.39473684210526316, "professional_tour_guide": 0.5, - "legal_professional": 0.39285714285714285, - "high_school_chinese": 0.3333333333333333, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.375, "high_school_history": 0.4, "middle_school_history": 0.6296296296296297, "civil_servant": 0.4230769230769231, - "sports_science": 0.4583333333333333, - "plant_protection": 0.5555555555555556, + "sports_science": 0.5, + "plant_protection": 0.5925925925925926, "basic_medicine": 0.625, "clinical_medicine": 0.48148148148148145, "urban_and_rural_planner": 0.5686274509803921, - "accountant": 0.48148148148148145, - "fire_engineer": 0.3888888888888889, - "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.37037037037037035, + "accountant": 0.5, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3888888888888889, "physician": 0.5185185185185185 } }, "prompt_5": { - "accuracy": 0.39352428393524286, + "accuracy": 0.39663760896637607, "category_acc": { "computer_network": 0.4166666666666667, - "operating_system": 0.5, + "operating_system": 0.4583333333333333, "computer_architecture": 0.46153846153846156, - "college_programming": 0.47619047619047616, - "college_physics": 0.25, - "college_chemistry": 0.3448275862068966, + "college_programming": 0.4523809523809524, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3793103448275862, "advanced_mathematics": 0.3333333333333333, "probability_and_statistics": 0.4782608695652174, - "discrete_mathematics": 0.19047619047619047, - "electrical_engineer": 0.3333333333333333, - "metrology_engineer": 0.4482758620689655, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.41379310344827586, "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.16666666666666666, - "high_school_chemistry": 0.4166666666666667, - "high_school_biology": 0.375, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.3333333333333333, "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.46153846153846156, - "middle_school_physics": 0.5416666666666666, - "middle_school_chemistry": 0.56, - "veterinary_medicine": 0.42857142857142855, - "college_economics": 0.25, - "business_administration": 0.39473684210526316, - "marxism": 0.4166666666666667, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.3, + "business_administration": 0.3684210526315789, + "marxism": 0.375, "mao_zedong_thought": 0.3103448275862069, - "education_science": 0.4411764705882353, - "teacher_qualification": 0.5102040816326531, - "high_school_politics": 0.625, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.46153846153846156, - "middle_school_geography": 0.5882352941176471, - "modern_chinese_history": 0.4642857142857143, - "ideological_and_moral_cultivation": 0.5, - "logic": 0.4074074074074074, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.42857142857142855, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.5918367346938775, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.5, "art_studies": 0.5263157894736842, - "professional_tour_guide": 0.5, - "legal_professional": 0.32142857142857145, - "high_school_chinese": 0.2916666666666667, - "high_school_history": 0.48, - "middle_school_history": 0.5555555555555556, - "civil_servant": 0.3269230769230769, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.25, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.44, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.36538461538461536, "sports_science": 0.3333333333333333, - "plant_protection": 0.4074074074074074, - "basic_medicine": 0.4166666666666667, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5, "clinical_medicine": 0.25925925925925924, - "urban_and_rural_planner": 0.35294117647058826, - "accountant": 0.37037037037037035, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.2962962962962963, "fire_engineer": 0.3055555555555556, "environmental_impact_assessment_engineer": 0.2777777777777778, - "tax_accountant": 0.25925925925925924, - "physician": 0.46296296296296297 + "tax_accountant": 0.2777777777777778, + "physician": 0.42592592592592593 } } }, "cmmlu": { "prompt_1": { - "accuracy": 0.5053763440860215 + "accuracy": 0.5017921146953405 }, "prompt_2": { - "accuracy": 0.3978494623655914 + "accuracy": 0.3727598566308244 }, "prompt_3": { "accuracy": 0.5017921146953405 }, "prompt_4": { - "accuracy": 0.4874551971326165 + "accuracy": 0.4946236559139785 }, "prompt_5": { - "accuracy": 0.4014336917562724 + "accuracy": 0.44086021505376344 } }, "cmmlu_full": { "prompt_1": { - "accuracy": 0.5118286997064411, + "accuracy": 0.4803142807805215, "category_acc": { - "agronomy": 0.4911242603550296, - "anatomy": 0.4189189189189189, - "ancient_chinese": 0.32926829268292684, - "arts": 0.6, - "astronomy": 0.3212121212121212, - "business_ethics": 0.5263157894736842, - "chinese_civil_service_exam": 0.43125, - "chinese_driving_rule": 0.6717557251908397, + "agronomy": 0.48520710059171596, + "anatomy": 0.34459459459459457, + "ancient_chinese": 0.3475609756097561, + "arts": 0.59375, + "astronomy": 0.3151515151515151, + "business_ethics": 0.4784688995215311, + "chinese_civil_service_exam": 0.4125, + "chinese_driving_rule": 0.6030534351145038, "chinese_food_culture": 0.4411764705882353, - "chinese_foreign_policy": 0.5607476635514018, - "chinese_history": 0.5139318885448917, - "chinese_literature": 0.3382352941176471, - "chinese_teacher_qualification": 0.5865921787709497, - "clinical_knowledge": 0.5021097046413502, - "college_actuarial_science": 0.2641509433962264, - "college_education": 0.6448598130841121, - "college_engineering_hydrology": 0.5, - "college_law": 0.3888888888888889, - "college_mathematics": 0.29523809523809524, - "college_medical_statistics": 0.5283018867924528, - "college_medicine": 0.5384615384615384, - "computer_science": 0.6323529411764706, - "computer_security": 0.672514619883041, - "conceptual_physics": 0.5102040816326531, - "construction_project_management": 0.4028776978417266, - "economics": 0.6352201257861635, - "education": 0.5337423312883436, - "electrical_engineering": 0.5058139534883721, - "elementary_chinese": 0.3492063492063492, - "elementary_commonsense": 0.47474747474747475, - "elementary_information_and_technology": 0.7521008403361344, - "elementary_mathematics": 0.3826086956521739, - "ethnology": 0.48148148148148145, - "food_science": 0.5384615384615384, - "genetics": 0.42613636363636365, - "global_facts": 0.5771812080536913, - "high_school_biology": 0.42011834319526625, - "high_school_chemistry": 0.32575757575757575, - "high_school_geography": 0.4745762711864407, - "high_school_mathematics": 0.2865853658536585, - "high_school_physics": 0.4090909090909091, - "high_school_politics": 0.4965034965034965, - "human_sexuality": 0.5714285714285714, - "international_law": 0.4810810810810811, - "journalism": 0.5523255813953488, - "jurisprudence": 0.5036496350364964, - "legal_and_moral_basis": 0.8130841121495327, - "logical": 0.6097560975609756, + "chinese_foreign_policy": 0.5327102803738317, + "chinese_history": 0.48606811145510836, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.5363128491620112, + "clinical_knowledge": 0.4810126582278481, + "college_actuarial_science": 0.3584905660377358, + "college_education": 0.6355140186915887, + "college_engineering_hydrology": 0.4811320754716981, + "college_law": 0.4074074074074074, + "college_mathematics": 0.4, + "college_medical_statistics": 0.5377358490566038, + "college_medicine": 0.5201465201465202, + "computer_science": 0.5343137254901961, + "computer_security": 0.5730994152046783, + "conceptual_physics": 0.4897959183673469, + "construction_project_management": 0.3597122302158273, + "economics": 0.5723270440251572, + "education": 0.5214723926380368, + "electrical_engineering": 0.4883720930232558, + "elementary_chinese": 0.36507936507936506, + "elementary_commonsense": 0.4444444444444444, + "elementary_information_and_technology": 0.726890756302521, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.37777777777777777, + "food_science": 0.5244755244755245, + "genetics": 0.3806818181818182, + "global_facts": 0.5369127516778524, + "high_school_biology": 0.2958579881656805, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.4576271186440678, + "high_school_mathematics": 0.3475609756097561, + "high_school_physics": 0.37272727272727274, + "high_school_politics": 0.46153846153846156, + "human_sexuality": 0.5555555555555556, + "international_law": 0.42162162162162165, + "journalism": 0.5174418604651163, + "jurisprudence": 0.46958637469586373, + "legal_and_moral_basis": 0.8364485981308412, + "logical": 0.5203252032520326, "machine_learning": 0.4426229508196721, - "management": 0.6285714285714286, - "marketing": 0.6055555555555555, - "marxist_theory": 0.6084656084656085, - "modern_chinese": 0.43103448275862066, - "nutrition": 0.5310344827586206, - "philosophy": 0.6476190476190476, - "professional_accounting": 0.5885714285714285, - "professional_law": 0.33175355450236965, - "professional_medicine": 0.4654255319148936, - "professional_psychology": 0.5732758620689655, - "public_relations": 0.5862068965517241, - "security_study": 0.5703703703703704, - "sociology": 0.6061946902654868, - "sports_science": 0.5636363636363636, - "traditional_chinese_medicine": 0.41621621621621624, - "virology": 0.6094674556213018, + "management": 0.5904761904761905, + "marketing": 0.5777777777777777, + "marxist_theory": 0.5343915343915344, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.47586206896551725, + "philosophy": 0.6285714285714286, + "professional_accounting": 0.5485714285714286, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.4148936170212766, + "professional_psychology": 0.5043103448275862, + "public_relations": 0.5172413793103449, + "security_study": 0.5555555555555556, + "sociology": 0.5221238938053098, + "sports_science": 0.503030303030303, + "traditional_chinese_medicine": 0.43783783783783786, + "virology": 0.6035502958579881, "world_history": 0.5217391304347826, - "world_religions": 0.6375 + "world_religions": 0.64375 } }, "prompt_2": { - "accuracy": 0.49378345708858573, + "accuracy": 0.38084959419789327, "category_acc": { - "agronomy": 0.5384615384615384, - "anatomy": 0.36486486486486486, - "ancient_chinese": 0.36585365853658536, - "arts": 0.56875, - "astronomy": 0.2909090909090909, - "business_ethics": 0.5215311004784688, - "chinese_civil_service_exam": 0.3875, - "chinese_driving_rule": 0.6870229007633588, - "chinese_food_culture": 0.4117647058823529, - "chinese_foreign_policy": 0.5794392523364486, - "chinese_history": 0.4891640866873065, - "chinese_literature": 0.3627450980392157, - "chinese_teacher_qualification": 0.5754189944134078, - "clinical_knowledge": 0.5021097046413502, - "college_actuarial_science": 0.20754716981132076, - "college_education": 0.6261682242990654, - "college_engineering_hydrology": 0.49056603773584906, + "agronomy": 0.3727810650887574, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.27439024390243905, + "arts": 0.50625, + "astronomy": 0.32727272727272727, + "business_ethics": 0.40669856459330145, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.4351145038167939, + "chinese_food_culture": 0.3897058823529412, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.33436532507739936, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.4860335195530726, + "clinical_knowledge": 0.37130801687763715, + "college_actuarial_science": 0.33962264150943394, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3333333333333333, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.326007326007326, + "computer_science": 0.4950980392156863, + "computer_security": 0.5029239766081871, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.3597122302158273, + "economics": 0.3836477987421384, + "education": 0.39263803680981596, + "electrical_engineering": 0.3372093023255814, + "elementary_chinese": 0.31746031746031744, + "elementary_commonsense": 0.4090909090909091, + "elementary_information_and_technology": 0.5504201680672269, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.37777777777777777, + "food_science": 0.3916083916083916, + "genetics": 0.3522727272727273, + "global_facts": 0.5167785234899329, + "high_school_biology": 0.33136094674556216, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.3356643356643357, + "human_sexuality": 0.36507936507936506, + "international_law": 0.4, + "journalism": 0.38953488372093026, + "jurisprudence": 0.35523114355231145, + "legal_and_moral_basis": 0.5233644859813084, + "logical": 0.44715447154471544, + "machine_learning": 0.48360655737704916, + "management": 0.3952380952380952, + "marketing": 0.55, + "marxist_theory": 0.35978835978835977, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.4068965517241379, + "philosophy": 0.45714285714285713, + "professional_accounting": 0.3657142857142857, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.4482758620689655, + "public_relations": 0.3793103448275862, + "security_study": 0.43703703703703706, + "sociology": 0.504424778761062, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.3621621621621622, + "virology": 0.35502958579881655, + "world_history": 0.391304347826087, + "world_religions": 0.575 + } + }, + "prompt_3": { + "accuracy": 0.4855810740804697, + "category_acc": { + "agronomy": 0.48520710059171596, + "anatomy": 0.35135135135135137, + "ancient_chinese": 0.34146341463414637, + "arts": 0.59375, + "astronomy": 0.3575757575757576, + "business_ethics": 0.5167464114832536, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.5343511450381679, + "chinese_food_culture": 0.4338235294117647, + "chinese_foreign_policy": 0.5607476635514018, + "chinese_history": 0.47987616099071206, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.5698324022346368, + "clinical_knowledge": 0.4936708860759494, + "college_actuarial_science": 0.36792452830188677, + "college_education": 0.6074766355140186, + "college_engineering_hydrology": 0.41509433962264153, "college_law": 0.3888888888888889, - "college_mathematics": 0.3333333333333333, - "college_medical_statistics": 0.6037735849056604, - "college_medicine": 0.5347985347985348, - "computer_science": 0.5980392156862745, - "computer_security": 0.6023391812865497, - "conceptual_physics": 0.3197278911564626, - "construction_project_management": 0.41007194244604317, - "economics": 0.5786163522012578, - "education": 0.5337423312883436, - "electrical_engineering": 0.5116279069767442, - "elementary_chinese": 0.3611111111111111, - "elementary_commonsense": 0.4444444444444444, - "elementary_information_and_technology": 0.7142857142857143, - "elementary_mathematics": 0.41304347826086957, - "ethnology": 0.4888888888888889, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.5377358490566038, + "college_medicine": 0.5201465201465202, + "computer_science": 0.5245098039215687, + "computer_security": 0.6140350877192983, + "conceptual_physics": 0.46938775510204084, + "construction_project_management": 0.35251798561151076, + "economics": 0.5849056603773585, + "education": 0.5644171779141104, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.4494949494949495, + "elementary_information_and_technology": 0.7184873949579832, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.42962962962962964, "food_science": 0.48951048951048953, - "genetics": 0.4034090909090909, - "global_facts": 0.5704697986577181, - "high_school_biology": 0.31952662721893493, - "high_school_chemistry": 0.29545454545454547, - "high_school_geography": 0.5, - "high_school_mathematics": 0.2073170731707317, + "genetics": 0.4147727272727273, + "global_facts": 0.5167785234899329, + "high_school_biology": 0.33727810650887574, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.4830508474576271, + "high_school_mathematics": 0.3048780487804878, "high_school_physics": 0.34545454545454546, - "high_school_politics": 0.4755244755244755, - "human_sexuality": 0.5079365079365079, - "international_law": 0.43243243243243246, - "journalism": 0.5465116279069767, - "jurisprudence": 0.5060827250608273, - "legal_and_moral_basis": 0.822429906542056, - "logical": 0.5203252032520326, - "machine_learning": 0.4098360655737705, - "management": 0.6095238095238096, - "marketing": 0.5944444444444444, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.5793650793650794, + "international_law": 0.41621621621621624, + "journalism": 0.5290697674418605, + "jurisprudence": 0.5012165450121655, + "legal_and_moral_basis": 0.8177570093457944, + "logical": 0.5365853658536586, + "machine_learning": 0.4426229508196721, + "management": 0.5904761904761905, + "marketing": 0.6055555555555555, "marxist_theory": 0.6084656084656085, - "modern_chinese": 0.3879310344827586, - "nutrition": 0.4827586206896552, - "philosophy": 0.5714285714285714, - "professional_accounting": 0.5428571428571428, - "professional_law": 0.3459715639810427, - "professional_medicine": 0.4441489361702128, - "professional_psychology": 0.5732758620689655, - "public_relations": 0.5459770114942529, - "security_study": 0.5185185185185185, - "sociology": 0.584070796460177, - "sports_science": 0.5818181818181818, - "traditional_chinese_medicine": 0.4486486486486487, - "virology": 0.5798816568047337, - "world_history": 0.546583850931677, - "world_religions": 0.61875 + "modern_chinese": 0.3448275862068966, + "nutrition": 0.5103448275862069, + "philosophy": 0.6476190476190476, + "professional_accounting": 0.5085714285714286, + "professional_law": 0.3791469194312796, + "professional_medicine": 0.4574468085106383, + "professional_psychology": 0.5344827586206896, + "public_relations": 0.5172413793103449, + "security_study": 0.5407407407407407, + "sociology": 0.49557522123893805, + "sports_science": 0.5212121212121212, + "traditional_chinese_medicine": 0.4810810810810811, + "virology": 0.591715976331361, + "world_history": 0.5527950310559007, + "world_religions": 0.6375 } }, - "prompt_3": { - "accuracy": 0.5107926092212053, + "prompt_4": { + "accuracy": 0.4860127784493179, "category_acc": { - "agronomy": 0.5325443786982249, - "anatomy": 0.3918918918918919, - "ancient_chinese": 0.3353658536585366, - "arts": 0.575, - "astronomy": 0.3212121212121212, - "business_ethics": 0.5550239234449761, - "chinese_civil_service_exam": 0.44375, - "chinese_driving_rule": 0.648854961832061, - "chinese_food_culture": 0.375, - "chinese_foreign_policy": 0.6074766355140186, - "chinese_history": 0.5201238390092879, - "chinese_literature": 0.35294117647058826, - "chinese_teacher_qualification": 0.5977653631284916, - "clinical_knowledge": 0.5569620253164557, - "college_actuarial_science": 0.29245283018867924, - "college_education": 0.6261682242990654, - "college_engineering_hydrology": 0.5283018867924528, - "college_law": 0.3888888888888889, - "college_mathematics": 0.2761904761904762, + "agronomy": 0.46153846153846156, + "anatomy": 0.3716216216216216, + "ancient_chinese": 0.2926829268292683, + "arts": 0.54375, + "astronomy": 0.3151515151515151, + "business_ethics": 0.5311004784688995, + "chinese_civil_service_exam": 0.425, + "chinese_driving_rule": 0.5877862595419847, + "chinese_food_culture": 0.4338235294117647, + "chinese_foreign_policy": 0.5233644859813084, + "chinese_history": 0.47678018575851394, + "chinese_literature": 0.3627450980392157, + "chinese_teacher_qualification": 0.553072625698324, + "clinical_knowledge": 0.4978902953586498, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.6448598130841121, + "college_engineering_hydrology": 0.46226415094339623, + "college_law": 0.3333333333333333, + "college_mathematics": 0.3047619047619048, "college_medical_statistics": 0.5471698113207547, - "college_medicine": 0.5347985347985348, - "computer_science": 0.6421568627450981, - "computer_security": 0.6608187134502924, - "conceptual_physics": 0.4421768707482993, - "construction_project_management": 0.37410071942446044, - "economics": 0.6226415094339622, + "college_medicine": 0.4981684981684982, + "computer_science": 0.5147058823529411, + "computer_security": 0.6374269005847953, + "conceptual_physics": 0.4489795918367347, + "construction_project_management": 0.381294964028777, + "economics": 0.5911949685534591, "education": 0.5398773006134969, - "electrical_engineering": 0.5058139534883721, - "elementary_chinese": 0.34523809523809523, - "elementary_commonsense": 0.45454545454545453, - "elementary_information_and_technology": 0.7605042016806722, - "elementary_mathematics": 0.4391304347826087, - "ethnology": 0.48148148148148145, - "food_science": 0.5314685314685315, - "genetics": 0.4318181818181818, - "global_facts": 0.5906040268456376, - "high_school_biology": 0.34911242603550297, - "high_school_chemistry": 0.30303030303030304, - "high_school_geography": 0.5084745762711864, - "high_school_mathematics": 0.25609756097560976, - "high_school_physics": 0.32727272727272727, - "high_school_politics": 0.48951048951048953, - "human_sexuality": 0.5555555555555556, - "international_law": 0.4486486486486487, - "journalism": 0.5581395348837209, - "jurisprudence": 0.48905109489051096, - "legal_and_moral_basis": 0.8364485981308412, - "logical": 0.5528455284552846, - "machine_learning": 0.4262295081967213, - "management": 0.6047619047619047, - "marketing": 0.6444444444444445, - "marxist_theory": 0.6031746031746031, - "modern_chinese": 0.3620689655172414, - "nutrition": 0.5103448275862069, - "philosophy": 0.6666666666666666, - "professional_accounting": 0.56, - "professional_law": 0.3744075829383886, - "professional_medicine": 0.4734042553191489, - "professional_psychology": 0.5732758620689655, - "public_relations": 0.5574712643678161, - "security_study": 0.6222222222222222, - "sociology": 0.5707964601769911, - "sports_science": 0.5757575757575758, - "traditional_chinese_medicine": 0.4486486486486487, - "virology": 0.6094674556213018, - "world_history": 0.5714285714285714, - "world_religions": 0.64375 - } - }, - "prompt_4": { - "accuracy": 0.4285960973925056, - "category_acc": { - "agronomy": 0.4319526627218935, - "anatomy": 0.3310810810810811, - "ancient_chinese": 0.3048780487804878, - "arts": 0.5125, - "astronomy": 0.296969696969697, - "business_ethics": 0.40669856459330145, - "chinese_civil_service_exam": 0.4125, - "chinese_driving_rule": 0.5725190839694656, - "chinese_food_culture": 0.3602941176470588, - "chinese_foreign_policy": 0.4953271028037383, - "chinese_history": 0.4582043343653251, - "chinese_literature": 0.3284313725490196, - "chinese_teacher_qualification": 0.48044692737430167, - "clinical_knowledge": 0.5274261603375527, - "college_actuarial_science": 0.2358490566037736, - "college_education": 0.5514018691588785, - "college_engineering_hydrology": 0.5094339622641509, - "college_law": 0.3425925925925926, - "college_mathematics": 0.2571428571428571, - "college_medical_statistics": 0.3584905660377358, - "college_medicine": 0.4652014652014652, - "computer_science": 0.4950980392156863, - "computer_security": 0.4853801169590643, - "conceptual_physics": 0.24489795918367346, - "construction_project_management": 0.3597122302158273, - "economics": 0.46540880503144655, - "education": 0.4294478527607362, - "electrical_engineering": 0.4127906976744186, - "elementary_chinese": 0.35714285714285715, - "elementary_commonsense": 0.398989898989899, - "elementary_information_and_technology": 0.5672268907563025, - "elementary_mathematics": 0.3173913043478261, - "ethnology": 0.4074074074074074, - "food_science": 0.44755244755244755, - "genetics": 0.30113636363636365, - "global_facts": 0.4966442953020134, - "high_school_biology": 0.3254437869822485, - "high_school_chemistry": 0.2803030303030303, - "high_school_geography": 0.3898305084745763, - "high_school_mathematics": 0.2804878048780488, + "electrical_engineering": 0.45930232558139533, + "elementary_chinese": 0.3611111111111111, + "elementary_commonsense": 0.4494949494949495, + "elementary_information_and_technology": 0.680672268907563, + "elementary_mathematics": 0.3695652173913043, + "ethnology": 0.4666666666666667, + "food_science": 0.5244755244755245, + "genetics": 0.42613636363636365, + "global_facts": 0.5570469798657718, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.4745762711864407, + "high_school_mathematics": 0.3719512195121951, "high_school_physics": 0.3090909090909091, - "high_school_politics": 0.44755244755244755, - "human_sexuality": 0.48412698412698413, - "international_law": 0.43243243243243246, - "journalism": 0.4418604651162791, - "jurisprudence": 0.39659367396593675, - "legal_and_moral_basis": 0.7102803738317757, - "logical": 0.4065040650406504, - "machine_learning": 0.36885245901639346, - "management": 0.5333333333333333, - "marketing": 0.55, - "marxist_theory": 0.5026455026455027, - "modern_chinese": 0.3017241379310345, - "nutrition": 0.4, - "philosophy": 0.5619047619047619, - "professional_accounting": 0.5085714285714286, - "professional_law": 0.3175355450236967, - "professional_medicine": 0.4095744680851064, - "professional_psychology": 0.49137931034482757, - "public_relations": 0.4827586206896552, - "security_study": 0.5259259259259259, - "sociology": 0.4823008849557522, - "sports_science": 0.41818181818181815, - "traditional_chinese_medicine": 0.372972972972973, - "virology": 0.4319526627218935, - "world_history": 0.5031055900621118, - "world_religions": 0.53125 + "high_school_politics": 0.5034965034965035, + "human_sexuality": 0.5873015873015873, + "international_law": 0.4810810810810811, + "journalism": 0.5058139534883721, + "jurisprudence": 0.5206812652068127, + "legal_and_moral_basis": 0.7757009345794392, + "logical": 0.5609756097560976, + "machine_learning": 0.4098360655737705, + "management": 0.5904761904761905, + "marketing": 0.5944444444444444, + "marxist_theory": 0.5555555555555556, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.5103448275862069, + "philosophy": 0.6, + "professional_accounting": 0.49714285714285716, + "professional_law": 0.3696682464454976, + "professional_medicine": 0.4175531914893617, + "professional_psychology": 0.5344827586206896, + "public_relations": 0.5114942528735632, + "security_study": 0.5777777777777777, + "sociology": 0.5221238938053098, + "sports_science": 0.5151515151515151, + "traditional_chinese_medicine": 0.40540540540540543, + "virology": 0.6035502958579881, + "world_history": 0.5341614906832298, + "world_religions": 0.60625 } }, "prompt_5": { - "accuracy": 0.4709894664134001, + "accuracy": 0.4241063719564842, "category_acc": { - "agronomy": 0.47337278106508873, + "agronomy": 0.4437869822485207, "anatomy": 0.33783783783783783, - "ancient_chinese": 0.2926829268292683, - "arts": 0.54375, + "ancient_chinese": 0.31097560975609756, + "arts": 0.53125, "astronomy": 0.3212121212121212, - "business_ethics": 0.49760765550239233, - "chinese_civil_service_exam": 0.38125, - "chinese_driving_rule": 0.6793893129770993, - "chinese_food_culture": 0.36764705882352944, - "chinese_foreign_policy": 0.5327102803738317, - "chinese_history": 0.4953560371517028, - "chinese_literature": 0.3382352941176471, - "chinese_teacher_qualification": 0.5642458100558659, - "clinical_knowledge": 0.510548523206751, - "college_actuarial_science": 0.24528301886792453, - "college_education": 0.5607476635514018, - "college_engineering_hydrology": 0.5094339622641509, - "college_law": 0.37962962962962965, - "college_mathematics": 0.21904761904761905, - "college_medical_statistics": 0.5094339622641509, - "college_medicine": 0.47619047619047616, - "computer_science": 0.5735294117647058, - "computer_security": 0.5497076023391813, - "conceptual_physics": 0.3401360544217687, - "construction_project_management": 0.3597122302158273, - "economics": 0.559748427672956, - "education": 0.5460122699386503, - "electrical_engineering": 0.4883720930232558, - "elementary_chinese": 0.3531746031746032, - "elementary_commonsense": 0.4292929292929293, - "elementary_information_and_technology": 0.6848739495798319, - "elementary_mathematics": 0.3652173913043478, - "ethnology": 0.4888888888888889, - "food_science": 0.5524475524475524, - "genetics": 0.3977272727272727, - "global_facts": 0.5369127516778524, - "high_school_biology": 0.33136094674556216, - "high_school_chemistry": 0.25757575757575757, - "high_school_geography": 0.4661016949152542, - "high_school_mathematics": 0.29878048780487804, - "high_school_physics": 0.3181818181818182, - "high_school_politics": 0.4965034965034965, - "human_sexuality": 0.5158730158730159, - "international_law": 0.42162162162162165, - "journalism": 0.5116279069767442, - "jurisprudence": 0.44525547445255476, - "legal_and_moral_basis": 0.8084112149532711, - "logical": 0.5121951219512195, - "machine_learning": 0.4344262295081967, - "management": 0.6285714285714286, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.4580152671755725, + "chinese_food_culture": 0.38235294117647056, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.3653250773993808, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.4092827004219409, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.5420560747663551, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.4074074074074074, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.39622641509433965, + "college_medicine": 0.3992673992673993, + "computer_science": 0.5049019607843137, + "computer_security": 0.543859649122807, + "conceptual_physics": 0.4489795918367347, + "construction_project_management": 0.4172661870503597, + "economics": 0.44025157232704404, + "education": 0.44785276073619634, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.32142857142857145, + "elementary_commonsense": 0.37373737373737376, + "elementary_information_and_technology": 0.6134453781512605, + "elementary_mathematics": 0.3, + "ethnology": 0.3925925925925926, + "food_science": 0.48951048951048953, + "genetics": 0.3806818181818182, + "global_facts": 0.4899328859060403, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.4090909090909091, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.42857142857142855, + "international_law": 0.42702702702702705, + "journalism": 0.4127906976744186, + "jurisprudence": 0.44768856447688565, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.4878048780487805, + "machine_learning": 0.4180327868852459, + "management": 0.5047619047619047, "marketing": 0.5722222222222222, - "marxist_theory": 0.582010582010582, - "modern_chinese": 0.3275862068965517, - "nutrition": 0.46206896551724136, - "philosophy": 0.6095238095238096, - "professional_accounting": 0.4514285714285714, - "professional_law": 0.32701421800947866, - "professional_medicine": 0.4122340425531915, - "professional_psychology": 0.5172413793103449, - "public_relations": 0.5459770114942529, - "security_study": 0.5407407407407407, - "sociology": 0.5132743362831859, - "sports_science": 0.509090909090909, - "traditional_chinese_medicine": 0.4, - "virology": 0.5088757396449705, - "world_history": 0.5341614906832298, - "world_religions": 0.5875 + "marxist_theory": 0.48148148148148145, + "modern_chinese": 0.39655172413793105, + "nutrition": 0.45517241379310347, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.4, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.35904255319148937, + "professional_psychology": 0.47844827586206895, + "public_relations": 0.47126436781609193, + "security_study": 0.5037037037037037, + "sociology": 0.46017699115044247, + "sports_science": 0.44242424242424244, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.4556213017751479, + "world_history": 0.43478260869565216, + "world_religions": 0.6125 } } }, "zbench": { "prompt_1": { - "accuracy": 0.36363636363636365 + "accuracy": 0.3939393939393939 }, "prompt_2": { "accuracy": 0.30303030303030304 @@ -33366,10 +33366,10 @@ "accuracy": 0.45454545454545453 }, "prompt_4": { - "accuracy": 0.3939393939393939 + "accuracy": 0.36363636363636365 }, "prompt_5": { - "accuracy": 0.3939393939393939 + "accuracy": 0.3333333333333333 } }, "ind_emotion": { @@ -33377,10 +33377,10 @@ "accuracy": 0.6863636363636364 }, "prompt_2": { - "accuracy": 0.6409090909090909 + "accuracy": 0.6386363636363637 }, "prompt_3": { - "accuracy": 0.6772727272727272 + "accuracy": 0.6795454545454546 }, "prompt_4": { "accuracy": 0.6590909090909091 @@ -33408,36 +33408,36 @@ }, "c3": { "prompt_1": { - "accuracy": 0.8066566940912491 + "accuracy": 0.8040388930441287 }, "prompt_2": { - "accuracy": 0.8515332834704562 + "accuracy": 0.8511593118922962 }, "prompt_3": { - "accuracy": 0.8081525804038893 + "accuracy": 0.805908750934929 }, "prompt_4": { - "accuracy": 0.8421839940164547 + "accuracy": 0.8425579655946148 }, "prompt_5": { - "accuracy": 0.837696335078534 + "accuracy": 0.8388182498130142 } }, "dream": { "prompt_1": { - "accuracy": 0.6751592356687898 + "accuracy": 0.6722195002449779 }, "prompt_2": { - "accuracy": 0.6712395884370407 + "accuracy": 0.6648701616854483 }, "prompt_3": { - "accuracy": 0.8118569328760411 + "accuracy": 0.8045075943165115 }, "prompt_4": { - "accuracy": 0.7951984321411073 + "accuracy": 0.7966682998530132 }, "prompt_5": { - "accuracy": 0.6320431161195492 + "accuracy": 0.6447819696227339 } }, "samsum": { @@ -33540,19 +33540,19 @@ }, "qqp": { "prompt_1": { - "accuracy": 0.6045 + "accuracy": 0.605 }, "prompt_2": { - "accuracy": 0.692 + "accuracy": 0.6905 }, "prompt_3": { - "accuracy": 0.5995 + "accuracy": 0.6 }, "prompt_4": { - "accuracy": 0.522 + "accuracy": 0.5225 }, "prompt_5": { - "accuracy": 0.727 + "accuracy": 0.7275 } }, "mnli": { @@ -33600,7 +33600,7 @@ "accuracy": 0.4 }, "prompt_4": { - "accuracy": 0.4 + "accuracy": 0.5 }, "prompt_5": { "accuracy": 0.5 @@ -33617,7 +33617,7 @@ "accuracy": 0.8 }, "prompt_4": { - "accuracy": 0.5 + "accuracy": 0.7 }, "prompt_5": { "accuracy": 0.8 @@ -93873,795 +93873,795 @@ "zero_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.9121848739495798, + "overall_acc": 0.9123949579831933, "language_acc": { - "Spanish": 0.9193277310924369, - "English": 0.9411764705882353, - "Chinese": 0.8915966386554622, - "Vietnamese": 0.8966386554621849 + "Spanish": 0.9210084033613445, + "English": 0.9394957983193277, + "Chinese": 0.8932773109243698, + "Vietnamese": 0.8957983193277311 }, - "consistency_score_2": 0.888795518207283, - "consistency_score_3": 0.8378151260504202, - "consistency_score_4": 0.8033613445378152, + "consistency_score_2": 0.8893557422969188, + "consistency_score_3": 0.8380252100840336, + "consistency_score_4": 0.8016806722689076, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9210084033613445, - "Spanish,Chinese": 0.8672268907563025, - "Spanish,Vietnamese": 0.8915966386554622, + "Spanish,English": 0.9201680672268907, + "Spanish,Chinese": 0.8680672268907563, + "Spanish,Vietnamese": 0.892436974789916, "English,Chinese": 0.888235294117647, "English,Vietnamese": 0.8983193277310925, - "Chinese,Vietnamese": 0.8663865546218488 + "Chinese,Vietnamese": 0.8689075630252101 }, "3_combine": { - "Spanish,English,Chinese": 0.8428571428571429, + "Spanish,English,Chinese": 0.8420168067226891, "Spanish,English,Vietnamese": 0.8596638655462185, - "Spanish,Chinese,Vietnamese": 0.8184873949579832, - "English,Chinese,Vietnamese": 0.8302521008403362 + "Spanish,Chinese,Vietnamese": 0.819327731092437, + "English,Chinese,Vietnamese": 0.83109243697479 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8033613445378152 + "Spanish,English,Chinese,Vietnamese": 0.8016806722689076 } }, - "AC3_2": 0.9003383170950483, - "AC3_3": 0.8734197544064948, - "AC3_4": 0.8543215669311448 + "AC3_2": 0.9007280473973676, + "AC3_3": 0.8736302177597297, + "AC3_4": 0.8534622280820545 }, "prompt_2": { - "overall_acc": 0.9174369747899159, + "overall_acc": 0.9172268907563025, "language_acc": { - "Spanish": 0.9252100840336135, - "English": 0.9378151260504202, - "Chinese": 0.9042016806722689, + "Spanish": 0.9260504201680673, + "English": 0.938655462184874, + "Chinese": 0.9016806722689076, "Vietnamese": 0.9025210084033614 }, - "consistency_score_2": 0.8983193277310925, - "consistency_score_3": 0.8525210084033614, - "consistency_score_4": 0.8201680672268907, + "consistency_score_2": 0.8988795518207283, + "consistency_score_3": 0.8527310924369749, + "consistency_score_4": 0.819327731092437, "detailed_consistency_score": { "2_combine": { "Spanish,English": 0.9226890756302522, - "Spanish,Chinese": 0.8840336134453781, - "Spanish,Vietnamese": 0.8983193277310925, - "English,Chinese": 0.8966386554621849, - "English,Vietnamese": 0.907563025210084, - "Chinese,Vietnamese": 0.880672268907563 + "Spanish,Chinese": 0.8857142857142857, + "Spanish,Vietnamese": 0.8974789915966387, + "English,Chinese": 0.8957983193277311, + "English,Vietnamese": 0.9084033613445378, + "Chinese,Vietnamese": 0.8831932773109243 }, "3_combine": { "Spanish,English,Chinese": 0.8546218487394958, "Spanish,English,Vietnamese": 0.8689075630252101, "Spanish,Chinese,Vietnamese": 0.838655462184874, - "English,Chinese,Vietnamese": 0.8478991596638655 + "English,Chinese,Vietnamese": 0.8487394957983193 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8201680672268907 + "Spanish,English,Chinese,Vietnamese": 0.819327731092437 } }, - "AC3_2": 0.9077775087320675, - "AC3_3": 0.8837885444479853, - "AC3_4": 0.8660800264475165 + "AC3_2": 0.9079605436183863, + "AC3_3": 0.8838039048948337, + "AC3_4": 0.8655177531438631 }, "prompt_3": { - "overall_acc": 0.9119747899159664, + "overall_acc": 0.9086134453781513, "language_acc": { - "Spanish": 0.9218487394957983, - "English": 0.9378151260504202, + "Spanish": 0.915126050420168, + "English": 0.934453781512605, "Chinese": 0.892436974789916, - "Vietnamese": 0.8957983193277311 + "Vietnamese": 0.892436974789916 }, - "consistency_score_2": 0.8915966386554622, - "consistency_score_3": 0.8422268907563025, - "consistency_score_4": 0.8084033613445378, + "consistency_score_2": 0.8869747899159665, + "consistency_score_3": 0.8359243697478992, + "consistency_score_4": 0.8008403361344538, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9260504201680673, - "Spanish,Chinese": 0.8747899159663866, - "Spanish,Vietnamese": 0.8899159663865546, - "English,Chinese": 0.8857142857142857, - "English,Vietnamese": 0.9042016806722689, - "Chinese,Vietnamese": 0.8689075630252101 + "Spanish,English": 0.915126050420168, + "Spanish,Chinese": 0.8705882352941177, + "Spanish,Vietnamese": 0.8831932773109243, + "English,Chinese": 0.8890756302521008, + "English,Vietnamese": 0.8974789915966387, + "Chinese,Vietnamese": 0.8663865546218488 }, "3_combine": { - "Spanish,English,Chinese": 0.8470588235294118, - "Spanish,English,Vietnamese": 0.8638655462184874, - "Spanish,Chinese,Vietnamese": 0.8235294117647058, - "English,Chinese,Vietnamese": 0.8344537815126051 + "Spanish,English,Chinese": 0.8420168067226891, + "Spanish,English,Vietnamese": 0.8521008403361344, + "Spanish,Chinese,Vietnamese": 0.8176470588235294, + "English,Chinese,Vietnamese": 0.8319327731092437 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8084033613445378 + "Spanish,English,Chinese,Vietnamese": 0.8008403361344538 } }, - "AC3_2": 0.9016705901429852, - "AC3_3": 0.8757142353448357, - "AC3_4": 0.8570714351917585 + "AC3_2": 0.8976637337481949, + "AC3_3": 0.8707545517708151, + "AC3_4": 0.8513295940958329 }, "prompt_4": { - "overall_acc": 0.9128151260504201, + "overall_acc": 0.9130252100840336, "language_acc": { "Spanish": 0.9235294117647059, "English": 0.9403361344537815, - "Chinese": 0.892436974789916, + "Chinese": 0.8932773109243698, "Vietnamese": 0.8949579831932774 }, - "consistency_score_2": 0.8897759103641457, - "consistency_score_3": 0.8394957983193277, - "consistency_score_4": 0.8042016806722689, + "consistency_score_2": 0.8914565826330533, + "consistency_score_3": 0.8415966386554622, + "consistency_score_4": 0.807563025210084, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.9226890756302522, - "Spanish,Chinese": 0.8722689075630252, - "Spanish,Vietnamese": 0.892436974789916, + "Spanish,English": 0.9252100840336135, + "Spanish,Chinese": 0.8689075630252101, + "Spanish,Vietnamese": 0.8974789915966387, "English,Chinese": 0.888235294117647, - "English,Vietnamese": 0.8941176470588236, + "English,Vietnamese": 0.9, "Chinese,Vietnamese": 0.8689075630252101 }, "3_combine": { - "Spanish,English,Chinese": 0.846218487394958, - "Spanish,English,Vietnamese": 0.8579831932773109, - "Spanish,Chinese,Vietnamese": 0.8235294117647058, - "English,Chinese,Vietnamese": 0.8302521008403362 + "Spanish,English,Chinese": 0.8453781512605042, + "Spanish,English,Vietnamese": 0.8647058823529412, + "Spanish,Chinese,Vietnamese": 0.8243697478991596, + "English,Chinese,Vietnamese": 0.8319327731092437 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.8042016806722689 + "Spanish,English,Chinese,Vietnamese": 0.807563025210084 } }, - "AC3_2": 0.9011482841345106, - "AC3_3": 0.8746215665961681, - "AC3_4": 0.8550731193729354 + "AC3_2": 0.902111993458991, + "AC3_3": 0.8758570381676687, + "AC3_4": 0.8570620042390829 }, "prompt_5": { "overall_acc": 0.9115546218487395, "language_acc": { - "Spanish": 0.9210084033613445, - "English": 0.9403361344537815, - "Chinese": 0.8932773109243698, - "Vietnamese": 0.8915966386554622 + "Spanish": 0.9235294117647059, + "English": 0.9394957983193277, + "Chinese": 0.892436974789916, + "Vietnamese": 0.8907563025210085 }, - "consistency_score_2": 0.8893557422969188, - "consistency_score_3": 0.8384453781512605, + "consistency_score_2": 0.888795518207283, + "consistency_score_3": 0.8380252100840336, "consistency_score_4": 0.8033613445378152, "detailed_consistency_score": { "2_combine": { "Spanish,English": 0.9193277310924369, "Spanish,Chinese": 0.8680672268907563, - "Spanish,Vietnamese": 0.8941176470588236, - "English,Chinese": 0.8907563025210085, - "English,Vietnamese": 0.8957983193277311, + "Spanish,Vietnamese": 0.892436974789916, + "English,Chinese": 0.888235294117647, + "English,Vietnamese": 0.8966386554621849, "Chinese,Vietnamese": 0.8680672268907563 }, "3_combine": { - "Spanish,English,Chinese": 0.8436974789915966, - "Spanish,English,Vietnamese": 0.8579831932773109, - "Spanish,Chinese,Vietnamese": 0.8210084033613445, - "English,Chinese,Vietnamese": 0.83109243697479 + "Spanish,English,Chinese": 0.8428571428571429, + "Spanish,English,Vietnamese": 0.8588235294117647, + "Spanish,Chinese,Vietnamese": 0.8201680672268907, + "English,Chinese,Vietnamese": 0.8302521008403362 }, "4_combine": { "Spanish,English,Chinese,Vietnamese": 0.8033613445378152 } }, - "AC3_2": 0.9003183650376143, - "AC3_3": 0.8734728680889394, + "AC3_2": 0.9000312155168679, + "AC3_3": 0.8732448094012463, "AC3_4": 0.8540450505319899 } }, "cross_mmlu": { "prompt_1": { - "overall_acc": 0.5361904761904762, + "overall_acc": 0.5447619047619047, "language_acc": { - "Filipino": 0.5066666666666667, - "Vietnamese": 0.48, - "Chinese": 0.5466666666666666, + "Filipino": 0.5266666666666666, + "Vietnamese": 0.5066666666666667, + "Chinese": 0.5733333333333334, "Spanish": 0.56, - "Malay": 0.5, - "Indonesian": 0.5, - "English": 0.66 + "Malay": 0.47333333333333333, + "Indonesian": 0.4866666666666667, + "English": 0.6866666666666666 }, - "consistency_score_2": 0.5752380952380952, - "consistency_score_3": 0.40971428571428575, - "consistency_score_4": 0.3234285714285714, - "consistency_score_5": 0.27142857142857146, - "consistency_score_6": 0.23714285714285713, + "consistency_score_2": 0.5641269841269841, + "consistency_score_3": 0.399047619047619, + "consistency_score_4": 0.3146666666666667, + "consistency_score_5": 0.2653968253968254, + "consistency_score_6": 0.2342857142857143, "consistency_score_7": 0.21333333333333335, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5266666666666666, - "Filipino,Chinese": 0.5933333333333334, - "Filipino,Spanish": 0.58, - "Filipino,Malay": 0.5266666666666666, - "Filipino,Indonesian": 0.5666666666666667, - "Filipino,English": 0.5666666666666667, + "Filipino,Vietnamese": 0.5466666666666666, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.52, + "Filipino,Malay": 0.54, + "Filipino,Indonesian": 0.56, + "Filipino,English": 0.5466666666666666, "Vietnamese,Chinese": 0.58, - "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Spanish": 0.5866666666666667, "Vietnamese,Malay": 0.5466666666666666, - "Vietnamese,Indonesian": 0.58, - "Vietnamese,English": 0.5533333333333333, - "Chinese,Spanish": 0.5733333333333334, - "Chinese,Malay": 0.5, - "Chinese,Indonesian": 0.54, - "Chinese,English": 0.5733333333333334, + "Vietnamese,Indonesian": 0.6, + "Vietnamese,English": 0.5866666666666667, + "Chinese,Spanish": 0.5266666666666666, + "Chinese,Malay": 0.5266666666666666, + "Chinese,Indonesian": 0.56, + "Chinese,English": 0.5266666666666666, "Spanish,Malay": 0.58, - "Spanish,Indonesian": 0.6, - "Spanish,English": 0.66, - "Malay,Indonesian": 0.6666666666666666, - "Malay,English": 0.5333333333333333, - "Indonesian,English": 0.6066666666666667 + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,English": 0.62, + "Malay,Indonesian": 0.66, + "Malay,English": 0.52, + "Indonesian,English": 0.56 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.4066666666666667, - "Filipino,Vietnamese,Spanish": 0.4266666666666667, - "Filipino,Vietnamese,Malay": 0.35333333333333333, - "Filipino,Vietnamese,Indonesian": 0.38666666666666666, - "Filipino,Vietnamese,English": 0.38666666666666666, - "Filipino,Chinese,Spanish": 0.43333333333333335, - "Filipino,Chinese,Malay": 0.35333333333333333, + "Filipino,Vietnamese,Chinese": 0.41333333333333333, + "Filipino,Vietnamese,Spanish": 0.3933333333333333, + "Filipino,Vietnamese,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Indonesian": 0.4, + "Filipino,Vietnamese,English": 0.4066666666666667, + "Filipino,Chinese,Spanish": 0.38666666666666666, + "Filipino,Chinese,Malay": 0.36666666666666664, "Filipino,Chinese,Indonesian": 0.4, - "Filipino,Chinese,English": 0.4266666666666667, - "Filipino,Spanish,Malay": 0.4, - "Filipino,Spanish,Indonesian": 0.43333333333333335, - "Filipino,Spanish,English": 0.4533333333333333, + "Filipino,Chinese,English": 0.4, + "Filipino,Spanish,Malay": 0.36666666666666664, + "Filipino,Spanish,Indonesian": 0.4, + "Filipino,Spanish,English": 0.42, "Filipino,Malay,Indonesian": 0.42, - "Filipino,Malay,English": 0.36666666666666664, - "Filipino,Indonesian,English": 0.4266666666666667, - "Vietnamese,Chinese,Spanish": 0.43333333333333335, - "Vietnamese,Chinese,Malay": 0.36666666666666664, - "Vietnamese,Chinese,Indonesian": 0.4066666666666667, + "Filipino,Malay,English": 0.37333333333333335, + "Filipino,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,Malay": 0.38, + "Vietnamese,Chinese,Indonesian": 0.4266666666666667, "Vietnamese,Chinese,English": 0.4, - "Vietnamese,Spanish,Malay": 0.4266666666666667, - "Vietnamese,Spanish,Indonesian": 0.4533333333333333, + "Vietnamese,Spanish,Malay": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian": 0.4266666666666667, "Vietnamese,Spanish,English": 0.44666666666666666, "Vietnamese,Malay,Indonesian": 0.44666666666666666, - "Vietnamese,Malay,English": 0.37333333333333335, - "Vietnamese,Indonesian,English": 0.41333333333333333, - "Chinese,Spanish,Malay": 0.36666666666666664, - "Chinese,Spanish,Indonesian": 0.4, - "Chinese,Spanish,English": 0.43333333333333335, - "Chinese,Malay,Indonesian": 0.38, - "Chinese,Malay,English": 0.3466666666666667, - "Chinese,Indonesian,English": 0.4066666666666667, - "Spanish,Malay,Indonesian": 0.4533333333333333, - "Spanish,Malay,English": 0.42, - "Spanish,Indonesian,English": 0.46, - "Malay,Indonesian,English": 0.43333333333333335 + "Vietnamese,Malay,English": 0.4, + "Vietnamese,Indonesian,English": 0.42, + "Chinese,Spanish,Malay": 0.35333333333333333, + "Chinese,Spanish,Indonesian": 0.37333333333333335, + "Chinese,Spanish,English": 0.38666666666666666, + "Chinese,Malay,Indonesian": 0.3933333333333333, + "Chinese,Malay,English": 0.3333333333333333, + "Chinese,Indonesian,English": 0.38666666666666666, + "Spanish,Malay,Indonesian": 0.44, + "Spanish,Malay,English": 0.4066666666666667, + "Spanish,Indonesian,English": 0.4266666666666667, + "Malay,Indonesian,English": 0.4066666666666667 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.34, - "Filipino,Vietnamese,Chinese,Malay": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.32, - "Filipino,Vietnamese,Spanish,Malay": 0.31333333333333335, - "Filipino,Vietnamese,Spanish,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,Spanish,English": 0.3466666666666667, - "Filipino,Vietnamese,Malay,Indonesian": 0.30666666666666664, - "Filipino,Vietnamese,Malay,English": 0.28, - "Filipino,Vietnamese,Indonesian,English": 0.32, - "Filipino,Chinese,Spanish,Malay": 0.3, - "Filipino,Chinese,Spanish,Indonesian": 0.34, - "Filipino,Chinese,Spanish,English": 0.36, - "Filipino,Chinese,Malay,Indonesian": 0.29333333333333333, - "Filipino,Chinese,Malay,English": 0.29333333333333333, - "Filipino,Chinese,Indonesian,English": 0.34, - "Filipino,Spanish,Malay,Indonesian": 0.35333333333333333, - "Filipino,Spanish,Malay,English": 0.32, - "Filipino,Spanish,Indonesian,English": 0.35333333333333333, - "Filipino,Malay,Indonesian,English": 0.3333333333333333, - "Vietnamese,Chinese,Spanish,Malay": 0.29333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian": 0.34, - "Vietnamese,Chinese,Spanish,English": 0.32666666666666666, - "Vietnamese,Chinese,Malay,Indonesian": 0.32, - "Vietnamese,Chinese,Malay,English": 0.2733333333333333, - "Vietnamese,Chinese,Indonesian,English": 0.32, - "Vietnamese,Spanish,Malay,Indonesian": 0.37333333333333335, - "Vietnamese,Spanish,Malay,English": 0.32666666666666666, - "Vietnamese,Spanish,Indonesian,English": 0.36, - "Vietnamese,Malay,Indonesian,English": 0.32, - "Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, - "Chinese,Spanish,Malay,English": 0.30666666666666664, - "Chinese,Spanish,Indonesian,English": 0.32666666666666666, - "Chinese,Malay,Indonesian,English": 0.3, - "Spanish,Malay,Indonesian,English": 0.35333333333333333 + "Filipino,Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,English": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Malay": 0.3, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Spanish,English": 0.34, + "Filipino,Vietnamese,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Malay,English": 0.31333333333333335, + "Filipino,Vietnamese,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Spanish,Malay": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,English": 0.32, + "Filipino,Chinese,Malay,Indonesian": 0.3, + "Filipino,Chinese,Malay,English": 0.2866666666666667, + "Filipino,Chinese,Indonesian,English": 0.31333333333333335, + "Filipino,Spanish,Malay,Indonesian": 0.32, + "Filipino,Spanish,Malay,English": 0.30666666666666664, + "Filipino,Spanish,Indonesian,English": 0.32666666666666666, + "Filipino,Malay,Indonesian,English": 0.32, + "Vietnamese,Chinese,Spanish,Malay": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.28, + "Vietnamese,Chinese,Indonesian,English": 0.32666666666666666, + "Vietnamese,Spanish,Malay,Indonesian": 0.36666666666666664, + "Vietnamese,Spanish,Malay,English": 0.34, + "Vietnamese,Spanish,Indonesian,English": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,English": 0.34, + "Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Chinese,Spanish,Malay,English": 0.28, + "Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Spanish,Malay,Indonesian,English": 0.34 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.28, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Malay,English": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.24666666666666667, "Filipino,Vietnamese,Chinese,Indonesian,English": 0.2733333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Malay,English": 0.26, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.3, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.26, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay,English": 0.26666666666666666, - "Filipino,Chinese,Spanish,Indonesian,English": 0.29333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.26666666666666666, - "Filipino,Spanish,Malay,Indonesian,English": 0.29333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.29333333333333333, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.24, + "Filipino,Chinese,Spanish,Indonesian,English": 0.26, + "Filipino,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, "Vietnamese,Chinese,Spanish,Malay,English": 0.24, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.25333333333333335, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.29333333333333333, - "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.26, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.26666666666666666, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667 }, "6_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.26, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.22, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 }, "7_combine": { "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.21333333333333335 } }, - "AC3_2": 0.5550283591943592, - "AC3_3": 0.4644968643430462, - "AC3_4": 0.4034794719545824, - "AC3_5": 0.3604110511683117, - "AC3_6": 0.32884588313833535, - "AC3_7": 0.305226598858049 + "AC3_2": 0.5542753533779595, + "AC3_3": 0.4606563835944573, + "AC3_4": 0.39891252950441136, + "AC3_5": 0.3569129720413312, + "AC3_6": 0.32765630453356137, + "AC3_7": 0.30659966495118135 }, "prompt_2": { - "overall_acc": 0.5495238095238094, + "overall_acc": 0.539047619047619, "language_acc": { "Filipino": 0.5333333333333333, - "Vietnamese": 0.5266666666666666, + "Vietnamese": 0.52, "Chinese": 0.5733333333333334, "Spanish": 0.5866666666666667, - "Malay": 0.47333333333333333, - "Indonesian": 0.5333333333333333, + "Malay": 0.4533333333333333, + "Indonesian": 0.4866666666666667, "English": 0.62 }, - "consistency_score_2": 0.5904761904761906, - "consistency_score_3": 0.4382857142857143, - "consistency_score_4": 0.3575238095238095, - "consistency_score_5": 0.30698412698412697, - "consistency_score_6": 0.2723809523809524, - "consistency_score_7": 0.24666666666666667, + "consistency_score_2": 0.5625396825396826, + "consistency_score_3": 0.40361904761904766, + "consistency_score_4": 0.3268571428571428, + "consistency_score_5": 0.2815873015873016, + "consistency_score_6": 0.25047619047619046, + "consistency_score_7": 0.22666666666666666, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5933333333333334, - "Filipino,Chinese": 0.6, - "Filipino,Spanish": 0.6266666666666667, - "Filipino,Malay": 0.5533333333333333, - "Filipino,Indonesian": 0.6333333333333333, - "Filipino,English": 0.6, - "Vietnamese,Chinese": 0.5733333333333334, - "Vietnamese,Spanish": 0.6533333333333333, - "Vietnamese,Malay": 0.52, - "Vietnamese,Indonesian": 0.6266666666666667, - "Vietnamese,English": 0.5533333333333333, + "Filipino,Vietnamese": 0.5733333333333334, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.5466666666666666, + "Filipino,Malay": 0.5333333333333333, + "Filipino,Indonesian": 0.5466666666666666, + "Filipino,English": 0.5333333333333333, + "Vietnamese,Chinese": 0.5466666666666666, + "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Malay": 0.4866666666666667, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,English": 0.56, "Chinese,Spanish": 0.5533333333333333, - "Chinese,Malay": 0.5533333333333333, - "Chinese,Indonesian": 0.5866666666666667, - "Chinese,English": 0.5466666666666666, + "Chinese,Malay": 0.5733333333333334, + "Chinese,Indonesian": 0.54, + "Chinese,English": 0.5333333333333333, "Spanish,Malay": 0.5666666666666667, - "Spanish,Indonesian": 0.6733333333333333, - "Spanish,English": 0.6333333333333333, + "Spanish,Indonesian": 0.6, + "Spanish,English": 0.6066666666666667, "Malay,Indonesian": 0.6266666666666667, - "Malay,English": 0.5266666666666666, - "Indonesian,English": 0.6 + "Malay,English": 0.5066666666666667, + "Indonesian,English": 0.58 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.44666666666666666, - "Filipino,Vietnamese,Spanish": 0.4666666666666667, - "Filipino,Vietnamese,Malay": 0.3933333333333333, - "Filipino,Vietnamese,Indonesian": 0.47333333333333333, - "Filipino,Vietnamese,English": 0.44, - "Filipino,Chinese,Spanish": 0.44666666666666666, - "Filipino,Chinese,Malay": 0.4066666666666667, - "Filipino,Chinese,Indonesian": 0.4533333333333333, - "Filipino,Chinese,English": 0.44666666666666666, - "Filipino,Spanish,Malay": 0.44, - "Filipino,Spanish,Indonesian": 0.52, - "Filipino,Spanish,English": 0.4866666666666667, - "Filipino,Malay,Indonesian": 0.44666666666666666, - "Filipino,Malay,English": 0.41333333333333333, - "Filipino,Indonesian,English": 0.47333333333333333, - "Vietnamese,Chinese,Spanish": 0.43333333333333335, - "Vietnamese,Chinese,Malay": 0.38666666666666666, - "Vietnamese,Chinese,Indonesian": 0.44, - "Vietnamese,Chinese,English": 0.3933333333333333, - "Vietnamese,Spanish,Malay": 0.42, - "Vietnamese,Spanish,Indonesian": 0.5133333333333333, - "Vietnamese,Spanish,English": 0.48, - "Vietnamese,Malay,Indonesian": 0.44, - "Vietnamese,Malay,English": 0.36666666666666664, - "Vietnamese,Indonesian,English": 0.44, + "Filipino,Vietnamese,Chinese": 0.41333333333333333, + "Filipino,Vietnamese,Spanish": 0.41333333333333333, + "Filipino,Vietnamese,Malay": 0.36666666666666664, + "Filipino,Vietnamese,Indonesian": 0.4266666666666667, + "Filipino,Vietnamese,English": 0.4, + "Filipino,Chinese,Spanish": 0.4, + "Filipino,Chinese,Malay": 0.38666666666666666, + "Filipino,Chinese,Indonesian": 0.3933333333333333, + "Filipino,Chinese,English": 0.38, + "Filipino,Spanish,Malay": 0.3933333333333333, + "Filipino,Spanish,Indonesian": 0.42, + "Filipino,Spanish,English": 0.4066666666666667, + "Filipino,Malay,Indonesian": 0.41333333333333333, + "Filipino,Malay,English": 0.36, + "Filipino,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.42, + "Vietnamese,Chinese,Malay": 0.36666666666666664, + "Vietnamese,Chinese,Indonesian": 0.4, + "Vietnamese,Chinese,English": 0.38666666666666666, + "Vietnamese,Spanish,Malay": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian": 0.4666666666666667, + "Vietnamese,Spanish,English": 0.44666666666666666, + "Vietnamese,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,English": 0.3466666666666667, + "Vietnamese,Indonesian,English": 0.4066666666666667, "Chinese,Spanish,Malay": 0.3933333333333333, - "Chinese,Spanish,Indonesian": 0.44, - "Chinese,Spanish,English": 0.43333333333333335, - "Chinese,Malay,Indonesian": 0.4266666666666667, - "Chinese,Malay,English": 0.38, - "Chinese,Indonesian,English": 0.41333333333333333, - "Spanish,Malay,Indonesian": 0.47333333333333333, - "Spanish,Malay,English": 0.4066666666666667, - "Spanish,Indonesian,English": 0.49333333333333335, - "Malay,Indonesian,English": 0.41333333333333333 + "Chinese,Spanish,Indonesian": 0.4066666666666667, + "Chinese,Spanish,English": 0.41333333333333333, + "Chinese,Malay,Indonesian": 0.42, + "Chinese,Malay,English": 0.36666666666666664, + "Chinese,Indonesian,English": 0.38666666666666666, + "Spanish,Malay,Indonesian": 0.46, + "Spanish,Malay,English": 0.3933333333333333, + "Spanish,Indonesian,English": 0.44666666666666666, + "Malay,Indonesian,English": 0.4066666666666667 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.36666666666666664, - "Filipino,Vietnamese,Chinese,Malay": 0.32666666666666666, - "Filipino,Vietnamese,Chinese,Indonesian": 0.38, - "Filipino,Vietnamese,Chinese,English": 0.3466666666666667, - "Filipino,Vietnamese,Spanish,Malay": 0.35333333333333333, - "Filipino,Vietnamese,Spanish,Indonesian": 0.41333333333333333, - "Filipino,Vietnamese,Spanish,English": 0.4, - "Filipino,Vietnamese,Malay,Indonesian": 0.36, - "Filipino,Vietnamese,Malay,English": 0.3333333333333333, - "Filipino,Vietnamese,Indonesian,English": 0.38666666666666666, - "Filipino,Chinese,Spanish,Malay": 0.3333333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.37333333333333335, - "Filipino,Chinese,Spanish,English": 0.38666666666666666, - "Filipino,Chinese,Malay,Indonesian": 0.34, - "Filipino,Chinese,Malay,English": 0.3333333333333333, - "Filipino,Chinese,Indonesian,English": 0.37333333333333335, - "Filipino,Spanish,Malay,Indonesian": 0.4, - "Filipino,Spanish,Malay,English": 0.36666666666666664, - "Filipino,Spanish,Indonesian,English": 0.42, - "Filipino,Malay,Indonesian,English": 0.36, - "Vietnamese,Chinese,Spanish,Malay": 0.32666666666666666, - "Vietnamese,Chinese,Spanish,Indonesian": 0.37333333333333335, - "Vietnamese,Chinese,Spanish,English": 0.34, - "Vietnamese,Chinese,Malay,Indonesian": 0.34, - "Vietnamese,Chinese,Malay,English": 0.3, - "Vietnamese,Chinese,Indonesian,English": 0.3333333333333333, + "Filipino,Vietnamese,Chinese,Spanish": 0.32666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.3, + "Filipino,Vietnamese,Chinese,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,English": 0.30666666666666664, + "Filipino,Vietnamese,Spanish,Malay": 0.32, + "Filipino,Vietnamese,Spanish,Indonesian": 0.36, + "Filipino,Vietnamese,Spanish,English": 0.3466666666666667, + "Filipino,Vietnamese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.34, + "Filipino,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Chinese,Spanish,Indonesian": 0.32, + "Filipino,Chinese,Spanish,English": 0.32, + "Filipino,Chinese,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Malay,English": 0.29333333333333333, + "Filipino,Chinese,Indonesian,English": 0.30666666666666664, + "Filipino,Spanish,Malay,Indonesian": 0.36666666666666664, + "Filipino,Spanish,Malay,English": 0.32, + "Filipino,Spanish,Indonesian,English": 0.3466666666666667, + "Filipino,Malay,Indonesian,English": 0.32, + "Vietnamese,Chinese,Spanish,Malay": 0.32, + "Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,English": 0.3333333333333333, + "Vietnamese,Chinese,Malay,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.31333333333333335, "Vietnamese,Spanish,Malay,Indonesian": 0.38666666666666666, - "Vietnamese,Spanish,Malay,English": 0.34, - "Vietnamese,Spanish,Indonesian,English": 0.4066666666666667, - "Vietnamese,Malay,Indonesian,English": 0.3333333333333333, - "Chinese,Spanish,Malay,Indonesian": 0.3333333333333333, - "Chinese,Spanish,Malay,English": 0.32666666666666666, - "Chinese,Spanish,Indonesian,English": 0.3466666666666667, - "Chinese,Malay,Indonesian,English": 0.32, - "Spanish,Malay,Indonesian,English": 0.35333333333333333 + "Vietnamese,Spanish,Malay,English": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,English": 0.32666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.34, + "Chinese,Spanish,Malay,English": 0.30666666666666664, + "Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Chinese,Malay,Indonesian,English": 0.30666666666666664, + "Spanish,Malay,Indonesian,English": 0.3466666666666667 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.29333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.31333333333333335, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.3, - "Filipino,Vietnamese,Chinese,Malay,English": 0.28, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.31333333333333335, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.3333333333333333, - "Filipino,Vietnamese,Spanish,Malay,English": 0.32, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.36, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.31333333333333335, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, - "Filipino,Chinese,Spanish,Malay,English": 0.3, - "Filipino,Chinese,Spanish,Indonesian,English": 0.3333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.29333333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.3333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.3, - "Vietnamese,Chinese,Spanish,Malay,English": 0.28, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Malay,English": 0.28, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.28, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Chinese,Spanish,Malay,English": 0.26, + "Filipino,Chinese,Spanish,Indonesian,English": 0.2733333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.26, + "Filipino,Spanish,Malay,Indonesian,English": 0.3, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.2866666666666667, "Vietnamese,Chinese,Malay,Indonesian,English": 0.2733333333333333, "Vietnamese,Spanish,Malay,Indonesian,English": 0.31333333333333335, "Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.26, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.3, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.24666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 } }, - "AC3_2": 0.5692644308686308, - "AC3_3": 0.4876414522122174, - "AC3_4": 0.43320293589352155, - "AC3_5": 0.39391365733690314, - "AC3_6": 0.364226673243772, - "AC3_7": 0.34049441782007345 + "AC3_2": 0.5505431590003863, + "AC3_3": 0.46160513422539234, + "AC3_4": 0.4069536797971354, + "AC3_5": 0.3699305516727072, + "AC3_6": 0.3420253891232025, + "AC3_7": 0.3191376450661159 }, "prompt_3": { - "overall_acc": 0.5333333333333333, + "overall_acc": 0.5380952380952381, "language_acc": { - "Filipino": 0.5133333333333333, - "Vietnamese": 0.5266666666666666, - "Chinese": 0.5666666666666667, - "Spanish": 0.5866666666666667, - "Malay": 0.43333333333333335, - "Indonesian": 0.5, - "English": 0.6066666666666667 + "Filipino": 0.5, + "Vietnamese": 0.5333333333333333, + "Chinese": 0.56, + "Spanish": 0.5933333333333334, + "Malay": 0.44, + "Indonesian": 0.5266666666666666, + "English": 0.6133333333333333 }, - "consistency_score_2": 0.5847619047619047, - "consistency_score_3": 0.4276190476190476, - "consistency_score_4": 0.34419047619047616, - "consistency_score_5": 0.29111111111111115, - "consistency_score_6": 0.2542857142857143, - "consistency_score_7": 0.22666666666666666, + "consistency_score_2": 0.5584126984126986, + "consistency_score_3": 0.3963809523809524, + "consistency_score_4": 0.31466666666666665, + "consistency_score_5": 0.26603174603174606, + "consistency_score_6": 0.2323809523809524, + "consistency_score_7": 0.20666666666666667, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.5866666666666667, - "Filipino,Chinese": 0.58, - "Filipino,Spanish": 0.5866666666666667, - "Filipino,Malay": 0.5533333333333333, - "Filipino,Indonesian": 0.62, - "Filipino,English": 0.5533333333333333, - "Vietnamese,Chinese": 0.58, - "Vietnamese,Spanish": 0.62, - "Vietnamese,Malay": 0.54, - "Vietnamese,Indonesian": 0.6333333333333333, - "Vietnamese,English": 0.5666666666666667, - "Chinese,Spanish": 0.5266666666666666, - "Chinese,Malay": 0.5733333333333334, - "Chinese,Indonesian": 0.56, - "Chinese,English": 0.5266666666666666, - "Spanish,Malay": 0.5466666666666666, - "Spanish,Indonesian": 0.6466666666666666, - "Spanish,English": 0.7, - "Malay,Indonesian": 0.6533333333333333, - "Malay,English": 0.5133333333333333, - "Indonesian,English": 0.6133333333333333 + "Filipino,Vietnamese": 0.5666666666666667, + "Filipino,Chinese": 0.54, + "Filipino,Spanish": 0.52, + "Filipino,Malay": 0.54, + "Filipino,Indonesian": 0.56, + "Filipino,English": 0.5066666666666667, + "Vietnamese,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.5933333333333334, + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,English": 0.56, + "Chinese,Spanish": 0.5133333333333333, + "Chinese,Malay": 0.5466666666666666, + "Chinese,Indonesian": 0.5466666666666666, + "Chinese,English": 0.52, + "Spanish,Malay": 0.54, + "Spanish,Indonesian": 0.6266666666666667, + "Spanish,English": 0.64, + "Malay,Indonesian": 0.6466666666666666, + "Malay,English": 0.49333333333333335, + "Indonesian,English": 0.6 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.43333333333333335, - "Filipino,Vietnamese,Spanish": 0.44, + "Filipino,Vietnamese,Chinese": 0.38666666666666666, + "Filipino,Vietnamese,Spanish": 0.3933333333333333, "Filipino,Vietnamese,Malay": 0.38666666666666666, - "Filipino,Vietnamese,Indonesian": 0.47333333333333333, - "Filipino,Vietnamese,English": 0.41333333333333333, - "Filipino,Chinese,Spanish": 0.4, - "Filipino,Chinese,Malay": 0.3933333333333333, - "Filipino,Chinese,Indonesian": 0.4266666666666667, - "Filipino,Chinese,English": 0.4, - "Filipino,Spanish,Malay": 0.42, - "Filipino,Spanish,Indonesian": 0.48, - "Filipino,Spanish,English": 0.48, - "Filipino,Malay,Indonesian": 0.4533333333333333, - "Filipino,Malay,English": 0.38666666666666666, - "Filipino,Indonesian,English": 0.44666666666666666, - "Vietnamese,Chinese,Spanish": 0.4066666666666667, - "Vietnamese,Chinese,Malay": 0.4, - "Vietnamese,Chinese,Indonesian": 0.43333333333333335, - "Vietnamese,Chinese,English": 0.4066666666666667, - "Vietnamese,Spanish,Malay": 0.4, - "Vietnamese,Spanish,Indonesian": 0.4866666666666667, - "Vietnamese,Spanish,English": 0.4866666666666667, - "Vietnamese,Malay,Indonesian": 0.44666666666666666, - "Vietnamese,Malay,English": 0.38, - "Vietnamese,Indonesian,English": 0.4666666666666667, - "Chinese,Spanish,Malay": 0.37333333333333335, + "Filipino,Vietnamese,Indonesian": 0.41333333333333333, + "Filipino,Vietnamese,English": 0.38, + "Filipino,Chinese,Spanish": 0.3466666666666667, + "Filipino,Chinese,Malay": 0.36, + "Filipino,Chinese,Indonesian": 0.38666666666666666, + "Filipino,Chinese,English": 0.35333333333333333, + "Filipino,Spanish,Malay": 0.36666666666666664, + "Filipino,Spanish,Indonesian": 0.4066666666666667, + "Filipino,Spanish,English": 0.3933333333333333, + "Filipino,Malay,Indonesian": 0.43333333333333335, + "Filipino,Malay,English": 0.3466666666666667, + "Filipino,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,Malay": 0.38, + "Vietnamese,Chinese,Indonesian": 0.4066666666666667, + "Vietnamese,Chinese,English": 0.3933333333333333, + "Vietnamese,Spanish,Malay": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.4533333333333333, + "Vietnamese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Malay,English": 0.36666666666666664, + "Vietnamese,Indonesian,English": 0.42, + "Chinese,Spanish,Malay": 0.35333333333333333, "Chinese,Spanish,Indonesian": 0.3933333333333333, - "Chinese,Spanish,English": 0.43333333333333335, + "Chinese,Spanish,English": 0.3933333333333333, "Chinese,Malay,Indonesian": 0.42, - "Chinese,Malay,English": 0.37333333333333335, - "Chinese,Indonesian,English": 0.4066666666666667, + "Chinese,Malay,English": 0.3466666666666667, + "Chinese,Indonesian,English": 0.3933333333333333, "Spanish,Malay,Indonesian": 0.46, - "Spanish,Malay,English": 0.43333333333333335, - "Spanish,Indonesian,English": 0.5066666666666667, + "Spanish,Malay,English": 0.38666666666666666, + "Spanish,Indonesian,English": 0.4666666666666667, "Malay,Indonesian,English": 0.42 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.34, - "Filipino,Vietnamese,Chinese,Malay": 0.32, - "Filipino,Vietnamese,Chinese,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,Chinese,English": 0.34, - "Filipino,Vietnamese,Spanish,Malay": 0.32, - "Filipino,Vietnamese,Spanish,Indonesian": 0.38666666666666666, - "Filipino,Vietnamese,Spanish,English": 0.37333333333333335, - "Filipino,Vietnamese,Malay,Indonesian": 0.35333333333333333, - "Filipino,Vietnamese,Malay,English": 0.31333333333333335, - "Filipino,Vietnamese,Indonesian,English": 0.36666666666666664, - "Filipino,Chinese,Spanish,Malay": 0.30666666666666664, - "Filipino,Chinese,Spanish,Indonesian": 0.32666666666666666, - "Filipino,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Vietnamese,Chinese,Spanish": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Malay": 0.3, + "Filipino,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.31333333333333335, + "Filipino,Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,Malay,English": 0.3, + "Filipino,Vietnamese,Indonesian,English": 0.32, + "Filipino,Chinese,Spanish,Malay": 0.26, + "Filipino,Chinese,Spanish,Indonesian": 0.2866666666666667, + "Filipino,Chinese,Spanish,English": 0.29333333333333333, "Filipino,Chinese,Malay,Indonesian": 0.32666666666666666, - "Filipino,Chinese,Malay,English": 0.30666666666666664, - "Filipino,Chinese,Indonesian,English": 0.3333333333333333, - "Filipino,Spanish,Malay,Indonesian": 0.37333333333333335, - "Filipino,Spanish,Malay,English": 0.36666666666666664, - "Filipino,Spanish,Indonesian,English": 0.4, - "Filipino,Malay,Indonesian,English": 0.34, - "Vietnamese,Chinese,Spanish,Malay": 0.31333333333333335, - "Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.3466666666666667, - "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, - "Vietnamese,Chinese,Malay,English": 0.32, - "Vietnamese,Chinese,Indonesian,English": 0.3466666666666667, - "Vietnamese,Spanish,Malay,Indonesian": 0.35333333333333333, - "Vietnamese,Spanish,Malay,English": 0.3466666666666667, - "Vietnamese,Spanish,Indonesian,English": 0.4066666666666667, + "Filipino,Chinese,Malay,English": 0.26, + "Filipino,Chinese,Indonesian,English": 0.3, + "Filipino,Spanish,Malay,Indonesian": 0.35333333333333333, + "Filipino,Spanish,Malay,English": 0.3, + "Filipino,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Malay,Indonesian,English": 0.32, + "Vietnamese,Chinese,Spanish,Malay": 0.3, + "Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.32, + "Vietnamese,Spanish,Malay,Indonesian": 0.36, + "Vietnamese,Spanish,Malay,English": 0.32, + "Vietnamese,Spanish,Indonesian,English": 0.37333333333333335, "Vietnamese,Malay,Indonesian,English": 0.34, - "Chinese,Spanish,Malay,Indonesian": 0.32666666666666666, - "Chinese,Spanish,Malay,English": 0.3333333333333333, - "Chinese,Spanish,Indonesian,English": 0.34, - "Chinese,Malay,Indonesian,English": 0.32666666666666666, - "Spanish,Malay,Indonesian,English": 0.36666666666666664 + "Chinese,Spanish,Malay,Indonesian": 0.32, + "Chinese,Spanish,Malay,English": 0.3, + "Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Chinese,Malay,Indonesian,English": 0.31333333333333335, + "Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.3, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Malay,English": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Malay,English": 0.29333333333333333, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.3333333333333333, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.2866666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay,English": 0.2866666666666667, - "Filipino,Chinese,Spanish,Indonesian,English": 0.29333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.2733333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.32, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, - "Vietnamese,Chinese,Spanish,Malay,English": 0.29333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.24, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.26, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.28, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.28, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Chinese,Spanish,Malay,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Indonesian,English": 0.25333333333333335, + "Filipino,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.26, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.28, "Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, - "Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 + "Chinese,Spanish,Malay,Indonesian,English": 0.28 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.26, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.26666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.20666666666666667 } }, - "AC3_2": 0.5578648494674255, - "AC3_3": 0.4746613808557917, - "AC3_4": 0.41837783078945473, - "AC3_5": 0.3766397124430851, - "AC3_6": 0.3443772671872314, - "AC3_7": 0.3181286549289012 + "AC3_2": 0.5480657346399558, + "AC3_3": 0.45649253581588306, + "AC3_4": 0.3971111607009567, + "AC3_5": 0.3560393284387946, + "AC3_6": 0.3245864971030836, + "AC3_7": 0.29863597608948406 }, "prompt_4": { - "overall_acc": 0.5457142857142857, + "overall_acc": 0.5361904761904761, "language_acc": { - "Filipino": 0.5333333333333333, - "Vietnamese": 0.5066666666666667, - "Chinese": 0.5733333333333334, - "Spanish": 0.5466666666666666, - "Malay": 0.5, - "Indonesian": 0.5133333333333333, - "English": 0.6466666666666666 + "Filipino": 0.5266666666666666, + "Vietnamese": 0.5266666666666666, + "Chinese": 0.5866666666666667, + "Spanish": 0.5266666666666666, + "Malay": 0.47333333333333333, + "Indonesian": 0.4866666666666667, + "English": 0.6266666666666667 }, - "consistency_score_2": 0.5590476190476191, - "consistency_score_3": 0.3906666666666667, - "consistency_score_4": 0.30666666666666664, - "consistency_score_5": 0.25714285714285723, - "consistency_score_6": 0.22380952380952382, + "consistency_score_2": 0.5631746031746032, + "consistency_score_3": 0.3954285714285714, + "consistency_score_4": 0.30971428571428583, + "consistency_score_5": 0.2587301587301587, + "consistency_score_6": 0.22476190476190477, "consistency_score_7": 0.2, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.52, - "Filipino,Chinese": 0.58, - "Filipino,Spanish": 0.6, + "Filipino,Vietnamese": 0.5133333333333333, + "Filipino,Chinese": 0.5666666666666667, + "Filipino,Spanish": 0.5666666666666667, "Filipino,Malay": 0.5333333333333333, - "Filipino,Indonesian": 0.56, - "Filipino,English": 0.56, - "Vietnamese,Chinese": 0.58, - "Vietnamese,Spanish": 0.58, - "Vietnamese,Malay": 0.5466666666666666, - "Vietnamese,Indonesian": 0.5733333333333334, - "Vietnamese,English": 0.5133333333333333, - "Chinese,Spanish": 0.5466666666666666, - "Chinese,Malay": 0.4866666666666667, - "Chinese,Indonesian": 0.6, + "Filipino,Indonesian": 0.5333333333333333, + "Filipino,English": 0.5466666666666666, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.6133333333333333, + "Vietnamese,Malay": 0.5533333333333333, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,English": 0.52, + "Chinese,Spanish": 0.5666666666666667, + "Chinese,Malay": 0.54, + "Chinese,Indonesian": 0.6266666666666667, "Chinese,English": 0.5066666666666667, - "Spanish,Malay": 0.5666666666666667, - "Spanish,Indonesian": 0.5733333333333334, - "Spanish,English": 0.6266666666666667, - "Malay,Indonesian": 0.6733333333333333, - "Malay,English": 0.49333333333333335, + "Spanish,Malay": 0.5866666666666667, + "Spanish,Indonesian": 0.58, + "Spanish,English": 0.6066666666666667, + "Malay,Indonesian": 0.6866666666666666, + "Malay,English": 0.54, "Indonesian,English": 0.52 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.3933333333333333, - "Filipino,Vietnamese,Spanish": 0.38666666666666666, - "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Chinese": 0.38, + "Filipino,Vietnamese,Spanish": 0.4, + "Filipino,Vietnamese,Malay": 0.36, "Filipino,Vietnamese,Indonesian": 0.37333333333333335, "Filipino,Vietnamese,English": 0.36666666666666664, - "Filipino,Chinese,Spanish": 0.4266666666666667, - "Filipino,Chinese,Malay": 0.36, - "Filipino,Chinese,Indonesian": 0.4066666666666667, - "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Chinese,Spanish": 0.4066666666666667, + "Filipino,Chinese,Malay": 0.37333333333333335, + "Filipino,Chinese,Indonesian": 0.4, + "Filipino,Chinese,English": 0.38, "Filipino,Spanish,Malay": 0.4, - "Filipino,Spanish,Indonesian": 0.4266666666666667, - "Filipino,Spanish,English": 0.46, + "Filipino,Spanish,Indonesian": 0.4, + "Filipino,Spanish,English": 0.4266666666666667, "Filipino,Malay,Indonesian": 0.41333333333333333, - "Filipino,Malay,English": 0.35333333333333333, - "Filipino,Indonesian,English": 0.38, - "Vietnamese,Chinese,Spanish": 0.4066666666666667, + "Filipino,Malay,English": 0.37333333333333335, + "Filipino,Indonesian,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish": 0.42, "Vietnamese,Chinese,Malay": 0.37333333333333335, - "Vietnamese,Chinese,Indonesian": 0.4266666666666667, - "Vietnamese,Chinese,English": 0.35333333333333333, - "Vietnamese,Spanish,Malay": 0.4, - "Vietnamese,Spanish,Indonesian": 0.42, - "Vietnamese,Spanish,English": 0.41333333333333333, + "Vietnamese,Chinese,Indonesian": 0.42, + "Vietnamese,Chinese,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.44, + "Vietnamese,Spanish,English": 0.42, "Vietnamese,Malay,Indonesian": 0.44, - "Vietnamese,Malay,English": 0.3466666666666667, + "Vietnamese,Malay,English": 0.37333333333333335, "Vietnamese,Indonesian,English": 0.36666666666666664, - "Chinese,Spanish,Malay": 0.36, - "Chinese,Spanish,Indonesian": 0.4066666666666667, + "Chinese,Spanish,Malay": 0.38666666666666666, + "Chinese,Spanish,Indonesian": 0.42, "Chinese,Spanish,English": 0.37333333333333335, - "Chinese,Malay,Indonesian": 0.42, - "Chinese,Malay,English": 0.30666666666666664, - "Chinese,Indonesian,English": 0.36666666666666664, - "Spanish,Malay,Indonesian": 0.43333333333333335, - "Spanish,Malay,English": 0.38666666666666666, - "Spanish,Indonesian,English": 0.41333333333333333, - "Malay,Indonesian,English": 0.38 + "Chinese,Malay,Indonesian": 0.44666666666666666, + "Chinese,Malay,English": 0.3333333333333333, + "Chinese,Indonesian,English": 0.37333333333333335, + "Spanish,Malay,Indonesian": 0.46, + "Spanish,Malay,English": 0.4066666666666667, + "Spanish,Indonesian,English": 0.4066666666666667, + "Malay,Indonesian,English": 0.4 }, "4_combine": { "Filipino,Vietnamese,Chinese,Spanish": 0.32, - "Filipino,Vietnamese,Chinese,Malay": 0.2866666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.3, - "Filipino,Vietnamese,Spanish,Malay": 0.2866666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.32, - "Filipino,Vietnamese,Spanish,English": 0.32666666666666666, - "Filipino,Vietnamese,Malay,Indonesian": 0.3, - "Filipino,Vietnamese,Malay,English": 0.26666666666666666, - "Filipino,Vietnamese,Indonesian,English": 0.31333333333333335, - "Filipino,Chinese,Spanish,Malay": 0.29333333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.3333333333333333, - "Filipino,Chinese,Spanish,English": 0.32666666666666666, - "Filipino,Chinese,Malay,Indonesian": 0.31333333333333335, - "Filipino,Chinese,Malay,English": 0.2733333333333333, - "Filipino,Chinese,Indonesian,English": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,English": 0.28, + "Filipino,Vietnamese,Spanish,Malay": 0.32, + "Filipino,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.31333333333333335, + "Filipino,Vietnamese,Malay,Indonesian": 0.32, + "Filipino,Vietnamese,Malay,English": 0.2866666666666667, + "Filipino,Vietnamese,Indonesian,English": 0.3, + "Filipino,Chinese,Spanish,Malay": 0.3, + "Filipino,Chinese,Spanish,Indonesian": 0.32, + "Filipino,Chinese,Spanish,English": 0.30666666666666664, + "Filipino,Chinese,Malay,Indonesian": 0.32666666666666666, + "Filipino,Chinese,Malay,English": 0.28, + "Filipino,Chinese,Indonesian,English": 0.3, "Filipino,Spanish,Malay,Indonesian": 0.32666666666666666, "Filipino,Spanish,Malay,English": 0.31333333333333335, - "Filipino,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Spanish,Indonesian,English": 0.31333333333333335, "Filipino,Malay,Indonesian,English": 0.29333333333333333, - "Vietnamese,Chinese,Spanish,Malay": 0.3, - "Vietnamese,Chinese,Spanish,Indonesian": 0.34, - "Vietnamese,Chinese,Spanish,English": 0.29333333333333333, - "Vietnamese,Chinese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Malay": 0.32, + "Vietnamese,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,English": 0.28, + "Vietnamese,Chinese,Malay,Indonesian": 0.35333333333333333, "Vietnamese,Chinese,Malay,English": 0.24666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, - "Vietnamese,Spanish,Malay,Indonesian": 0.35333333333333333, - "Vietnamese,Spanish,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.28, + "Vietnamese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Malay,English": 0.30666666666666664, "Vietnamese,Spanish,Indonesian,English": 0.3333333333333333, - "Vietnamese,Malay,Indonesian,English": 0.3, - "Chinese,Spanish,Malay,Indonesian": 0.32, - "Chinese,Spanish,Malay,English": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,English": 0.30666666666666664, + "Chinese,Spanish,Malay,Indonesian": 0.34, + "Chinese,Spanish,Malay,English": 0.2866666666666667, "Chinese,Spanish,Indonesian,English": 0.3, - "Chinese,Malay,Indonesian,English": 0.2733333333333333, - "Spanish,Malay,Indonesian,English": 0.31333333333333335 + "Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Spanish,Malay,Indonesian,English": 0.3333333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26666666666666666, "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.2733333333333333, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.26, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.24, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.28, "Filipino,Vietnamese,Chinese,Malay,English": 0.22666666666666666, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26666666666666666, - "Filipino,Vietnamese,Spanish,Malay,English": 0.24666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.25333333333333335, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2733333333333333, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.26, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26666666666666666, "Filipino,Chinese,Spanish,Malay,English": 0.24, - "Filipino,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.24666666666666667, "Filipino,Chinese,Malay,Indonesian,English": 0.24666666666666667, "Filipino,Spanish,Malay,Indonesian,English": 0.26, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, "Vietnamese,Chinese,Spanish,Malay,English": 0.22666666666666666, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, "Vietnamese,Chinese,Malay,Indonesian,English": 0.24, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.2733333333333333, - "Chinese,Spanish,Malay,Indonesian,English": 0.24 + "Vietnamese,Spanish,Malay,Indonesian,English": 0.28, + "Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.25333333333333335, "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.20666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22, "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24, "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.21333333333333335, "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22 }, @@ -94669,962 +94669,962 @@ "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2 } }, - "AC3_2": 0.5523004925608448, - "AC3_3": 0.45535394624917397, - "AC3_4": 0.39267039101538503, - "AC3_5": 0.349567869809027, - "AC3_6": 0.3174328146687919, - "AC3_7": 0.29272030647415626 + "AC3_2": 0.5493513743432756, + "AC3_3": 0.4551753949915947, + "AC3_4": 0.3926348634013275, + "AC3_5": 0.34903773006410915, + "AC3_6": 0.3167483163061905, + "AC3_7": 0.2913324708530532 }, "prompt_5": { - "overall_acc": 0.5438095238095239, + "overall_acc": 0.5504761904761905, "language_acc": { - "Filipino": 0.54, - "Vietnamese": 0.4666666666666667, + "Filipino": 0.5266666666666666, + "Vietnamese": 0.49333333333333335, "Chinese": 0.6, - "Spanish": 0.5533333333333333, - "Malay": 0.5, - "Indonesian": 0.49333333333333335, - "English": 0.6533333333333333 + "Spanish": 0.5466666666666666, + "Malay": 0.52, + "Indonesian": 0.5, + "English": 0.6666666666666666 }, - "consistency_score_2": 0.5673015873015874, - "consistency_score_3": 0.40361904761904777, - "consistency_score_4": 0.3182857142857142, - "consistency_score_5": 0.2663492063492064, - "consistency_score_6": 0.23142857142857146, - "consistency_score_7": 0.20666666666666667, + "consistency_score_2": 0.5733333333333333, + "consistency_score_3": 0.4108571428571428, + "consistency_score_4": 0.3270476190476191, + "consistency_score_5": 0.2755555555555556, + "consistency_score_6": 0.24, + "consistency_score_7": 0.21333333333333335, "detailed_consistency_score": { "2_combine": { "Filipino,Vietnamese": 0.5066666666666667, - "Filipino,Chinese": 0.5866666666666667, - "Filipino,Spanish": 0.56, - "Filipino,Malay": 0.5466666666666666, - "Filipino,Indonesian": 0.5666666666666667, - "Filipino,English": 0.5666666666666667, - "Vietnamese,Chinese": 0.5533333333333333, - "Vietnamese,Spanish": 0.5733333333333334, - "Vietnamese,Malay": 0.5133333333333333, - "Vietnamese,Indonesian": 0.5466666666666666, - "Vietnamese,English": 0.52, - "Chinese,Spanish": 0.56, - "Chinese,Malay": 0.5733333333333334, - "Chinese,Indonesian": 0.58, - "Chinese,English": 0.5333333333333333, - "Spanish,Malay": 0.56, - "Spanish,Indonesian": 0.6066666666666667, + "Filipino,Chinese": 0.5666666666666667, + "Filipino,Spanish": 0.5466666666666666, + "Filipino,Malay": 0.54, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,English": 0.5333333333333333, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.6133333333333333, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,English": 0.5, + "Chinese,Spanish": 0.5466666666666666, + "Chinese,Malay": 0.5933333333333334, + "Chinese,Indonesian": 0.6, + "Chinese,English": 0.5266666666666666, + "Spanish,Malay": 0.5666666666666667, + "Spanish,Indonesian": 0.6333333333333333, "Spanish,English": 0.6533333333333333, - "Malay,Indonesian": 0.68, - "Malay,English": 0.5266666666666666, - "Indonesian,English": 0.6 + "Malay,Indonesian": 0.6733333333333333, + "Malay,English": 0.54, + "Indonesian,English": 0.5666666666666667 }, "3_combine": { "Filipino,Vietnamese,Chinese": 0.38666666666666666, - "Filipino,Vietnamese,Spanish": 0.36666666666666664, - "Filipino,Vietnamese,Malay": 0.34, - "Filipino,Vietnamese,Indonesian": 0.38666666666666666, - "Filipino,Vietnamese,English": 0.37333333333333335, - "Filipino,Chinese,Spanish": 0.41333333333333333, - "Filipino,Chinese,Malay": 0.4066666666666667, - "Filipino,Chinese,Indonesian": 0.42, - "Filipino,Chinese,English": 0.41333333333333333, - "Filipino,Spanish,Malay": 0.3933333333333333, + "Filipino,Vietnamese,Spanish": 0.38666666666666666, + "Filipino,Vietnamese,Malay": 0.37333333333333335, + "Filipino,Vietnamese,Indonesian": 0.4, + "Filipino,Vietnamese,English": 0.34, + "Filipino,Chinese,Spanish": 0.38666666666666666, + "Filipino,Chinese,Malay": 0.4, + "Filipino,Chinese,Indonesian": 0.4266666666666667, + "Filipino,Chinese,English": 0.38666666666666666, + "Filipino,Spanish,Malay": 0.38666666666666666, "Filipino,Spanish,Indonesian": 0.42, - "Filipino,Spanish,English": 0.44, - "Filipino,Malay,Indonesian": 0.43333333333333335, - "Filipino,Malay,English": 0.36666666666666664, - "Filipino,Indonesian,English": 0.4266666666666667, - "Vietnamese,Chinese,Spanish": 0.3933333333333333, - "Vietnamese,Chinese,Malay": 0.38, - "Vietnamese,Chinese,Indonesian": 0.3933333333333333, - "Vietnamese,Chinese,English": 0.36666666666666664, - "Vietnamese,Spanish,Malay": 0.3933333333333333, - "Vietnamese,Spanish,Indonesian": 0.42, - "Vietnamese,Spanish,English": 0.42, - "Vietnamese,Malay,Indonesian": 0.42, - "Vietnamese,Malay,English": 0.35333333333333333, - "Vietnamese,Indonesian,English": 0.4, + "Filipino,Spanish,English": 0.41333333333333333, + "Filipino,Malay,Indonesian": 0.44, + "Filipino,Malay,English": 0.36, + "Filipino,Indonesian,English": 0.41333333333333333, + "Vietnamese,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese,Malay": 0.42, + "Vietnamese,Chinese,Indonesian": 0.4266666666666667, + "Vietnamese,Chinese,English": 0.3466666666666667, + "Vietnamese,Spanish,Malay": 0.44666666666666666, + "Vietnamese,Spanish,Indonesian": 0.4666666666666667, + "Vietnamese,Spanish,English": 0.44, + "Vietnamese,Malay,Indonesian": 0.4666666666666667, + "Vietnamese,Malay,English": 0.37333333333333335, + "Vietnamese,Indonesian,English": 0.3933333333333333, "Chinese,Spanish,Malay": 0.4, - "Chinese,Spanish,Indonesian": 0.41333333333333333, - "Chinese,Spanish,English": 0.41333333333333333, - "Chinese,Malay,Indonesian": 0.44666666666666666, - "Chinese,Malay,English": 0.36666666666666664, - "Chinese,Indonesian,English": 0.41333333333333333, - "Spanish,Malay,Indonesian": 0.4533333333333333, - "Spanish,Malay,English": 0.4, + "Chinese,Spanish,Indonesian": 0.4266666666666667, + "Chinese,Spanish,English": 0.4, + "Chinese,Malay,Indonesian": 0.4666666666666667, + "Chinese,Malay,English": 0.38666666666666666, + "Chinese,Indonesian,English": 0.3933333333333333, + "Spanish,Malay,Indonesian": 0.48, + "Spanish,Malay,English": 0.42, "Spanish,Indonesian,English": 0.46, "Malay,Indonesian,English": 0.43333333333333335 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.29333333333333333, - "Filipino,Vietnamese,Chinese,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Chinese,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.3, - "Filipino,Vietnamese,Spanish,Malay": 0.28, - "Filipino,Vietnamese,Spanish,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.32, - "Filipino,Vietnamese,Malay,Indonesian": 0.31333333333333335, - "Filipino,Vietnamese,Malay,English": 0.2866666666666667, - "Filipino,Vietnamese,Indonesian,English": 0.32666666666666666, - "Filipino,Chinese,Spanish,Malay": 0.32, - "Filipino,Chinese,Spanish,Indonesian": 0.32, - "Filipino,Chinese,Spanish,English": 0.32666666666666666, - "Filipino,Chinese,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Chinese,Spanish": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Malay": 0.31333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian": 0.32, + "Filipino,Vietnamese,Chinese,English": 0.28, + "Filipino,Vietnamese,Spanish,Malay": 0.31333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.3, + "Filipino,Vietnamese,Malay,Indonesian": 0.34, + "Filipino,Vietnamese,Malay,English": 0.29333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Chinese,Spanish,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,English": 0.30666666666666664, + "Filipino,Chinese,Malay,Indonesian": 0.3466666666666667, "Filipino,Chinese,Malay,English": 0.30666666666666664, - "Filipino,Chinese,Indonesian,English": 0.34, - "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, - "Filipino,Spanish,Malay,English": 0.30666666666666664, - "Filipino,Spanish,Indonesian,English": 0.3466666666666667, + "Filipino,Chinese,Indonesian,English": 0.32666666666666666, + "Filipino,Spanish,Malay,Indonesian": 0.34, + "Filipino,Spanish,Malay,English": 0.32, + "Filipino,Spanish,Indonesian,English": 0.3333333333333333, "Filipino,Malay,Indonesian,English": 0.32666666666666666, - "Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, - "Vietnamese,Chinese,Spanish,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Malay": 0.34, + "Vietnamese,Chinese,Spanish,Indonesian": 0.35333333333333333, "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, - "Vietnamese,Chinese,Malay,Indonesian": 0.34, - "Vietnamese,Chinese,Malay,English": 0.28, - "Vietnamese,Chinese,Indonesian,English": 0.30666666666666664, - "Vietnamese,Spanish,Malay,Indonesian": 0.3466666666666667, - "Vietnamese,Spanish,Malay,English": 0.30666666666666664, - "Vietnamese,Spanish,Indonesian,English": 0.3466666666666667, - "Vietnamese,Malay,Indonesian,English": 0.32, - "Chinese,Spanish,Malay,Indonesian": 0.34, - "Chinese,Spanish,Malay,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian": 0.38, + "Vietnamese,Chinese,Malay,English": 0.29333333333333333, + "Vietnamese,Chinese,Indonesian,English": 0.3, + "Vietnamese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Spanish,Malay,English": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.36, + "Vietnamese,Malay,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.36, + "Chinese,Spanish,Malay,English": 0.32666666666666666, "Chinese,Spanish,Indonesian,English": 0.32, - "Chinese,Malay,Indonesian,English": 0.32666666666666666, - "Spanish,Malay,Indonesian,English": 0.34 + "Chinese,Malay,Indonesian,English": 0.34, + "Spanish,Malay,Indonesian,English": 0.37333333333333335 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.29333333333333333, "Filipino,Vietnamese,Chinese,Malay,English": 0.25333333333333335, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26, - "Filipino,Vietnamese,Spanish,Malay,English": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2733333333333333, "Filipino,Vietnamese,Malay,Indonesian,English": 0.2733333333333333, "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, - "Filipino,Chinese,Spanish,Malay,English": 0.26, - "Filipino,Chinese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.26, "Filipino,Chinese,Malay,Indonesian,English": 0.28, - "Filipino,Spanish,Malay,Indonesian,English": 0.2733333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2866666666666667, - "Vietnamese,Chinese,Spanish,Malay,English": 0.24666666666666667, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.26666666666666666, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.26666666666666666, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.28, - "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 + "Filipino,Spanish,Malay,Indonesian,English": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.2733333333333333, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.28, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Chinese,Spanish,Malay,Indonesian,English": 0.29333333333333333 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22, + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22666666666666666, "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22666666666666666, "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.25333333333333335 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.20666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.21333333333333335 } }, - "AC3_2": 0.5553072108343763, - "AC3_3": 0.4633423324610993, - "AC3_4": 0.40154914457812974, - "AC3_5": 0.35756754735099816, - "AC3_6": 0.32468234464046314, - "AC3_7": 0.29950930622066607 + "AC3_2": 0.5616723163342014, + "AC3_3": 0.47052789488156493, + "AC3_4": 0.4103180459908446, + "AC3_5": 0.3672662054385003, + "AC3_6": 0.3342650601986773, + "AC3_7": 0.3074979218217532 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.43344155844155846, + "overall_acc": 0.4391233766233767, "language_acc": { - "Indonesian": 0.3977272727272727, - "English": 0.4943181818181818, - "Filipino": 0.3068181818181818, + "Indonesian": 0.4147727272727273, + "English": 0.48863636363636365, + "Filipino": 0.3125, "Spanish": 0.4659090909090909, - "Chinese": 0.4659090909090909, - "Malay": 0.4431818181818182, - "Vietnamese": 0.4602272727272727 + "Chinese": 0.4772727272727273, + "Malay": 0.4375, + "Vietnamese": 0.4772727272727273 }, - "consistency_score_2": 0.49485930735930744, - "consistency_score_3": 0.3073051948051948, - "consistency_score_4": 0.21704545454545454, - "consistency_score_5": 0.16639610389610388, - "consistency_score_6": 0.13311688311688313, - "consistency_score_7": 0.10795454545454546, + "consistency_score_2": 0.5005411255411255, + "consistency_score_3": 0.31477272727272726, + "consistency_score_4": 0.22467532467532467, + "consistency_score_5": 0.17424242424242425, + "consistency_score_6": 0.14204545454545456, + "consistency_score_7": 0.11931818181818182, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4715909090909091, - "Indonesian,Filipino": 0.4659090909090909, - "Indonesian,Spanish": 0.5056818181818182, - "Indonesian,Chinese": 0.4943181818181818, - "Indonesian,Malay": 0.6193181818181818, + "Indonesian,English": 0.5170454545454546, + "Indonesian,Filipino": 0.48295454545454547, + "Indonesian,Spanish": 0.5170454545454546, + "Indonesian,Chinese": 0.48863636363636365, + "Indonesian,Malay": 0.6306818181818182, "Indonesian,Vietnamese": 0.48863636363636365, - "English,Filipino": 0.3806818181818182, - "English,Spanish": 0.6136363636363636, + "English,Filipino": 0.3693181818181818, + "English,Spanish": 0.6193181818181818, "English,Chinese": 0.5113636363636364, - "English,Malay": 0.5454545454545454, - "English,Vietnamese": 0.4715909090909091, - "Filipino,Spanish": 0.44886363636363635, + "English,Malay": 0.5568181818181818, + "English,Vietnamese": 0.5, + "Filipino,Spanish": 0.42045454545454547, "Filipino,Chinese": 0.3693181818181818, - "Filipino,Malay": 0.4659090909090909, - "Filipino,Vietnamese": 0.42045454545454547, - "Spanish,Chinese": 0.5113636363636364, - "Spanish,Malay": 0.5511363636363636, - "Spanish,Vietnamese": 0.5056818181818182, - "Chinese,Malay": 0.48863636363636365, - "Chinese,Vietnamese": 0.5511363636363636, + "Filipino,Malay": 0.4715909090909091, + "Filipino,Vietnamese": 0.4375, + "Spanish,Chinese": 0.5284090909090909, + "Spanish,Malay": 0.5454545454545454, + "Spanish,Vietnamese": 0.5170454545454546, + "Chinese,Malay": 0.4943181818181818, + "Chinese,Vietnamese": 0.5340909090909091, "Malay,Vietnamese": 0.5113636363636364 }, "3_combine": { - "Indonesian,English,Filipino": 0.23863636363636365, - "Indonesian,English,Spanish": 0.3409090909090909, - "Indonesian,English,Chinese": 0.3068181818181818, - "Indonesian,English,Malay": 0.36363636363636365, - "Indonesian,English,Vietnamese": 0.2840909090909091, - "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,English,Filipino": 0.2556818181818182, + "Indonesian,English,Spanish": 0.3693181818181818, + "Indonesian,English,Chinese": 0.32386363636363635, + "Indonesian,English,Malay": 0.39204545454545453, + "Indonesian,English,Vietnamese": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.2784090909090909, "Indonesian,Filipino,Chinese": 0.23863636363636365, - "Indonesian,Filipino,Malay": 0.32386363636363635, - "Indonesian,Filipino,Vietnamese": 0.2784090909090909, - "Indonesian,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.3465909090909091, + "Indonesian,Filipino,Vietnamese": 0.29545454545454547, + "Indonesian,Spanish,Chinese": 0.32386363636363635, "Indonesian,Spanish,Malay": 0.39204545454545453, - "Indonesian,Spanish,Vietnamese": 0.30113636363636365, - "Indonesian,Chinese,Malay": 0.36363636363636365, - "Indonesian,Chinese,Vietnamese": 0.3409090909090909, - "Indonesian,Malay,Vietnamese": 0.3693181818181818, - "English,Filipino,Spanish": 0.29545454545454547, - "English,Filipino,Chinese": 0.21022727272727273, - "English,Filipino,Malay": 0.26136363636363635, - "English,Filipino,Vietnamese": 0.21022727272727273, - "English,Spanish,Chinese": 0.36363636363636365, + "Indonesian,Spanish,Vietnamese": 0.3125, + "Indonesian,Chinese,Malay": 0.3693181818181818, + "Indonesian,Chinese,Vietnamese": 0.32386363636363635, + "Indonesian,Malay,Vietnamese": 0.36363636363636365, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Chinese": 0.20454545454545456, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese": 0.375, "English,Spanish,Malay": 0.39204545454545453, - "English,Spanish,Vietnamese": 0.3409090909090909, - "English,Chinese,Malay": 0.3522727272727273, + "English,Spanish,Vietnamese": 0.36363636363636365, + "English,Chinese,Malay": 0.35795454545454547, "English,Chinese,Vietnamese": 0.32954545454545453, - "English,Malay,Vietnamese": 0.3181818181818182, - "Filipino,Spanish,Chinese": 0.26136363636363635, - "Filipino,Spanish,Malay": 0.3125, - "Filipino,Spanish,Vietnamese": 0.26704545454545453, - "Filipino,Chinese,Malay": 0.25, + "English,Malay,Vietnamese": 0.3522727272727273, + "Filipino,Spanish,Chinese": 0.2556818181818182, + "Filipino,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Vietnamese": 0.2556818181818182, + "Filipino,Chinese,Malay": 0.24431818181818182, "Filipino,Chinese,Vietnamese": 0.25, - "Filipino,Malay,Vietnamese": 0.2897727272727273, - "Spanish,Chinese,Malay": 0.32386363636363635, + "Filipino,Malay,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Malay": 0.32954545454545453, "Spanish,Chinese,Vietnamese": 0.3465909090909091, - "Spanish,Malay,Vietnamese": 0.32954545454545453, + "Spanish,Malay,Vietnamese": 0.3409090909090909, "Chinese,Malay,Vietnamese": 0.32386363636363635 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.19886363636363635, - "Indonesian,English,Filipino,Chinese": 0.14772727272727273, - "Indonesian,English,Filipino,Malay": 0.19886363636363635, - "Indonesian,English,Filipino,Vietnamese": 0.16477272727272727, - "Indonesian,English,Spanish,Chinese": 0.25, - "Indonesian,English,Spanish,Malay": 0.2840909090909091, - "Indonesian,English,Spanish,Vietnamese": 0.22727272727272727, - "Indonesian,English,Chinese,Malay": 0.26136363636363635, - "Indonesian,English,Chinese,Vietnamese": 0.22727272727272727, - "Indonesian,English,Malay,Vietnamese": 0.23863636363636365, - "Indonesian,Filipino,Spanish,Chinese": 0.17613636363636365, + "Indonesian,English,Filipino,Spanish": 0.21022727272727273, + "Indonesian,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese": 0.26136363636363635, + "Indonesian,English,Spanish,Malay": 0.29545454545454547, + "Indonesian,English,Spanish,Vietnamese": 0.24431818181818182, + "Indonesian,English,Chinese,Malay": 0.2727272727272727, + "Indonesian,English,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Chinese": 0.19318181818181818, "Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, - "Indonesian,Filipino,Spanish,Vietnamese": 0.19318181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.1875, - "Indonesian,Filipino,Chinese,Vietnamese": 0.1875, - "Indonesian,Filipino,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,Filipino,Chinese,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Malay,Vietnamese": 0.25, "Indonesian,Spanish,Chinese,Malay": 0.2556818181818182, "Indonesian,Spanish,Chinese,Vietnamese": 0.25, - "Indonesian,Spanish,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,Chinese,Malay,Vietnamese": 0.2727272727272727, + "Indonesian,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.2556818181818182, "English,Filipino,Spanish,Chinese": 0.1875, - "English,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Filipino,Spanish,Malay": 0.22727272727272727, "English,Filipino,Spanish,Vietnamese": 0.17613636363636365, - "English,Filipino,Chinese,Malay": 0.17045454545454544, - "English,Filipino,Chinese,Vietnamese": 0.1534090909090909, - "English,Filipino,Malay,Vietnamese": 0.17613636363636365, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Vietnamese": 0.1590909090909091, + "English,Filipino,Malay,Vietnamese": 0.19318181818181818, "English,Spanish,Chinese,Malay": 0.2784090909090909, - "English,Spanish,Chinese,Vietnamese": 0.25, - "English,Spanish,Malay,Vietnamese": 0.2556818181818182, - "English,Chinese,Malay,Vietnamese": 0.24431818181818182, - "Filipino,Spanish,Chinese,Malay": 0.19886363636363635, - "Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, - "Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, - "Filipino,Chinese,Malay,Vietnamese": 0.1875, - "Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182 + "English,Spanish,Chinese,Vietnamese": 0.26136363636363635, + "English,Spanish,Malay,Vietnamese": 0.2727272727272727, + "English,Chinese,Malay,Vietnamese": 0.26136363636363635, + "Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Filipino,Spanish,Chinese,Vietnamese": 0.1875, + "Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Spanish,Chinese,Malay,Vietnamese": 0.25 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.13636363636363635, - "Indonesian,English,Filipino,Spanish,Malay": 0.17045454545454544, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.14772727272727273, - "Indonesian,English,Filipino,Chinese,Malay": 0.13068181818181818, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.125, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,English,Spanish,Chinese,Malay": 0.2159090909090909, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.19886363636363635, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Chinese": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Chinese,Malay": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.13636363636363635, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,English,Spanish,Chinese,Malay": 0.2215909090909091, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.18181818181818182, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, - "English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, - "English,Filipino,Spanish,Malay,Vietnamese": 0.1534090909090909, - "English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, - "English,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, + "English,Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, + "English,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909, "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.125, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.14204545454545456, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.125, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 } }, - "AC3_2": 0.4621186886063602, - "AC3_3": 0.3596339557392517, - "AC3_4": 0.2892494951571124, - "AC3_5": 0.24047501885170827, - "AC3_6": 0.20368027384489654, - "AC3_7": 0.1728567534095331 + "AC3_2": 0.4678250772627657, + "AC3_3": 0.36669260428931355, + "AC3_4": 0.2972593559086316, + "AC3_5": 0.2494887115124385, + "AC3_6": 0.21465528183203395, + "AC3_7": 0.18764865218627097 }, "prompt_2": { - "overall_acc": 0.4488636363636364, + "overall_acc": 0.45454545454545453, "language_acc": { - "Indonesian": 0.42045454545454547, - "English": 0.5511363636363636, - "Filipino": 0.3352272727272727, - "Spanish": 0.44886363636363635, - "Chinese": 0.48863636363636365, - "Malay": 0.4375, - "Vietnamese": 0.4602272727272727 + "Indonesian": 0.42613636363636365, + "English": 0.5568181818181818, + "Filipino": 0.32386363636363635, + "Spanish": 0.4602272727272727, + "Chinese": 0.4943181818181818, + "Malay": 0.42613636363636365, + "Vietnamese": 0.4943181818181818 }, - "consistency_score_2": 0.5303030303030303, - "consistency_score_3": 0.35162337662337656, - "consistency_score_4": 0.260551948051948, - "consistency_score_5": 0.20670995670995668, - "consistency_score_6": 0.17207792207792208, - "consistency_score_7": 0.14772727272727273, + "consistency_score_2": 0.5311147186147187, + "consistency_score_3": 0.3503246753246753, + "consistency_score_4": 0.25762987012987015, + "consistency_score_5": 0.2021103896103896, + "consistency_score_6": 0.16477272727272727, + "consistency_score_7": 0.13636363636363635, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5625, - "Indonesian,Filipino": 0.4943181818181818, - "Indonesian,Spanish": 0.5909090909090909, - "Indonesian,Chinese": 0.5227272727272727, - "Indonesian,Malay": 0.6193181818181818, - "Indonesian,Vietnamese": 0.5113636363636364, - "English,Filipino": 0.4090909090909091, - "English,Spanish": 0.6079545454545454, - "English,Chinese": 0.5568181818181818, - "English,Malay": 0.5852272727272727, - "English,Vietnamese": 0.5511363636363636, - "Filipino,Spanish": 0.4943181818181818, - "Filipino,Chinese": 0.4034090909090909, - "Filipino,Malay": 0.5113636363636364, - "Filipino,Vietnamese": 0.4602272727272727, - "Spanish,Chinese": 0.5170454545454546, - "Spanish,Malay": 0.6079545454545454, - "Spanish,Vietnamese": 0.5454545454545454, - "Chinese,Malay": 0.5170454545454546, - "Chinese,Vietnamese": 0.5511363636363636, - "Malay,Vietnamese": 0.5170454545454546 + "Indonesian,English": 0.5568181818181818, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,Spanish": 0.5852272727272727, + "Indonesian,Chinese": 0.5170454545454546, + "Indonesian,Malay": 0.6079545454545454, + "Indonesian,Vietnamese": 0.5056818181818182, + "English,Filipino": 0.4318181818181818, + "English,Spanish": 0.6136363636363636, + "English,Chinese": 0.5113636363636364, + "English,Malay": 0.5681818181818182, + "English,Vietnamese": 0.5738636363636364, + "Filipino,Spanish": 0.5113636363636364, + "Filipino,Chinese": 0.38636363636363635, + "Filipino,Malay": 0.4943181818181818, + "Filipino,Vietnamese": 0.4659090909090909, + "Spanish,Chinese": 0.5340909090909091, + "Spanish,Malay": 0.6420454545454546, + "Spanish,Vietnamese": 0.5511363636363636, + "Chinese,Malay": 0.4943181818181818, + "Chinese,Vietnamese": 0.5681818181818182, + "Malay,Vietnamese": 0.5227272727272727 }, "3_combine": { - "Indonesian,English,Filipino": 0.2840909090909091, - "Indonesian,English,Spanish": 0.4147727272727273, - "Indonesian,English,Chinese": 0.38636363636363635, - "Indonesian,English,Malay": 0.4318181818181818, - "Indonesian,English,Vietnamese": 0.35795454545454547, - "Indonesian,Filipino,Spanish": 0.3409090909090909, - "Indonesian,Filipino,Chinese": 0.2727272727272727, - "Indonesian,Filipino,Malay": 0.35795454545454547, - "Indonesian,Filipino,Vietnamese": 0.2897727272727273, - "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,English,Filipino": 0.3068181818181818, + "Indonesian,English,Spanish": 0.42045454545454547, + "Indonesian,English,Chinese": 0.3522727272727273, + "Indonesian,English,Malay": 0.42045454545454547, + "Indonesian,English,Vietnamese": 0.36363636363636365, + "Indonesian,Filipino,Spanish": 0.35795454545454547, + "Indonesian,Filipino,Chinese": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.36363636363636365, + "Indonesian,Filipino,Vietnamese": 0.30113636363636365, + "Indonesian,Spanish,Chinese": 0.35795454545454547, "Indonesian,Spanish,Malay": 0.4602272727272727, "Indonesian,Spanish,Vietnamese": 0.3693181818181818, - "Indonesian,Chinese,Malay": 0.3977272727272727, - "Indonesian,Chinese,Vietnamese": 0.3693181818181818, - "Indonesian,Malay,Vietnamese": 0.375, - "English,Filipino,Spanish": 0.3125, - "English,Filipino,Chinese": 0.2556818181818182, - "English,Filipino,Malay": 0.3068181818181818, - "English,Filipino,Vietnamese": 0.26704545454545453, - "English,Spanish,Chinese": 0.3977272727272727, + "Indonesian,Chinese,Malay": 0.36363636363636365, + "Indonesian,Chinese,Vietnamese": 0.36363636363636365, + "Indonesian,Malay,Vietnamese": 0.3693181818181818, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Filipino,Chinese": 0.23295454545454544, + "English,Filipino,Malay": 0.30113636363636365, + "English,Filipino,Vietnamese": 0.30113636363636365, + "English,Spanish,Chinese": 0.375, "English,Spanish,Malay": 0.4431818181818182, - "English,Spanish,Vietnamese": 0.39204545454545453, - "English,Chinese,Malay": 0.4034090909090909, - "English,Chinese,Vietnamese": 0.38636363636363635, + "English,Spanish,Vietnamese": 0.4090909090909091, + "English,Chinese,Malay": 0.3693181818181818, + "English,Chinese,Vietnamese": 0.375, "English,Malay,Vietnamese": 0.38636363636363635, - "Filipino,Spanish,Chinese": 0.26704545454545453, - "Filipino,Spanish,Malay": 0.36363636363636365, - "Filipino,Spanish,Vietnamese": 0.3068181818181818, - "Filipino,Chinese,Malay": 0.2897727272727273, + "Filipino,Spanish,Chinese": 0.2727272727272727, + "Filipino,Spanish,Malay": 0.375, + "Filipino,Spanish,Vietnamese": 0.32386363636363635, + "Filipino,Chinese,Malay": 0.2556818181818182, "Filipino,Chinese,Vietnamese": 0.2727272727272727, - "Filipino,Malay,Vietnamese": 0.29545454545454547, + "Filipino,Malay,Vietnamese": 0.3068181818181818, "Spanish,Chinese,Malay": 0.3693181818181818, - "Spanish,Chinese,Vietnamese": 0.3693181818181818, - "Spanish,Malay,Vietnamese": 0.3806818181818182, - "Chinese,Malay,Vietnamese": 0.36363636363636365 + "Spanish,Chinese,Vietnamese": 0.3806818181818182, + "Spanish,Malay,Vietnamese": 0.38636363636363635, + "Chinese,Malay,Vietnamese": 0.35795454545454547 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.25, - "Indonesian,English,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Spanish": 0.26704545454545453, + "Indonesian,English,Filipino,Chinese": 0.19886363636363635, "Indonesian,English,Filipino,Malay": 0.25, - "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, - "Indonesian,English,Spanish,Chinese": 0.3068181818181818, + "Indonesian,English,Filipino,Vietnamese": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese": 0.2840909090909091, "Indonesian,English,Spanish,Malay": 0.3522727272727273, - "Indonesian,English,Spanish,Vietnamese": 0.2784090909090909, - "Indonesian,English,Chinese,Malay": 0.32954545454545453, - "Indonesian,English,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.2897727272727273, + "Indonesian,English,Chinese,Malay": 0.30113636363636365, + "Indonesian,English,Chinese,Vietnamese": 0.2784090909090909, "Indonesian,English,Malay,Vietnamese": 0.30113636363636365, - "Indonesian,Filipino,Spanish,Chinese": 0.2215909090909091, - "Indonesian,Filipino,Spanish,Malay": 0.2897727272727273, - "Indonesian,Filipino,Spanish,Vietnamese": 0.2215909090909091, - "Indonesian,Filipino,Chinese,Malay": 0.23295454545454544, - "Indonesian,Filipino,Chinese,Vietnamese": 0.21022727272727273, - "Indonesian,Filipino,Malay,Vietnamese": 0.2215909090909091, - "Indonesian,Spanish,Chinese,Malay": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Chinese,Malay": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,Spanish,Chinese,Malay": 0.29545454545454547, "Indonesian,Spanish,Chinese,Vietnamese": 0.2784090909090909, - "Indonesian,Spanish,Malay,Vietnamese": 0.2897727272727273, - "Indonesian,Chinese,Malay,Vietnamese": 0.29545454545454547, - "English,Filipino,Spanish,Chinese": 0.2159090909090909, - "English,Filipino,Spanish,Malay": 0.26704545454545453, - "English,Filipino,Spanish,Vietnamese": 0.2215909090909091, - "English,Filipino,Chinese,Malay": 0.22727272727272727, + "Indonesian,Spanish,Malay,Vietnamese": 0.30113636363636365, + "Indonesian,Chinese,Malay,Vietnamese": 0.2784090909090909, + "English,Filipino,Spanish,Chinese": 0.20454545454545456, + "English,Filipino,Spanish,Malay": 0.2727272727272727, + "English,Filipino,Spanish,Vietnamese": 0.24431818181818182, + "English,Filipino,Chinese,Malay": 0.19886363636363635, "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, - "English,Filipino,Malay,Vietnamese": 0.2215909090909091, - "English,Spanish,Chinese,Malay": 0.32386363636363635, + "English,Filipino,Malay,Vietnamese": 0.23295454545454544, + "English,Spanish,Chinese,Malay": 0.3068181818181818, "English,Spanish,Chinese,Vietnamese": 0.30113636363636365, - "English,Spanish,Malay,Vietnamese": 0.3068181818181818, - "English,Chinese,Malay,Vietnamese": 0.30113636363636365, - "Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "English,Spanish,Malay,Vietnamese": 0.3181818181818182, + "English,Chinese,Malay,Vietnamese": 0.2897727272727273, + "Filipino,Spanish,Chinese,Malay": 0.21022727272727273, "Filipino,Spanish,Chinese,Vietnamese": 0.2159090909090909, - "Filipino,Spanish,Malay,Vietnamese": 0.23863636363636365, - "Filipino,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Filipino,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Filipino,Chinese,Malay,Vietnamese": 0.2159090909090909, "Spanish,Chinese,Malay,Vietnamese": 0.2840909090909091 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Chinese": 0.17613636363636365, "Indonesian,English,Filipino,Spanish,Malay": 0.23295454545454544, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.18181818181818182, - "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17613636363636365, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.1875, - "Indonesian,English,Spanish,Chinese,Malay": 0.2727272727272727, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Malay": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,English,Spanish,Chinese,Malay": 0.2556818181818182, "Indonesian,English,Spanish,Chinese,Vietnamese": 0.23295454545454544, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.24431818181818182, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.2556818181818182, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17613636363636365, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.1875, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, - "English,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, "English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, - "English,Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, - "English,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "English,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, "English,Spanish,Chinese,Malay,Vietnamese": 0.25, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1875 + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1534090909090909, "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635 } }, - "AC3_2": 0.48619658866144133, - "AC3_3": 0.39433730936612466, - "AC3_4": 0.3297144788384825, - "AC3_5": 0.28306382016467846, - "AC3_6": 0.24878193697716589, - "AC3_7": 0.22229437225711152 + "AC3_2": 0.48985601272698426, + "AC3_3": 0.39568741056362206, + "AC3_4": 0.3288642061829707, + "AC3_5": 0.27980671981355276, + "AC3_6": 0.24186822348054574, + "AC3_7": 0.20979020975470683 }, "prompt_3": { - "overall_acc": 0.46022727272727265, + "overall_acc": 0.4545454545454546, "language_acc": { - "Indonesian": 0.44886363636363635, - "English": 0.5568181818181818, - "Filipino": 0.36363636363636365, + "Indonesian": 0.4318181818181818, + "English": 0.5454545454545454, + "Filipino": 0.35795454545454547, "Spanish": 0.4602272727272727, - "Chinese": 0.45454545454545453, - "Malay": 0.45454545454545453, - "Vietnamese": 0.48295454545454547 + "Chinese": 0.48863636363636365, + "Malay": 0.4090909090909091, + "Vietnamese": 0.48863636363636365 }, - "consistency_score_2": 0.533008658008658, - "consistency_score_3": 0.35422077922077916, - "consistency_score_4": 0.2649350649350649, - "consistency_score_5": 0.21401515151515152, - "consistency_score_6": 0.18181818181818185, + "consistency_score_2": 0.5403138528138529, + "consistency_score_3": 0.36379870129870123, + "consistency_score_4": 0.2722402597402597, + "consistency_score_5": 0.21861471861471865, + "consistency_score_6": 0.18425324675324675, "consistency_score_7": 0.1590909090909091, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5340909090909091, - "Indonesian,Filipino": 0.48295454545454547, - "Indonesian,Spanish": 0.5625, - "Indonesian,Chinese": 0.5625, - "Indonesian,Malay": 0.6193181818181818, + "Indonesian,English": 0.5681818181818182, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5909090909090909, + "Indonesian,Chinese": 0.5681818181818182, + "Indonesian,Malay": 0.625, "Indonesian,Vietnamese": 0.5340909090909091, - "English,Filipino": 0.4431818181818182, - "English,Spanish": 0.625, - "English,Chinese": 0.5056818181818182, + "English,Filipino": 0.45454545454545453, + "English,Spanish": 0.5852272727272727, + "English,Chinese": 0.5284090909090909, "English,Malay": 0.5795454545454546, - "English,Vietnamese": 0.5454545454545454, + "English,Vietnamese": 0.5511363636363636, "Filipino,Spanish": 0.48295454545454547, - "Filipino,Chinese": 0.4034090909090909, - "Filipino,Malay": 0.5113636363636364, - "Filipino,Vietnamese": 0.4772727272727273, - "Spanish,Chinese": 0.5397727272727273, - "Spanish,Malay": 0.6022727272727273, - "Spanish,Vietnamese": 0.5852272727272727, - "Chinese,Malay": 0.5, + "Filipino,Chinese": 0.3977272727272727, + "Filipino,Malay": 0.5056818181818182, + "Filipino,Vietnamese": 0.4602272727272727, + "Spanish,Chinese": 0.5738636363636364, + "Spanish,Malay": 0.625, + "Spanish,Vietnamese": 0.5909090909090909, + "Chinese,Malay": 0.5227272727272727, "Chinese,Vietnamese": 0.5568181818181818, - "Malay,Vietnamese": 0.5397727272727273 + "Malay,Vietnamese": 0.5511363636363636 }, "3_combine": { - "Indonesian,English,Filipino": 0.2840909090909091, + "Indonesian,English,Filipino": 0.3068181818181818, "Indonesian,English,Spanish": 0.4034090909090909, - "Indonesian,English,Chinese": 0.35795454545454547, - "Indonesian,English,Malay": 0.4090909090909091, - "Indonesian,English,Vietnamese": 0.3522727272727273, - "Indonesian,Filipino,Spanish": 0.32954545454545453, - "Indonesian,Filipino,Chinese": 0.2897727272727273, + "Indonesian,English,Chinese": 0.38636363636363635, + "Indonesian,English,Malay": 0.4318181818181818, + "Indonesian,English,Vietnamese": 0.375, + "Indonesian,Filipino,Spanish": 0.3352272727272727, + "Indonesian,Filipino,Chinese": 0.29545454545454547, "Indonesian,Filipino,Malay": 0.35795454545454547, - "Indonesian,Filipino,Vietnamese": 0.32386363636363635, - "Indonesian,Spanish,Chinese": 0.3693181818181818, - "Indonesian,Spanish,Malay": 0.4318181818181818, - "Indonesian,Spanish,Vietnamese": 0.375, - "Indonesian,Chinese,Malay": 0.38636363636363635, - "Indonesian,Chinese,Vietnamese": 0.39204545454545453, - "Indonesian,Malay,Vietnamese": 0.39204545454545453, - "English,Filipino,Spanish": 0.3352272727272727, - "English,Filipino,Chinese": 0.24431818181818182, - "English,Filipino,Malay": 0.3068181818181818, - "English,Filipino,Vietnamese": 0.3068181818181818, - "English,Spanish,Chinese": 0.3806818181818182, - "English,Spanish,Malay": 0.4318181818181818, - "English,Spanish,Vietnamese": 0.4147727272727273, - "English,Chinese,Malay": 0.35795454545454547, - "English,Chinese,Vietnamese": 0.3693181818181818, - "English,Malay,Vietnamese": 0.39204545454545453, - "Filipino,Spanish,Chinese": 0.2840909090909091, - "Filipino,Spanish,Malay": 0.35795454545454547, + "Indonesian,Filipino,Vietnamese": 0.3068181818181818, + "Indonesian,Spanish,Chinese": 0.4147727272727273, + "Indonesian,Spanish,Malay": 0.44886363636363635, + "Indonesian,Spanish,Vietnamese": 0.4034090909090909, + "Indonesian,Chinese,Malay": 0.4034090909090909, + "Indonesian,Chinese,Vietnamese": 0.3977272727272727, + "Indonesian,Malay,Vietnamese": 0.4034090909090909, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Filipino,Chinese": 0.25, + "English,Filipino,Malay": 0.32386363636363635, + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Spanish,Chinese": 0.39204545454545453, + "English,Spanish,Malay": 0.42613636363636365, + "English,Spanish,Vietnamese": 0.4090909090909091, + "English,Chinese,Malay": 0.3806818181818182, + "English,Chinese,Vietnamese": 0.3806818181818182, + "English,Malay,Vietnamese": 0.4034090909090909, + "Filipino,Spanish,Chinese": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.36363636363636365, "Filipino,Spanish,Vietnamese": 0.32954545454545453, "Filipino,Chinese,Malay": 0.2840909090909091, "Filipino,Chinese,Vietnamese": 0.30113636363636365, - "Filipino,Malay,Vietnamese": 0.3352272727272727, - "Spanish,Chinese,Malay": 0.35795454545454547, - "Spanish,Chinese,Vietnamese": 0.3977272727272727, - "Spanish,Malay,Vietnamese": 0.39204545454545453, - "Chinese,Malay,Vietnamese": 0.36363636363636365 + "Filipino,Malay,Vietnamese": 0.3181818181818182, + "Spanish,Chinese,Malay": 0.38636363636363635, + "Spanish,Chinese,Vietnamese": 0.42045454545454547, + "Spanish,Malay,Vietnamese": 0.4147727272727273, + "Chinese,Malay,Vietnamese": 0.375 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.24431818181818182, - "Indonesian,English,Filipino,Chinese": 0.20454545454545456, - "Indonesian,English,Filipino,Malay": 0.23863636363636365, - "Indonesian,English,Filipino,Vietnamese": 0.23863636363636365, - "Indonesian,English,Spanish,Chinese": 0.29545454545454547, - "Indonesian,English,Spanish,Malay": 0.32386363636363635, - "Indonesian,English,Spanish,Vietnamese": 0.29545454545454547, - "Indonesian,English,Chinese,Malay": 0.30113636363636365, - "Indonesian,English,Chinese,Vietnamese": 0.2897727272727273, - "Indonesian,English,Malay,Vietnamese": 0.2897727272727273, - "Indonesian,Filipino,Spanish,Chinese": 0.23295454545454544, - "Indonesian,Filipino,Spanish,Malay": 0.26136363636363635, - "Indonesian,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Malay": 0.2556818181818182, + "Indonesian,English,Filipino,Vietnamese": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese": 0.3181818181818182, + "Indonesian,English,Spanish,Malay": 0.3409090909090909, + "Indonesian,English,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,English,Chinese,Malay": 0.32386363636363635, + "Indonesian,English,Chinese,Vietnamese": 0.30113636363636365, + "Indonesian,English,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,Filipino,Spanish,Chinese": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Vietnamese": 0.25, "Indonesian,Filipino,Chinese,Malay": 0.23863636363636365, - "Indonesian,Filipino,Chinese,Vietnamese": 0.23863636363636365, - "Indonesian,Filipino,Malay,Vietnamese": 0.26136363636363635, - "Indonesian,Spanish,Chinese,Malay": 0.30113636363636365, - "Indonesian,Spanish,Chinese,Vietnamese": 0.30113636363636365, - "Indonesian,Spanish,Malay,Vietnamese": 0.29545454545454547, - "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, - "English,Filipino,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Spanish,Chinese,Malay": 0.32954545454545453, + "Indonesian,Spanish,Chinese,Vietnamese": 0.32386363636363635, + "Indonesian,Spanish,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Chinese,Malay,Vietnamese": 0.3125, + "English,Filipino,Spanish,Chinese": 0.2159090909090909, "English,Filipino,Spanish,Malay": 0.26136363636363635, - "English,Filipino,Spanish,Vietnamese": 0.2556818181818182, - "English,Filipino,Chinese,Malay": 0.2159090909090909, - "English,Filipino,Chinese,Vietnamese": 0.20454545454545456, - "English,Filipino,Malay,Vietnamese": 0.24431818181818182, - "English,Spanish,Chinese,Malay": 0.29545454545454547, + "English,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "English,Filipino,Chinese,Malay": 0.2215909090909091, + "English,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "English,Filipino,Malay,Vietnamese": 0.23295454545454544, + "English,Spanish,Chinese,Malay": 0.3068181818181818, "English,Spanish,Chinese,Vietnamese": 0.3125, - "English,Spanish,Malay,Vietnamese": 0.3181818181818182, - "English,Chinese,Malay,Vietnamese": 0.2897727272727273, - "Filipino,Spanish,Chinese,Malay": 0.23295454545454544, - "Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, - "Filipino,Spanish,Malay,Vietnamese": 0.26136363636363635, - "Filipino,Chinese,Malay,Vietnamese": 0.24431818181818182, - "Spanish,Chinese,Malay,Vietnamese": 0.2897727272727273 + "English,Spanish,Malay,Vietnamese": 0.32386363636363635, + "English,Chinese,Malay,Vietnamese": 0.30113636363636365, + "Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "Filipino,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "Filipino,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Filipino,Chinese,Malay,Vietnamese": 0.23863636363636365, + "Spanish,Chinese,Malay,Vietnamese": 0.3068181818181818 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.1875, - "Indonesian,English,Filipino,Spanish,Malay": 0.20454545454545456, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.20454545454545456, - "Indonesian,English,Filipino,Chinese,Malay": 0.19318181818181818, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Spanish,Chinese": 0.19886363636363635, + "Indonesian,English,Filipino,Spanish,Malay": 0.21022727272727273, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.18181818181818182, "Indonesian,English,Filipino,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,English,Spanish,Chinese,Malay": 0.2556818181818182, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.25, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.24431818181818182, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.25, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.25, + "Indonesian,English,Spanish,Chinese,Malay": 0.2784090909090909, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2556818181818182, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2727272727272727, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727, "English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.1875, - "English,Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, - "English,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, - "English,Spanish,Chinese,Malay,Vietnamese": 0.25, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456 + "English,Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, + "English,Filipino,Spanish,Malay,Vietnamese": 0.20454545454545456, + "English,Filipino,Chinese,Malay,Vietnamese": 0.1875, + "English,Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 }, "7_combine": { "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 } }, - "AC3_2": 0.4939513632151703, - "AC3_3": 0.40032525771082667, - "AC3_4": 0.33628426639235887, - "AC3_5": 0.2921667517442057, - "AC3_6": 0.2606596942474146, - "AC3_7": 0.23644703916115414 + "AC3_2": 0.4937325388284096, + "AC3_3": 0.40414059254587914, + "AC3_4": 0.3405283570104286, + "AC3_5": 0.2952353112686236, + "AC3_6": 0.2622155480703067, + "AC3_7": 0.23569023565182692 }, "prompt_4": { - "overall_acc": 0.43344155844155846, + "overall_acc": 0.4375, "language_acc": { - "Indonesian": 0.3806818181818182, - "English": 0.5056818181818182, - "Filipino": 0.30113636363636365, + "Indonesian": 0.39204545454545453, + "English": 0.5, + "Filipino": 0.3068181818181818, "Spanish": 0.45454545454545453, - "Chinese": 0.4602272727272727, - "Malay": 0.45454545454545453, - "Vietnamese": 0.4772727272727273 + "Chinese": 0.4772727272727273, + "Malay": 0.44886363636363635, + "Vietnamese": 0.48295454545454547 }, - "consistency_score_2": 0.5075757575757577, - "consistency_score_3": 0.32418831168831164, - "consistency_score_4": 0.23538961038961043, - "consistency_score_5": 0.18479437229437232, - "consistency_score_6": 0.15097402597402595, - "consistency_score_7": 0.125, + "consistency_score_2": 0.5151515151515151, + "consistency_score_3": 0.33522727272727276, + "consistency_score_4": 0.24675324675324675, + "consistency_score_5": 0.19561688311688313, + "consistency_score_6": 0.16152597402597402, + "consistency_score_7": 0.13636363636363635, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5113636363636364, - "Indonesian,Filipino": 0.48863636363636365, - "Indonesian,Spanish": 0.5056818181818182, - "Indonesian,Chinese": 0.48295454545454547, - "Indonesian,Malay": 0.6022727272727273, - "Indonesian,Vietnamese": 0.4772727272727273, - "English,Filipino": 0.39204545454545453, - "English,Spanish": 0.625, - "English,Chinese": 0.5227272727272727, - "English,Malay": 0.5681818181818182, - "English,Vietnamese": 0.5397727272727273, - "Filipino,Spanish": 0.42045454545454547, + "Indonesian,English": 0.5340909090909091, + "Indonesian,Filipino": 0.5056818181818182, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Chinese": 0.48863636363636365, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Vietnamese": 0.4943181818181818, + "English,Filipino": 0.375, + "English,Spanish": 0.6363636363636364, + "English,Chinese": 0.5340909090909091, + "English,Malay": 0.5625, + "English,Vietnamese": 0.5511363636363636, + "Filipino,Spanish": 0.42613636363636365, "Filipino,Chinese": 0.3806818181818182, "Filipino,Malay": 0.4715909090909091, - "Filipino,Vietnamese": 0.4715909090909091, - "Spanish,Chinese": 0.5340909090909091, - "Spanish,Malay": 0.5511363636363636, - "Spanish,Vietnamese": 0.5681818181818182, - "Chinese,Malay": 0.5056818181818182, - "Chinese,Vietnamese": 0.5227272727272727, + "Filipino,Vietnamese": 0.42613636363636365, + "Spanish,Chinese": 0.5511363636363636, + "Spanish,Malay": 0.5795454545454546, + "Spanish,Vietnamese": 0.5795454545454546, + "Chinese,Malay": 0.5113636363636364, + "Chinese,Vietnamese": 0.5340909090909091, "Malay,Vietnamese": 0.5170454545454546 }, "3_combine": { - "Indonesian,English,Filipino": 0.26704545454545453, - "Indonesian,English,Spanish": 0.375, - "Indonesian,English,Chinese": 0.32386363636363635, - "Indonesian,English,Malay": 0.3977272727272727, - "Indonesian,English,Vietnamese": 0.3352272727272727, - "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,English,Filipino": 0.2784090909090909, + "Indonesian,English,Spanish": 0.4147727272727273, + "Indonesian,English,Chinese": 0.3409090909090909, + "Indonesian,English,Malay": 0.4034090909090909, + "Indonesian,English,Vietnamese": 0.35795454545454547, + "Indonesian,Filipino,Spanish": 0.29545454545454547, "Indonesian,Filipino,Chinese": 0.23863636363636365, - "Indonesian,Filipino,Malay": 0.32386363636363635, - "Indonesian,Filipino,Vietnamese": 0.30113636363636365, - "Indonesian,Spanish,Chinese": 0.32386363636363635, - "Indonesian,Spanish,Malay": 0.38636363636363635, - "Indonesian,Spanish,Vietnamese": 0.32954545454545453, - "Indonesian,Chinese,Malay": 0.3522727272727273, - "Indonesian,Chinese,Vietnamese": 0.32954545454545453, - "Indonesian,Malay,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Malay": 0.3409090909090909, + "Indonesian,Filipino,Vietnamese": 0.29545454545454547, + "Indonesian,Spanish,Chinese": 0.3522727272727273, + "Indonesian,Spanish,Malay": 0.42045454545454547, + "Indonesian,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,Chinese,Malay": 0.36363636363636365, + "Indonesian,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,Malay,Vietnamese": 0.35795454545454547, "English,Filipino,Spanish": 0.30113636363636365, "English,Filipino,Chinese": 0.2159090909090909, - "English,Filipino,Malay": 0.2784090909090909, - "English,Filipino,Vietnamese": 0.2727272727272727, - "English,Spanish,Chinese": 0.38636363636363635, - "English,Spanish,Malay": 0.42045454545454547, - "English,Spanish,Vietnamese": 0.4090909090909091, - "English,Chinese,Malay": 0.36363636363636365, - "English,Chinese,Vietnamese": 0.3522727272727273, - "English,Malay,Vietnamese": 0.375, - "Filipino,Spanish,Chinese": 0.23863636363636365, - "Filipino,Spanish,Malay": 0.2897727272727273, - "Filipino,Spanish,Vietnamese": 0.2897727272727273, + "English,Filipino,Malay": 0.2840909090909091, + "English,Filipino,Vietnamese": 0.24431818181818182, + "English,Spanish,Chinese": 0.4090909090909091, + "English,Spanish,Malay": 0.4375, + "English,Spanish,Vietnamese": 0.4318181818181818, + "English,Chinese,Malay": 0.375, + "English,Chinese,Vietnamese": 0.36363636363636365, + "English,Malay,Vietnamese": 0.3693181818181818, + "Filipino,Spanish,Chinese": 0.25, + "Filipino,Spanish,Malay": 0.3125, + "Filipino,Spanish,Vietnamese": 0.2727272727272727, "Filipino,Chinese,Malay": 0.24431818181818182, - "Filipino,Chinese,Vietnamese": 0.26704545454545453, - "Filipino,Malay,Vietnamese": 0.3125, - "Spanish,Chinese,Malay": 0.3465909090909091, - "Spanish,Chinese,Vietnamese": 0.3693181818181818, - "Spanish,Malay,Vietnamese": 0.375, - "Chinese,Malay,Vietnamese": 0.3352272727272727 + "Filipino,Chinese,Vietnamese": 0.24431818181818182, + "Filipino,Malay,Vietnamese": 0.3068181818181818, + "Spanish,Chinese,Malay": 0.3693181818181818, + "Spanish,Chinese,Vietnamese": 0.39204545454545453, + "Spanish,Malay,Vietnamese": 0.3977272727272727, + "Chinese,Malay,Vietnamese": 0.3409090909090909 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.2159090909090909, - "Indonesian,English,Filipino,Chinese": 0.1590909090909091, - "Indonesian,English,Filipino,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Spanish": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese": 0.16477272727272727, + "Indonesian,English,Filipino,Malay": 0.23295454545454544, "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, - "Indonesian,English,Spanish,Chinese": 0.26704545454545453, - "Indonesian,English,Spanish,Malay": 0.3125, - "Indonesian,English,Spanish,Vietnamese": 0.26704545454545453, - "Indonesian,English,Chinese,Malay": 0.2784090909090909, - "Indonesian,English,Chinese,Vietnamese": 0.24431818181818182, - "Indonesian,English,Malay,Vietnamese": 0.2784090909090909, - "Indonesian,Filipino,Spanish,Chinese": 0.17045454545454544, - "Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, - "Indonesian,Filipino,Spanish,Vietnamese": 0.2159090909090909, - "Indonesian,Filipino,Chinese,Malay": 0.19318181818181818, - "Indonesian,Filipino,Chinese,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Malay,Vietnamese": 0.24431818181818182, - "Indonesian,Spanish,Chinese,Malay": 0.26704545454545453, - "Indonesian,Spanish,Chinese,Vietnamese": 0.2556818181818182, - "Indonesian,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Spanish,Malay": 0.3409090909090909, + "Indonesian,English,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,English,Chinese,Malay": 0.2897727272727273, + "Indonesian,English,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,Filipino,Malay,Vietnamese": 0.25, + "Indonesian,Spanish,Chinese,Malay": 0.2840909090909091, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2840909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.3068181818181818, "Indonesian,Chinese,Malay,Vietnamese": 0.26136363636363635, - "English,Filipino,Spanish,Chinese": 0.18181818181818182, - "English,Filipino,Spanish,Malay": 0.22727272727272727, - "English,Filipino,Spanish,Vietnamese": 0.2215909090909091, - "English,Filipino,Chinese,Malay": 0.18181818181818182, - "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, - "English,Filipino,Malay,Vietnamese": 0.22727272727272727, - "English,Spanish,Chinese,Malay": 0.2897727272727273, - "English,Spanish,Chinese,Vietnamese": 0.2897727272727273, - "English,Spanish,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish,Chinese": 0.19886363636363635, + "English,Filipino,Spanish,Malay": 0.25, + "English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "English,Filipino,Chinese,Malay": 0.1875, + "English,Filipino,Chinese,Vietnamese": 0.17613636363636365, + "English,Filipino,Malay,Vietnamese": 0.2215909090909091, + "English,Spanish,Chinese,Malay": 0.3125, + "English,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "English,Spanish,Malay,Vietnamese": 0.3181818181818182, "English,Chinese,Malay,Vietnamese": 0.2784090909090909, - "Filipino,Spanish,Chinese,Malay": 0.18181818181818182, - "Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, - "Filipino,Spanish,Malay,Vietnamese": 0.23295454545454544, - "Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, - "Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727 + "Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, + "Filipino,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Spanish,Chinese,Malay,Vietnamese": 0.2897727272727273 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.13636363636363635, - "Indonesian,English,Filipino,Spanish,Malay": 0.18181818181818182, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.17613636363636365, - "Indonesian,English,Filipino,Chinese,Malay": 0.1534090909090909, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1534090909090909, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.1875, - "Indonesian,English,Spanish,Chinese,Malay": 0.23295454545454544, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2159090909090909, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Malay": 0.21022727272727273, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Chinese,Malay": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,English,Spanish,Chinese,Malay": 0.2556818181818182, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.26136363636363635, "Indonesian,English,Chinese,Malay,Vietnamese": 0.2215909090909091, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091, - "English,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, + "English,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, "English,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "English,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, - "English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, - "English,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 + "English,Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "English,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, + "English,Spanish,Chinese,Malay,Vietnamese": 0.25, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.13068181818181818, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635 } }, - "AC3_2": 0.4675884781770865, - "AC3_3": 0.37093755821450325, - "AC3_4": 0.3050923590515977, - "AC3_5": 0.25911648522336955, - "AC3_6": 0.22394480515648574, - "AC3_7": 0.19404069763967177 + "AC3_2": 0.47316103374754886, + "AC3_3": 0.37959558818617, + "AC3_4": 0.31553973898116905, + "AC3_5": 0.27035256405986224, + "AC3_6": 0.2359417343779566, + "AC3_7": 0.20792079204297617 }, "prompt_5": { - "overall_acc": 0.4293831168831169, + "overall_acc": 0.4439935064935065, "language_acc": { "Indonesian": 0.42613636363636365, - "English": 0.4943181818181818, - "Filipino": 0.29545454545454547, - "Spanish": 0.4431818181818182, - "Chinese": 0.4715909090909091, - "Malay": 0.3977272727272727, - "Vietnamese": 0.4772727272727273 + "English": 0.5284090909090909, + "Filipino": 0.3125, + "Spanish": 0.4375, + "Chinese": 0.4772727272727273, + "Malay": 0.42045454545454547, + "Vietnamese": 0.5056818181818182 }, - "consistency_score_2": 0.4945887445887445, - "consistency_score_3": 0.30746753246753245, - "consistency_score_4": 0.21883116883116882, - "consistency_score_5": 0.16991341991341993, - "consistency_score_6": 0.13798701298701296, - "consistency_score_7": 0.11363636363636363, + "consistency_score_2": 0.5075757575757576, + "consistency_score_3": 0.32337662337662343, + "consistency_score_4": 0.23474025974025972, + "consistency_score_5": 0.18452380952380953, + "consistency_score_6": 0.15097402597402598, + "consistency_score_7": 0.125, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.5227272727272727, + "Indonesian,English": 0.5397727272727273, "Indonesian,Filipino": 0.48295454545454547, - "Indonesian,Spanish": 0.5227272727272727, - "Indonesian,Chinese": 0.5, - "Indonesian,Malay": 0.625, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Chinese": 0.5170454545454546, + "Indonesian,Malay": 0.6022727272727273, "Indonesian,Vietnamese": 0.4659090909090909, - "English,Filipino": 0.3806818181818182, - "English,Spanish": 0.6079545454545454, - "English,Chinese": 0.5056818181818182, - "English,Malay": 0.5511363636363636, - "English,Vietnamese": 0.4943181818181818, - "Filipino,Spanish": 0.4318181818181818, - "Filipino,Chinese": 0.375, - "Filipino,Malay": 0.4715909090909091, - "Filipino,Vietnamese": 0.4034090909090909, + "English,Filipino": 0.375, + "English,Spanish": 0.5795454545454546, + "English,Chinese": 0.5284090909090909, + "English,Malay": 0.5625, + "English,Vietnamese": 0.5511363636363636, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Chinese": 0.38636363636363635, + "Filipino,Malay": 0.5, + "Filipino,Vietnamese": 0.4318181818181818, "Spanish,Chinese": 0.4943181818181818, - "Spanish,Malay": 0.5511363636363636, - "Spanish,Vietnamese": 0.5397727272727273, - "Chinese,Malay": 0.45454545454545453, - "Chinese,Vietnamese": 0.5056818181818182, - "Malay,Vietnamese": 0.5 + "Spanish,Malay": 0.5454545454545454, + "Spanish,Vietnamese": 0.5909090909090909, + "Chinese,Malay": 0.5170454545454546, + "Chinese,Vietnamese": 0.5227272727272727, + "Malay,Vietnamese": 0.5340909090909091 }, "3_combine": { "Indonesian,English,Filipino": 0.26136363636363635, "Indonesian,English,Spanish": 0.375, - "Indonesian,English,Chinese": 0.32954545454545453, + "Indonesian,English,Chinese": 0.35795454545454547, "Indonesian,English,Malay": 0.39204545454545453, - "Indonesian,English,Vietnamese": 0.3181818181818182, - "Indonesian,Filipino,Spanish": 0.2897727272727273, - "Indonesian,Filipino,Chinese": 0.25, + "Indonesian,English,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.2727272727272727, + "Indonesian,Filipino,Chinese": 0.2556818181818182, "Indonesian,Filipino,Malay": 0.3352272727272727, - "Indonesian,Filipino,Vietnamese": 0.26136363636363635, - "Indonesian,Spanish,Chinese": 0.3125, - "Indonesian,Spanish,Malay": 0.39204545454545453, - "Indonesian,Spanish,Vietnamese": 0.32386363636363635, - "Indonesian,Chinese,Malay": 0.3465909090909091, - "Indonesian,Chinese,Vietnamese": 0.32386363636363635, - "Indonesian,Malay,Vietnamese": 0.3465909090909091, - "English,Filipino,Spanish": 0.2784090909090909, - "English,Filipino,Chinese": 0.2215909090909091, - "English,Filipino,Malay": 0.2727272727272727, - "English,Filipino,Vietnamese": 0.2215909090909091, - "English,Spanish,Chinese": 0.3522727272727273, - "English,Spanish,Malay": 0.39204545454545453, - "English,Spanish,Vietnamese": 0.36363636363636365, - "English,Chinese,Malay": 0.32386363636363635, - "English,Chinese,Vietnamese": 0.3068181818181818, - "English,Malay,Vietnamese": 0.3409090909090909, + "Indonesian,Filipino,Vietnamese": 0.2727272727272727, + "Indonesian,Spanish,Chinese": 0.32386363636363635, + "Indonesian,Spanish,Malay": 0.38636363636363635, + "Indonesian,Spanish,Vietnamese": 0.32954545454545453, + "Indonesian,Chinese,Malay": 0.3693181818181818, + "Indonesian,Chinese,Vietnamese": 0.3352272727272727, + "Indonesian,Malay,Vietnamese": 0.3693181818181818, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Filipino,Chinese": 0.22727272727272727, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Vietnamese": 0.26704545454545453, + "English,Spanish,Chinese": 0.35795454545454547, + "English,Spanish,Malay": 0.375, + "English,Spanish,Vietnamese": 0.39204545454545453, + "English,Chinese,Malay": 0.35795454545454547, + "English,Chinese,Vietnamese": 0.36363636363636365, + "English,Malay,Vietnamese": 0.375, "Filipino,Spanish,Chinese": 0.24431818181818182, - "Filipino,Spanish,Malay": 0.2840909090909091, - "Filipino,Spanish,Vietnamese": 0.25, - "Filipino,Chinese,Malay": 0.23863636363636365, - "Filipino,Chinese,Vietnamese": 0.24431818181818182, - "Filipino,Malay,Vietnamese": 0.26704545454545453, - "Spanish,Chinese,Malay": 0.3068181818181818, - "Spanish,Chinese,Vietnamese": 0.32954545454545453, - "Spanish,Malay,Vietnamese": 0.35795454545454547, - "Chinese,Malay,Vietnamese": 0.3068181818181818 + "Filipino,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Vietnamese": 0.26704545454545453, + "Filipino,Chinese,Malay": 0.26704545454545453, + "Filipino,Chinese,Vietnamese": 0.26136363636363635, + "Filipino,Malay,Vietnamese": 0.3125, + "Spanish,Chinese,Malay": 0.3352272727272727, + "Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Spanish,Malay,Vietnamese": 0.38636363636363635, + "Chinese,Malay,Vietnamese": 0.3409090909090909 }, "4_combine": { "Indonesian,English,Filipino,Spanish": 0.20454545454545456, - "Indonesian,English,Filipino,Chinese": 0.17613636363636365, - "Indonesian,English,Filipino,Malay": 0.2159090909090909, - "Indonesian,English,Filipino,Vietnamese": 0.1875, - "Indonesian,English,Spanish,Chinese": 0.26704545454545453, + "Indonesian,English,Filipino,Chinese": 0.18181818181818182, + "Indonesian,English,Filipino,Malay": 0.20454545454545456, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.2784090909090909, "Indonesian,English,Spanish,Malay": 0.29545454545454547, - "Indonesian,English,Spanish,Vietnamese": 0.26136363636363635, - "Indonesian,English,Chinese,Malay": 0.26136363636363635, - "Indonesian,English,Chinese,Vietnamese": 0.23863636363636365, - "Indonesian,English,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,English,Spanish,Vietnamese": 0.2727272727272727, + "Indonesian,English,Chinese,Malay": 0.2840909090909091, + "Indonesian,English,Chinese,Vietnamese": 0.26704545454545453, + "Indonesian,English,Malay,Vietnamese": 0.2897727272727273, "Indonesian,Filipino,Spanish,Chinese": 0.18181818181818182, - "Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, "Indonesian,Filipino,Spanish,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Chinese,Malay": 0.19318181818181818, - "Indonesian,Filipino,Chinese,Vietnamese": 0.19318181818181818, - "Indonesian,Filipino,Malay,Vietnamese": 0.21022727272727273, - "Indonesian,Spanish,Chinese,Malay": 0.23863636363636365, - "Indonesian,Spanish,Chinese,Vietnamese": 0.23295454545454544, - "Indonesian,Spanish,Malay,Vietnamese": 0.26136363636363635, - "Indonesian,Chinese,Malay,Vietnamese": 0.24431818181818182, - "English,Filipino,Spanish,Chinese": 0.1875, + "Indonesian,Filipino,Chinese,Malay": 0.20454545454545456, + "Indonesian,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Spanish,Chinese,Malay": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Vietnamese": 0.25, + "Indonesian,Spanish,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,Chinese,Malay,Vietnamese": 0.2727272727272727, + "English,Filipino,Spanish,Chinese": 0.18181818181818182, "English,Filipino,Spanish,Malay": 0.2159090909090909, - "English,Filipino,Spanish,Vietnamese": 0.18181818181818182, - "English,Filipino,Chinese,Malay": 0.18181818181818182, - "English,Filipino,Chinese,Vietnamese": 0.17045454545454544, - "English,Filipino,Malay,Vietnamese": 0.1875, - "English,Spanish,Chinese,Malay": 0.2556818181818182, - "English,Spanish,Chinese,Vietnamese": 0.23863636363636365, - "English,Spanish,Malay,Vietnamese": 0.2784090909090909, - "English,Chinese,Malay,Vietnamese": 0.23295454545454544, - "Filipino,Spanish,Chinese,Malay": 0.18181818181818182, - "Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, - "Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, - "Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, - "Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365 + "English,Filipino,Spanish,Vietnamese": 0.20454545454545456, + "English,Filipino,Chinese,Malay": 0.1875, + "English,Filipino,Chinese,Vietnamese": 0.20454545454545456, + "English,Filipino,Malay,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese,Malay": 0.2727272727272727, + "English,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "English,Spanish,Malay,Vietnamese": 0.29545454545454547, + "English,Chinese,Malay,Vietnamese": 0.2727272727272727, + "Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, + "Filipino,Spanish,Malay,Vietnamese": 0.2215909090909091, + "Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.2727272727272727 }, "5_combine": { "Indonesian,English,Filipino,Spanish,Chinese": 0.1534090909090909, - "Indonesian,English,Filipino,Spanish,Malay": 0.17045454545454544, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Malay": 0.16477272727272727, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.17045454545454544, "Indonesian,English,Filipino,Chinese,Malay": 0.1534090909090909, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.14772727272727273, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,English,Spanish,Chinese,Malay": 0.21022727272727273, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.20454545454545456, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.2215909090909091, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.19886363636363635, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,English,Spanish,Chinese,Malay": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091, "English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, - "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, - "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, - "English,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909 + "English,Filipino,Spanish,Chinese,Vietnamese": 0.16477272727272727, + "English,Filipino,Spanish,Malay,Vietnamese": 0.18181818181818182, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 }, "6_combine": { "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.13068181818181818, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818 + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 } }, - "AC3_2": 0.45968511717562976, - "AC3_3": 0.35833955640725035, - "AC3_4": 0.2899115659750307, - "AC3_5": 0.24347864321228224, - "AC3_6": 0.2088558794199266, - "AC3_7": 0.17971191734698388 + "AC3_2": 0.4736604026694829, + "AC3_3": 0.37420565467307376, + "AC3_4": 0.30711055264052634, + "AC3_5": 0.2607004488854016, + "AC3_6": 0.2253282188101282, + "AC3_7": 0.19507845930950973 } }, "sg_eval": { @@ -95646,19 +95646,19 @@ }, "cn_eval": { "prompt_1": { - "accuracy": 0.3238095238095238 + "accuracy": 0.26666666666666666 }, "prompt_2": { - "accuracy": 0.26666666666666666 + "accuracy": 0.3333333333333333 }, "prompt_3": { - "accuracy": 0.3238095238095238 + "accuracy": 0.3619047619047619 }, "prompt_4": { - "accuracy": 0.29523809523809524 + "accuracy": 0.4 }, "prompt_5": { - "accuracy": 0.2761904761904762 + "accuracy": 0.3047619047619048 } }, "us_eval": { @@ -95666,13 +95666,13 @@ "accuracy": 0.6915887850467289 }, "prompt_2": { - "accuracy": 0.7663551401869159 + "accuracy": 0.7757009345794392 }, "prompt_3": { - "accuracy": 0.7383177570093458 + "accuracy": 0.7476635514018691 }, "prompt_4": { - "accuracy": 0.7102803738317757 + "accuracy": 0.719626168224299 }, "prompt_5": { "accuracy": 0.7009345794392523 @@ -95680,7 +95680,7 @@ }, "ph_eval": { "prompt_1": { - "accuracy": 0.52, + "accuracy": 0.53, "category_acc": { "brand": 0.5, "demographics": 0.4, @@ -95688,7 +95688,7 @@ "history": 0.5333333333333333, "literature": 0.6, "politics": 0.8, - "culture": 0.6, + "culture": 0.7, "film": 0.4, "law": 0.3, "geography": 0.5 @@ -95697,40 +95697,40 @@ "prompt_2": { "accuracy": 0.51, "category_acc": { - "brand": 0.4, - "demographics": 0.4, - "biology": 0.5, - "history": 0.4, - "literature": 0.5, + "brand": 0.5, + "demographics": 0.2, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.4, "politics": 0.7, "culture": 0.7, "film": 0.5, "law": 0.4, - "geography": 0.6 + "geography": 0.7 } }, "prompt_3": { - "accuracy": 0.53, + "accuracy": 0.52, "category_acc": { - "brand": 0.5, - "demographics": 0.6, - "biology": 0.5, + "brand": 0.6, + "demographics": 0.4, + "biology": 0.6, "history": 0.4, "literature": 0.4, "politics": 0.7, - "culture": 0.8, - "film": 0.4, - "law": 0.5, - "geography": 0.6 + "culture": 0.7, + "film": 0.3, + "law": 0.4, + "geography": 0.7 } }, "prompt_4": { - "accuracy": 0.55, + "accuracy": 0.53, "category_acc": { "brand": 0.4, "demographics": 0.4, - "biology": 0.6, - "history": 0.4666666666666667, + "biology": 0.3, + "history": 0.5333333333333333, "literature": 0.7, "politics": 0.8, "culture": 0.8, @@ -95740,270 +95740,270 @@ } }, "prompt_5": { - "accuracy": 0.5, + "accuracy": 0.51, "category_acc": { "brand": 0.5, "demographics": 0.4, "biology": 0.3, "history": 0.5333333333333333, - "literature": 0.5, + "literature": 0.6, "politics": 0.8, - "culture": 0.6, + "culture": 0.7, "film": 0.4, - "law": 0.4, + "law": 0.3, "geography": 0.5 } } }, "sing2eng": { "prompt_1": { - "bleu_score": 0.2396434382915585 + "bleu_score": 0.23798948235798217 }, "prompt_2": { - "bleu_score": 0.2657607783185625 + "bleu_score": 0.2581721657503597 }, "prompt_3": { - "bleu_score": 0.1991577032131693 + "bleu_score": 0.2172503692490754 }, "prompt_4": { - "bleu_score": 0.22222624038742203 + "bleu_score": 0.22656708542827697 }, "prompt_5": { - "bleu_score": 0.21913847724220636 + "bleu_score": 0.21399056654831147 } }, "indommlu": { "prompt_1": { - "accuracy": 0.48220842512851325, + "accuracy": 0.44889511983443486, "category_acc": { - "History": 0.4859437751004016, - "Geography": 0.44285714285714284, - "Lampungic": 0.30612244897959184, - "Social science": 0.674457429048414, - "Balinese": 0.29723991507430997, - "Makassarese": 0.2903225806451613, - "Banjarese": 0.3888888888888889, - "Chemistry": 0.3284671532846715, - "Biology": 0.4662721893491124, - "Science": 0.5954592363261094, - "Christian religion": 0.5970149253731343, - "Art": 0.5790349417637272, - "Islam religion": 0.5860597439544808, - "Hindu religion": 0.52, - "Madurese": 0.33559322033898303, - "Sport": 0.5067567567567568, - "Indonesian language": 0.541095890410959, - "Physics": 0.3696969696969697, - "Minangkabau culture": 0.36180904522613067, - "Dayak language": 0.27522935779816515, - "Sociology": 0.4899193548387097, - "Economy": 0.4385245901639344, - "Sundanese": 0.4044943820224719, - "Javanese": 0.3860887096774194, - "Civic education": 0.580829756795422 + "History": 0.44779116465863456, + "Geography": 0.4061224489795918, + "Lampungic": 0.32653061224489793, + "Social science": 0.5826377295492488, + "Balinese": 0.2760084925690021, + "Makassarese": 0.2849462365591398, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.2846715328467153, + "Biology": 0.44260355029585796, + "Science": 0.5325077399380805, + "Christian religion": 0.527363184079602, + "Art": 0.5174708818635607, + "Islam religion": 0.55049786628734, + "Hindu religion": 0.44666666666666666, + "Madurese": 0.3288135593220339, + "Sport": 0.4594594594594595, + "Indonesian language": 0.5140099626400996, + "Physics": 0.3575757575757576, + "Minangkabau culture": 0.35175879396984927, + "Dayak language": 0.28440366972477066, + "Sociology": 0.4475806451612903, + "Economy": 0.4077868852459016, + "Sundanese": 0.40276577355229043, + "Javanese": 0.37701612903225806, + "Civic education": 0.5121602288984263 } }, "prompt_2": { - "accuracy": 0.48815007677415045, + "accuracy": 0.44141798517925096, "category_acc": { - "History": 0.4819277108433735, - "Geography": 0.46122448979591835, - "Lampungic": 0.3741496598639456, - "Social science": 0.7045075125208681, - "Balinese": 0.3205944798301486, + "History": 0.42168674698795183, + "Geography": 0.41836734693877553, + "Lampungic": 0.3333333333333333, + "Social science": 0.6026711185308848, + "Balinese": 0.31210191082802546, "Makassarese": 0.3602150537634409, - "Banjarese": 0.3958333333333333, - "Chemistry": 0.2948905109489051, - "Biology": 0.4686390532544379, - "Science": 0.5799793601651186, - "Christian religion": 0.5920398009950248, - "Art": 0.5657237936772047, - "Islam religion": 0.5974395448079659, - "Hindu religion": 0.48, - "Madurese": 0.3423728813559322, - "Sport": 0.527027027027027, - "Indonesian language": 0.5473225404732254, - "Physics": 0.3797979797979798, - "Minangkabau culture": 0.36180904522613067, - "Dayak language": 0.3394495412844037, - "Sociology": 0.4879032258064516, - "Economy": 0.4672131147540984, - "Sundanese": 0.4140017286084702, - "Javanese": 0.38911290322580644, - "Civic education": 0.592274678111588 + "Banjarese": 0.3333333333333333, + "Chemistry": 0.2832116788321168, + "Biology": 0.421301775147929, + "Science": 0.49742002063983487, + "Christian religion": 0.5671641791044776, + "Art": 0.4908485856905158, + "Islam religion": 0.5291607396870555, + "Hindu religion": 0.4266666666666667, + "Madurese": 0.33559322033898303, + "Sport": 0.4594594594594595, + "Indonesian language": 0.5056039850560399, + "Physics": 0.3434343434343434, + "Minangkabau culture": 0.35175879396984927, + "Dayak language": 0.3211009174311927, + "Sociology": 0.4657258064516129, + "Economy": 0.42213114754098363, + "Sundanese": 0.3742437337942956, + "Javanese": 0.36088709677419356, + "Civic education": 0.5064377682403434 } }, "prompt_3": { - "accuracy": 0.3732558915815475, + "accuracy": 0.4446224714600441, "category_acc": { - "History": 0.3614457831325301, - "Geography": 0.3489795918367347, - "Lampungic": 0.2789115646258503, - "Social science": 0.4757929883138564, - "Balinese": 0.2781316348195329, - "Makassarese": 0.3064516129032258, - "Banjarese": 0.3819444444444444, - "Chemistry": 0.24379562043795622, - "Biology": 0.3337278106508876, - "Science": 0.3942208462332301, - "Christian religion": 0.34328358208955223, - "Art": 0.4176372712146423, - "Islam religion": 0.39829302987197723, - "Hindu religion": 0.3933333333333333, - "Madurese": 0.2745762711864407, - "Sport": 0.46621621621621623, - "Indonesian language": 0.42714819427148193, - "Physics": 0.36363636363636365, - "Minangkabau culture": 0.3065326633165829, - "Dayak language": 0.27522935779816515, - "Sociology": 0.38911290322580644, - "Economy": 0.33811475409836067, - "Sundanese": 0.35522904062229904, - "Javanese": 0.3326612903225806, - "Civic education": 0.413447782546495 + "History": 0.39357429718875503, + "Geography": 0.3836734693877551, + "Lampungic": 0.35374149659863946, + "Social science": 0.6377295492487479, + "Balinese": 0.3227176220806794, + "Makassarese": 0.27956989247311825, + "Banjarese": 0.4305555555555556, + "Chemistry": 0.27007299270072993, + "Biology": 0.4165680473372781, + "Science": 0.5098039215686274, + "Christian religion": 0.46766169154228854, + "Art": 0.5440931780366056, + "Islam religion": 0.5049786628733998, + "Hindu religion": 0.5, + "Madurese": 0.3050847457627119, + "Sport": 0.5202702702702703, + "Indonesian language": 0.5074719800747198, + "Physics": 0.3393939393939394, + "Minangkabau culture": 0.4020100502512563, + "Dayak language": 0.29357798165137616, + "Sociology": 0.45161290322580644, + "Economy": 0.4077868852459016, + "Sundanese": 0.3949870354364736, + "Javanese": 0.3850806451612903, + "Civic education": 0.5078683834048641 } }, "prompt_4": { - "accuracy": 0.41778489885840175, + "accuracy": 0.44368782962814607, "category_acc": { - "History": 0.3654618473895582, - "Geography": 0.3979591836734694, - "Lampungic": 0.3877551020408163, - "Social science": 0.5525876460767947, - "Balinese": 0.2929936305732484, - "Makassarese": 0.3118279569892473, - "Banjarese": 0.3819444444444444, - "Chemistry": 0.29635036496350364, - "Biology": 0.38224852071005916, - "Science": 0.4840041279669763, + "History": 0.4738955823293173, + "Geography": 0.4163265306122449, + "Lampungic": 0.3673469387755102, + "Social science": 0.6076794657762938, + "Balinese": 0.2951167728237792, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3541666666666667, + "Chemistry": 0.27007299270072993, + "Biology": 0.40236686390532544, + "Science": 0.5128998968008256, "Christian religion": 0.4925373134328358, - "Art": 0.5008319467554077, - "Islam religion": 0.4850640113798009, + "Art": 0.5141430948419301, + "Islam religion": 0.5149359886201992, "Hindu religion": 0.5, - "Madurese": 0.3220338983050847, - "Sport": 0.4391891891891892, - "Indonesian language": 0.47447073474470736, - "Physics": 0.32323232323232326, - "Minangkabau culture": 0.34673366834170855, - "Dayak language": 0.2018348623853211, - "Sociology": 0.39314516129032256, - "Economy": 0.375, - "Sundanese": 0.3656006914433881, - "Javanese": 0.3497983870967742, - "Civic education": 0.4978540772532189 + "Madurese": 0.3423728813559322, + "Sport": 0.4864864864864865, + "Indonesian language": 0.5056039850560399, + "Physics": 0.3696969696969697, + "Minangkabau culture": 0.3768844221105528, + "Dayak language": 0.28440366972477066, + "Sociology": 0.43548387096774194, + "Economy": 0.42418032786885246, + "Sundanese": 0.3949870354364736, + "Javanese": 0.3639112903225806, + "Civic education": 0.49356223175965663 } }, "prompt_5": { - "accuracy": 0.42132318579344413, + "accuracy": 0.44829427865678617, "category_acc": { - "History": 0.40562248995983935, - "Geography": 0.42653061224489797, - "Lampungic": 0.3673469387755102, - "Social science": 0.5893155258764607, - "Balinese": 0.2781316348195329, + "History": 0.45582329317269077, + "Geography": 0.38979591836734695, + "Lampungic": 0.3333333333333333, + "Social science": 0.6143572621035058, + "Balinese": 0.29723991507430997, "Makassarese": 0.3172043010752688, - "Banjarese": 0.3263888888888889, - "Chemistry": 0.27883211678832115, - "Biology": 0.38698224852071006, - "Science": 0.49122807017543857, - "Christian religion": 0.5422885572139303, - "Art": 0.47254575707154745, - "Islam religion": 0.5049786628733998, - "Hindu religion": 0.44666666666666666, - "Madurese": 0.31186440677966104, - "Sport": 0.4594594594594595, - "Indonesian language": 0.4813200498132005, - "Physics": 0.3212121212121212, - "Minangkabau culture": 0.32663316582914576, - "Dayak language": 0.27522935779816515, - "Sociology": 0.4092741935483871, - "Economy": 0.3668032786885246, - "Sundanese": 0.3716508210890233, - "Javanese": 0.35181451612903225, - "Civic education": 0.4663805436337625 + "Banjarese": 0.3680555555555556, + "Chemistry": 0.27007299270072993, + "Biology": 0.42248520710059173, + "Science": 0.5294117647058824, + "Christian religion": 0.5223880597014925, + "Art": 0.5224625623960066, + "Islam religion": 0.5135135135135135, + "Hindu religion": 0.47333333333333333, + "Madurese": 0.34915254237288135, + "Sport": 0.46621621621621623, + "Indonesian language": 0.5115193026151931, + "Physics": 0.3414141414141414, + "Minangkabau culture": 0.35175879396984927, + "Dayak language": 0.3577981651376147, + "Sociology": 0.41330645161290325, + "Economy": 0.4262295081967213, + "Sundanese": 0.4096802074330164, + "Javanese": 0.3860887096774194, + "Civic education": 0.5135908440629471 } } }, "flores_ind2eng": { "prompt_1": { - "bleu_score": 0.34862757099739766 + "bleu_score": 0.32971871495146615 }, "prompt_2": { - "bleu_score": 0.3530147157703795 + "bleu_score": 0.3426615795583529 }, "prompt_3": { - "bleu_score": 0.35953966781160585 + "bleu_score": 0.3599845877289089 }, "prompt_4": { - "bleu_score": 0.33177166236852557 + "bleu_score": 0.29744838905316373 }, "prompt_5": { - "bleu_score": 0.33548492537137203 + "bleu_score": 0.3235635193491021 } }, "flores_vie2eng": { "prompt_1": { - "bleu_score": 0.28443833413071995 + "bleu_score": 0.2673409525747403 }, "prompt_2": { - "bleu_score": 0.2907746744644467 + "bleu_score": 0.28387018176926726 }, "prompt_3": { - "bleu_score": 0.29090264975427593 + "bleu_score": 0.2955925819775187 }, "prompt_4": { - "bleu_score": 0.2779363781260474 + "bleu_score": 0.2655157780088739 }, "prompt_5": { - "bleu_score": 0.27992746635172344 + "bleu_score": 0.2736481427616579 } }, "flores_zho2eng": { "prompt_1": { - "bleu_score": 0.2002151122788486 + "bleu_score": 0.18076194281183502 }, "prompt_2": { - "bleu_score": 0.20884450972688828 + "bleu_score": 0.1944248874109758 }, "prompt_3": { - "bleu_score": 0.21481801019464242 + "bleu_score": 0.21339548627157054 }, "prompt_4": { - "bleu_score": 0.19222350540209923 + "bleu_score": 0.17662506066723732 }, "prompt_5": { - "bleu_score": 0.20272935145984003 + "bleu_score": 0.1823084649922072 } }, "flores_zsm2eng": { "prompt_1": { - "bleu_score": 0.3402735018440144 + "bleu_score": 0.3279161127195312 }, "prompt_2": { - "bleu_score": 0.34725259353122995 + "bleu_score": 0.34366690148800155 }, "prompt_3": { - "bleu_score": 0.3553594382109361 + "bleu_score": 0.35545003896755506 }, "prompt_4": { - "bleu_score": 0.3218818358090323 + "bleu_score": 0.2980737499673598 }, "prompt_5": { - "bleu_score": 0.32800134077077914 + "bleu_score": 0.3185520394552591 } }, "mmlu": { "prompt_1": { - "accuracy": 0.5997666277712952 + "accuracy": 0.5939323220536756 }, "prompt_2": { - "accuracy": 0.48191365227537925 + "accuracy": 0.5507584597432905 }, "prompt_3": { - "accuracy": 0.5950991831971996 + "accuracy": 0.6044340723453909 }, "prompt_4": { - "accuracy": 0.6137689614935823 + "accuracy": 0.5915985997666278 }, "prompt_5": { "accuracy": 0.5950991831971996 @@ -96011,1197 +96011,1197 @@ }, "mmlu_full": { "prompt_1": { - "accuracy": 0.578190918841616, + "accuracy": 0.5731855559528066, "category_acc": { "high_school_european_history": 0.676829268292683, - "business_ethics": 0.5959595959595959, - "clinical_knowledge": 0.6628787878787878, - "medical_genetics": 0.7373737373737373, - "high_school_us_history": 0.7241379310344828, - "high_school_physics": 0.37333333333333335, - "high_school_world_history": 0.8177966101694916, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.6590909090909091, + "medical_genetics": 0.7171717171717171, + "high_school_us_history": 0.7389162561576355, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.809322033898305, "virology": 0.49696969696969695, - "high_school_microeconomics": 0.6455696202531646, - "econometrics": 0.45132743362831856, - "college_computer_science": 0.4444444444444444, - "high_school_biology": 0.7119741100323624, - "abstract_algebra": 0.3434343434343434, - "professional_accounting": 0.4626334519572954, - "philosophy": 0.6129032258064516, - "professional_medicine": 0.6494464944649446, - "nutrition": 0.6754098360655738, - "global_facts": 0.3333333333333333, - "machine_learning": 0.36036036036036034, - "security_studies": 0.7008196721311475, - "public_relations": 0.5504587155963303, - "professional_psychology": 0.6284779050736498, - "prehistory": 0.6934984520123839, - "anatomy": 0.6194029850746269, + "high_school_microeconomics": 0.6329113924050633, + "econometrics": 0.4424778761061947, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.7184466019417476, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.45195729537366547, + "philosophy": 0.6290322580645161, + "professional_medicine": 0.6531365313653137, + "nutrition": 0.6721311475409836, + "global_facts": 0.3434343434343434, + "machine_learning": 0.34234234234234234, + "security_studies": 0.6844262295081968, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.6235679214402619, + "prehistory": 0.6873065015479877, + "anatomy": 0.5970149253731343, "human_sexuality": 0.676923076923077, - "college_medicine": 0.5406976744186046, - "high_school_government_and_politics": 0.8125, - "college_chemistry": 0.45454545454545453, - "logical_fallacies": 0.6419753086419753, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.8072916666666666, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6358024691358025, "high_school_geography": 0.700507614213198, - "elementary_mathematics": 0.3925729442970822, - "human_aging": 0.6441441441441441, - "college_mathematics": 0.2828282828282828, - "high_school_psychology": 0.7867647058823529, - "formal_logic": 0.408, - "high_school_statistics": 0.46511627906976744, - "international_law": 0.7, - "high_school_mathematics": 0.30855018587360594, - "high_school_computer_science": 0.6464646464646465, - "conceptual_physics": 0.5256410256410257, + "elementary_mathematics": 0.3687002652519894, + "human_aging": 0.6351351351351351, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.7886029411764706, + "formal_logic": 0.4, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.5341880341880342, "miscellaneous": 0.7838874680306905, - "high_school_chemistry": 0.4801980198019802, - "marketing": 0.8240343347639485, - "professional_law": 0.41682974559686886, + "high_school_chemistry": 0.4900990099009901, + "marketing": 0.8111587982832618, + "professional_law": 0.41487279843444225, "management": 0.7843137254901961, "college_physics": 0.39603960396039606, - "jurisprudence": 0.6635514018691588, + "jurisprudence": 0.6542056074766355, "world_religions": 0.8, - "sociology": 0.76, - "us_foreign_policy": 0.7878787878787878, - "high_school_macroeconomics": 0.5784061696658098, - "computer_security": 0.7070707070707071, - "moral_scenarios": 0.25838926174496646, - "moral_disputes": 0.6, - "electrical_engineering": 0.5902777777777778, - "astronomy": 0.6357615894039735, - "college_biology": 0.7202797202797203 + "sociology": 0.765, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5629820051413882, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.6057971014492753, + "electrical_engineering": 0.5486111111111112, + "astronomy": 0.6490066225165563, + "college_biology": 0.6993006993006993 } }, "prompt_2": { - "accuracy": 0.45505899177690384, + "accuracy": 0.5196996782266714, "category_acc": { - "high_school_european_history": 0.6036585365853658, - "business_ethics": 0.46464646464646464, - "clinical_knowledge": 0.4583333333333333, - "medical_genetics": 0.36363636363636365, - "high_school_us_history": 0.7339901477832512, - "high_school_physics": 0.31333333333333335, - "high_school_world_history": 0.7923728813559322, - "virology": 0.3515151515151515, - "high_school_microeconomics": 0.39662447257383965, - "econometrics": 0.3274336283185841, - "college_computer_science": 0.47474747474747475, - "high_school_biology": 0.5080906148867314, - "abstract_algebra": 0.25252525252525254, - "professional_accounting": 0.4483985765124555, - "philosophy": 0.4258064516129032, - "professional_medicine": 0.4907749077490775, - "nutrition": 0.4852459016393443, - "global_facts": 0.32323232323232326, - "machine_learning": 0.32432432432432434, - "security_studies": 0.6967213114754098, - "public_relations": 0.3853211009174312, - "professional_psychology": 0.4877250409165303, - "prehistory": 0.43034055727554177, - "anatomy": 0.3805970149253731, - "human_sexuality": 0.46923076923076923, - "college_medicine": 0.38953488372093026, - "high_school_government_and_politics": 0.6145833333333334, - "college_chemistry": 0.36363636363636365, - "logical_fallacies": 0.5987654320987654, - "high_school_geography": 0.5380710659898477, - "elementary_mathematics": 0.35543766578249336, - "human_aging": 0.42342342342342343, - "college_mathematics": 0.31313131313131315, - "high_school_psychology": 0.5588235294117647, - "formal_logic": 0.392, - "high_school_statistics": 0.4325581395348837, - "international_law": 0.5583333333333333, - "high_school_mathematics": 0.32342007434944237, - "high_school_computer_science": 0.5555555555555556, - "conceptual_physics": 0.358974358974359, - "miscellaneous": 0.4884910485933504, - "high_school_chemistry": 0.3564356435643564, - "marketing": 0.6266094420600858, - "professional_law": 0.43574690150032613, - "management": 0.5490196078431373, + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.494949494949495, + "clinical_knowledge": 0.6098484848484849, + "medical_genetics": 0.5454545454545454, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.34, + "high_school_world_history": 0.8177966101694916, + "virology": 0.43636363636363634, + "high_school_microeconomics": 0.5569620253164557, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.6245954692556634, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.4234875444839858, + "philosophy": 0.5645161290322581, + "professional_medicine": 0.5830258302583026, + "nutrition": 0.5967213114754099, + "global_facts": 0.2828282828282828, + "machine_learning": 0.3783783783783784, + "security_studies": 0.7254098360655737, + "public_relations": 0.5137614678899083, + "professional_psychology": 0.5761047463175123, + "prehistory": 0.5541795665634675, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6076923076923076, + "college_medicine": 0.5058139534883721, + "high_school_government_and_politics": 0.7395833333333334, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6345177664974619, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.47297297297297297, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.6672794117647058, + "formal_logic": 0.416, + "high_school_statistics": 0.4744186046511628, + "international_law": 0.675, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.405982905982906, + "miscellaneous": 0.6240409207161125, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.6995708154506438, + "professional_law": 0.4383561643835616, + "management": 0.6274509803921569, "college_physics": 0.297029702970297, - "jurisprudence": 0.5327102803738317, - "world_religions": 0.48823529411764705, - "sociology": 0.575, - "us_foreign_policy": 0.6666666666666666, - "high_school_macroeconomics": 0.40102827763496146, - "computer_security": 0.5555555555555556, - "moral_scenarios": 0.24384787472035793, - "moral_disputes": 0.48405797101449277, - "electrical_engineering": 0.4027777777777778, - "astronomy": 0.4503311258278146, - "college_biology": 0.5174825174825175 + "jurisprudence": 0.6074766355140186, + "world_religions": 0.6176470588235294, + "sociology": 0.735, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.4781491002570694, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.28747203579418346, + "moral_disputes": 0.5449275362318841, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.5894039735099338, + "college_biology": 0.6083916083916084 } }, "prompt_3": { - "accuracy": 0.5891312120128709, + "accuracy": 0.5917053986414015, "category_acc": { - "high_school_european_history": 0.7012195121951219, - "business_ethics": 0.6161616161616161, - "clinical_knowledge": 0.678030303030303, - "medical_genetics": 0.7171717171717171, - "high_school_us_history": 0.7635467980295566, - "high_school_physics": 0.36666666666666664, + "high_school_european_history": 0.6890243902439024, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.6553030303030303, + "medical_genetics": 0.7070707070707071, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.3933333333333333, "high_school_world_history": 0.8305084745762712, "virology": 0.503030303030303, - "high_school_microeconomics": 0.679324894514768, - "econometrics": 0.39823008849557523, - "college_computer_science": 0.5252525252525253, - "high_school_biology": 0.7443365695792881, - "abstract_algebra": 0.23232323232323232, - "professional_accounting": 0.46619217081850534, - "philosophy": 0.6483870967741936, - "professional_medicine": 0.6715867158671587, - "nutrition": 0.6721311475409836, - "global_facts": 0.35353535353535354, - "machine_learning": 0.4144144144144144, - "security_studies": 0.7172131147540983, - "public_relations": 0.5779816513761468, - "professional_psychology": 0.6333878887070377, - "prehistory": 0.6501547987616099, - "anatomy": 0.6417910447761194, - "human_sexuality": 0.6692307692307692, - "college_medicine": 0.5755813953488372, - "high_school_government_and_politics": 0.8229166666666666, + "high_school_microeconomics": 0.6962025316455697, + "econometrics": 0.40707964601769914, + "college_computer_science": 0.5050505050505051, + "high_school_biology": 0.7313915857605178, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.4697508896797153, + "philosophy": 0.6612903225806451, + "professional_medicine": 0.6900369003690037, + "nutrition": 0.6852459016393443, + "global_facts": 0.3434343434343434, + "machine_learning": 0.36036036036036034, + "security_studies": 0.7213114754098361, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.6415711947626841, + "prehistory": 0.6625386996904025, + "anatomy": 0.6119402985074627, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5581395348837209, + "high_school_government_and_politics": 0.8385416666666666, "college_chemistry": 0.42424242424242425, "logical_fallacies": 0.6790123456790124, - "high_school_geography": 0.766497461928934, - "elementary_mathematics": 0.3819628647214854, - "human_aging": 0.6216216216216216, - "college_mathematics": 0.3434343434343434, - "high_school_psychology": 0.7922794117647058, + "high_school_geography": 0.7411167512690355, + "elementary_mathematics": 0.34748010610079577, + "human_aging": 0.6351351351351351, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.8106617647058824, "formal_logic": 0.464, "high_school_statistics": 0.48372093023255813, "international_law": 0.7083333333333334, - "high_school_mathematics": 0.2788104089219331, - "high_school_computer_science": 0.6262626262626263, - "conceptual_physics": 0.44871794871794873, - "miscellaneous": 0.7928388746803069, - "high_school_chemistry": 0.4900990099009901, - "marketing": 0.8197424892703863, - "professional_law": 0.43966079582517936, - "management": 0.803921568627451, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.4829059829059829, + "miscellaneous": 0.7941176470588235, + "high_school_chemistry": 0.5, + "marketing": 0.8025751072961373, + "professional_law": 0.43770384866275275, + "management": 0.7745098039215687, "college_physics": 0.36633663366336633, - "jurisprudence": 0.7009345794392523, - "world_religions": 0.7941176470588235, - "sociology": 0.78, - "us_foreign_policy": 0.8484848484848485, - "high_school_macroeconomics": 0.5912596401028277, - "computer_security": 0.696969696969697, - "moral_scenarios": 0.30089485458612975, - "moral_disputes": 0.6086956521739131, - "electrical_engineering": 0.5, - "astronomy": 0.6622516556291391, - "college_biology": 0.7412587412587412 + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7823529411764706, + "sociology": 0.79, + "us_foreign_policy": 0.8686868686868687, + "high_school_macroeconomics": 0.5938303341902313, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.30313199105145416, + "moral_disputes": 0.6289855072463768, + "electrical_engineering": 0.5486111111111112, + "astronomy": 0.6490066225165563, + "college_biology": 0.7342657342657343 } }, "prompt_4": { - "accuracy": 0.5799785484447623, + "accuracy": 0.5706828745084018, "category_acc": { - "high_school_european_history": 0.676829268292683, - "business_ethics": 0.5959595959595959, + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.6060606060606061, "clinical_knowledge": 0.6553030303030303, - "medical_genetics": 0.7373737373737373, - "high_school_us_history": 0.7241379310344828, - "high_school_physics": 0.36666666666666664, - "high_school_world_history": 0.8008474576271186, - "virology": 0.48484848484848486, - "high_school_microeconomics": 0.6751054852320675, - "econometrics": 0.45132743362831856, - "college_computer_science": 0.494949494949495, - "high_school_biology": 0.7216828478964401, - "abstract_algebra": 0.30303030303030304, - "professional_accounting": 0.46619217081850534, - "philosophy": 0.6225806451612903, - "professional_medicine": 0.6531365313653137, - "nutrition": 0.6688524590163935, - "global_facts": 0.32323232323232326, - "machine_learning": 0.34234234234234234, - "security_studies": 0.7008196721311475, - "public_relations": 0.5779816513761468, - "professional_psychology": 0.6219312602291326, - "prehistory": 0.6904024767801857, - "anatomy": 0.6268656716417911, - "human_sexuality": 0.6692307692307692, - "college_medicine": 0.5581395348837209, - "high_school_government_and_politics": 0.8177083333333334, - "college_chemistry": 0.47474747474747475, + "medical_genetics": 0.696969696969697, + "high_school_us_history": 0.7389162561576355, + "high_school_physics": 0.4, + "high_school_world_history": 0.7838983050847458, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.6455696202531646, + "econometrics": 0.40707964601769914, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.7249190938511327, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.4483985765124555, + "philosophy": 0.6161290322580645, + "professional_medicine": 0.6568265682656826, + "nutrition": 0.6524590163934426, + "global_facts": 0.2828282828282828, + "machine_learning": 0.36036036036036034, + "security_studies": 0.6967213114754098, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.6284779050736498, + "prehistory": 0.6842105263157895, + "anatomy": 0.6194029850746269, + "human_sexuality": 0.6846153846153846, + "college_medicine": 0.5465116279069767, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.40404040404040403, "logical_fallacies": 0.6419753086419753, - "high_school_geography": 0.7258883248730964, - "elementary_mathematics": 0.38992042440318303, - "human_aging": 0.6261261261261262, - "college_mathematics": 0.26262626262626265, - "high_school_psychology": 0.7904411764705882, - "formal_logic": 0.424, - "high_school_statistics": 0.44651162790697674, - "international_law": 0.6583333333333333, - "high_school_mathematics": 0.2825278810408922, - "high_school_computer_science": 0.6060606060606061, - "conceptual_physics": 0.5299145299145299, - "miscellaneous": 0.789002557544757, - "high_school_chemistry": 0.5, - "marketing": 0.8369098712446352, - "professional_law": 0.42857142857142855, - "management": 0.803921568627451, - "college_physics": 0.39603960396039606, - "jurisprudence": 0.6728971962616822, - "world_religions": 0.8058823529411765, - "sociology": 0.78, + "high_school_geography": 0.700507614213198, + "elementary_mathematics": 0.3395225464190981, + "human_aging": 0.6306306306306306, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.7959558823529411, + "formal_logic": 0.376, + "high_school_statistics": 0.4604651162790698, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.5170940170940171, + "miscellaneous": 0.768542199488491, + "high_school_chemistry": 0.45544554455445546, + "marketing": 0.8240343347639485, + "professional_law": 0.4207436399217221, + "management": 0.7941176470588235, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.788235294117647, + "sociology": 0.775, "us_foreign_policy": 0.797979797979798, - "high_school_macroeconomics": 0.5629820051413882, - "computer_security": 0.7272727272727273, - "moral_scenarios": 0.2639821029082774, - "moral_disputes": 0.6057971014492753, - "electrical_engineering": 0.5763888888888888, + "high_school_macroeconomics": 0.5526992287917738, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.2651006711409396, + "moral_disputes": 0.6260869565217392, + "electrical_engineering": 0.5625, "astronomy": 0.6158940397350994, - "college_biology": 0.7202797202797203 + "college_biology": 0.7062937062937062 } }, "prompt_5": { - "accuracy": 0.5732570611369324, + "accuracy": 0.5683232034322488, "category_acc": { "high_school_european_history": 0.6524390243902439, "business_ethics": 0.6464646464646465, - "clinical_knowledge": 0.6287878787878788, - "medical_genetics": 0.7373737373737373, + "clinical_knowledge": 0.6363636363636364, + "medical_genetics": 0.7070707070707071, "high_school_us_history": 0.7142857142857143, - "high_school_physics": 0.35333333333333333, - "high_school_world_history": 0.8177966101694916, - "virology": 0.503030303030303, - "high_school_microeconomics": 0.6413502109704642, + "high_school_physics": 0.36, + "high_school_world_history": 0.8008474576271186, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6371308016877637, "econometrics": 0.4336283185840708, "college_computer_science": 0.4444444444444444, - "high_school_biology": 0.7216828478964401, - "abstract_algebra": 0.21212121212121213, - "professional_accounting": 0.45195729537366547, - "philosophy": 0.6193548387096774, - "professional_medicine": 0.6494464944649446, + "high_school_biology": 0.7249190938511327, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.4412811387900356, + "philosophy": 0.6064516129032258, + "professional_medicine": 0.6531365313653137, "nutrition": 0.6721311475409836, - "global_facts": 0.31313131313131315, - "machine_learning": 0.36936936936936937, - "security_studies": 0.6926229508196722, - "public_relations": 0.5321100917431193, - "professional_psychology": 0.6153846153846154, - "prehistory": 0.6749226006191951, - "anatomy": 0.6119402985074627, - "human_sexuality": 0.6923076923076923, - "college_medicine": 0.5348837209302325, + "global_facts": 0.30303030303030304, + "machine_learning": 0.3153153153153153, + "security_studies": 0.680327868852459, + "public_relations": 0.5229357798165137, + "professional_psychology": 0.6219312602291326, + "prehistory": 0.6780185758513931, + "anatomy": 0.6194029850746269, + "human_sexuality": 0.6846153846153846, + "college_medicine": 0.5523255813953488, "high_school_government_and_politics": 0.8177083333333334, - "college_chemistry": 0.4444444444444444, - "logical_fallacies": 0.6481481481481481, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6296296296296297, "high_school_geography": 0.6954314720812182, - "elementary_mathematics": 0.3448275862068966, - "human_aging": 0.6441441441441441, - "college_mathematics": 0.2828282828282828, - "high_school_psychology": 0.7904411764705882, - "formal_logic": 0.416, - "high_school_statistics": 0.42790697674418604, - "international_law": 0.675, - "high_school_mathematics": 0.32342007434944237, - "high_school_computer_science": 0.5858585858585859, - "conceptual_physics": 0.4829059829059829, - "miscellaneous": 0.7659846547314578, - "high_school_chemistry": 0.46534653465346537, - "marketing": 0.8369098712446352, - "professional_law": 0.43313763861709065, + "elementary_mathematics": 0.3103448275862069, + "human_aging": 0.6486486486486487, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.7941176470588235, + "formal_logic": 0.408, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.4658119658119658, + "miscellaneous": 0.7723785166240409, + "high_school_chemistry": 0.45544554455445546, + "marketing": 0.8326180257510729, + "professional_law": 0.4233529028049576, "management": 0.8137254901960784, - "college_physics": 0.4158415841584158, + "college_physics": 0.39603960396039606, "jurisprudence": 0.6542056074766355, - "world_religions": 0.8, - "sociology": 0.79, + "world_religions": 0.7941176470588235, + "sociology": 0.78, "us_foreign_policy": 0.8080808080808081, - "high_school_macroeconomics": 0.5552699228791774, - "computer_security": 0.7070707070707071, - "moral_scenarios": 0.2606263982102908, + "high_school_macroeconomics": 0.5501285347043702, + "computer_security": 0.6868686868686869, + "moral_scenarios": 0.26174496644295303, "moral_disputes": 0.6086956521739131, - "electrical_engineering": 0.5833333333333334, - "astronomy": 0.609271523178808, - "college_biology": 0.7272727272727273 + "electrical_engineering": 0.5763888888888888, + "astronomy": 0.5894039735099338, + "college_biology": 0.7202797202797203 } } }, "c_eval": { "prompt_1": { - "accuracy": 0.30534918276374445 + "accuracy": 0.37518573551263 }, "prompt_2": { - "accuracy": 0.29420505200594355 + "accuracy": 0.3291233283803863 }, "prompt_3": { - "accuracy": 0.3439821693907875 + "accuracy": 0.40564635958395245 }, "prompt_4": { - "accuracy": 0.3588410104011887 + "accuracy": 0.4034175334323923 }, "prompt_5": { - "accuracy": 0.3060921248142645 + "accuracy": 0.3551263001485884 } }, "c_eval_full": { "prompt_1": { - "accuracy": 0.31693648816936487, + "accuracy": 0.3816936488169365, "category_acc": { - "computer_network": 0.25, - "operating_system": 0.375, - "computer_architecture": 0.38461538461538464, - "college_programming": 0.30952380952380953, + "computer_network": 0.20833333333333334, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.5, "college_physics": 0.4583333333333333, - "college_chemistry": 0.13793103448275862, - "advanced_mathematics": 0.16666666666666666, - "probability_and_statistics": 0.30434782608695654, - "discrete_mathematics": 0.23809523809523808, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, "electrical_engineer": 0.23809523809523808, - "metrology_engineer": 0.3103448275862069, - "high_school_mathematics": 0.08695652173913043, - "high_school_physics": 0.25, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.3333333333333333, "high_school_chemistry": 0.5, - "high_school_biology": 0.20833333333333334, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.4230769230769231, - "middle_school_physics": 0.4166666666666667, - "middle_school_chemistry": 0.4, - "veterinary_medicine": 0.17857142857142858, - "college_economics": 0.35, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.56, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.45, "business_administration": 0.2631578947368421, - "marxism": 0.375, - "mao_zedong_thought": 0.3103448275862069, - "education_science": 0.20588235294117646, - "teacher_qualification": 0.3673469387755102, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5306122448979592, "high_school_politics": 0.5833333333333334, "high_school_geography": 0.375, - "middle_school_politics": 0.3076923076923077, - "middle_school_geography": 0.23529411764705882, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.4117647058823529, "modern_chinese_history": 0.21428571428571427, - "ideological_and_moral_cultivation": 0.2916666666666667, - "logic": 0.5925925925925926, - "law": 0.41379310344827586, - "chinese_language_and_literature": 0.2857142857142857, - "art_studies": 0.2631578947368421, - "professional_tour_guide": 0.2647058823529412, - "legal_professional": 0.35714285714285715, - "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.44, - "middle_school_history": 0.48148148148148145, - "civil_servant": 0.36538461538461536, - "sports_science": 0.2916666666666667, - "plant_protection": 0.3333333333333333, - "basic_medicine": 0.2916666666666667, - "clinical_medicine": 0.25925925925925924, - "urban_and_rural_planner": 0.27450980392156865, - "accountant": 0.35185185185185186, - "fire_engineer": 0.3333333333333333, - "environmental_impact_assessment_engineer": 0.3611111111111111, - "tax_accountant": 0.3333333333333333, - "physician": 0.24074074074074073 + "ideological_and_moral_cultivation": 0.375, + "logic": 0.5555555555555556, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.20588235294117646, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.72, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.4230769230769231, + "sports_science": 0.375, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.35185185185185186, + "physician": 0.35185185185185186 } }, "prompt_2": { - "accuracy": 0.3163138231631382, + "accuracy": 0.33623910336239105, "category_acc": { - "computer_network": 0.20833333333333334, - "operating_system": 0.041666666666666664, - "computer_architecture": 0.4230769230769231, - "college_programming": 0.30952380952380953, + "computer_network": 0.4166666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.3333333333333333, "college_physics": 0.4166666666666667, - "college_chemistry": 0.2413793103448276, - "advanced_mathematics": 0.3333333333333333, - "probability_and_statistics": 0.34782608695652173, - "discrete_mathematics": 0.2857142857142857, - "electrical_engineer": 0.23809523809523808, - "metrology_engineer": 0.3448275862068966, - "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.5, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.4166666666666667, "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.5769230769230769, - "middle_school_physics": 0.375, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.4166666666666667, "middle_school_chemistry": 0.32, - "veterinary_medicine": 0.39285714285714285, - "college_economics": 0.2, - "business_administration": 0.2894736842105263, - "marxism": 0.25, - "mao_zedong_thought": 0.27586206896551724, - "education_science": 0.17647058823529413, - "teacher_qualification": 0.2653061224489796, - "high_school_politics": 0.5, - "high_school_geography": 0.4166666666666667, - "middle_school_politics": 0.34615384615384615, - "middle_school_geography": 0.29411764705882354, + "veterinary_medicine": 0.10714285714285714, + "college_economics": 0.38333333333333336, + "business_administration": 0.2631578947368421, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.25, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.47058823529411764, "modern_chinese_history": 0.2857142857142857, "ideological_and_moral_cultivation": 0.3333333333333333, - "logic": 0.5555555555555556, + "logic": 0.6296296296296297, "law": 0.3448275862068966, - "chinese_language_and_literature": 0.25, - "art_studies": 0.4473684210526316, - "professional_tour_guide": 0.29411764705882354, - "legal_professional": 0.39285714285714285, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.35714285714285715, "high_school_chinese": 0.2916666666666667, "high_school_history": 0.36, - "middle_school_history": 0.18518518518518517, - "civil_servant": 0.4807692307692308, - "sports_science": 0.2916666666666667, - "plant_protection": 0.25925925925925924, - "basic_medicine": 0.375, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.3333333333333333, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.5192307692307693, + "sports_science": 0.25, + "plant_protection": 0.1111111111111111, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.29411764705882354, "accountant": 0.37037037037037035, - "fire_engineer": 0.25, - "environmental_impact_assessment_engineer": 0.19444444444444445, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3333333333333333, "tax_accountant": 0.3333333333333333, - "physician": 0.2037037037037037 + "physician": 0.2222222222222222 } }, "prompt_3": { - "accuracy": 0.3318804483188045, + "accuracy": 0.4190535491905355, "category_acc": { - "computer_network": 0.2916666666666667, - "operating_system": 0.3333333333333333, - "computer_architecture": 0.4230769230769231, - "college_programming": 0.3333333333333333, - "college_physics": 0.2916666666666667, + "computer_network": 0.375, + "operating_system": 0.375, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.375, "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.375, + "advanced_mathematics": 0.4583333333333333, "probability_and_statistics": 0.30434782608695654, - "discrete_mathematics": 0.38095238095238093, - "electrical_engineer": 0.2619047619047619, - "metrology_engineer": 0.3103448275862069, - "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.20833333333333334, - "high_school_chemistry": 0.4583333333333333, - "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.5, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.6923076923076923, "middle_school_physics": 0.4583333333333333, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.39285714285714285, - "college_economics": 0.3333333333333333, - "business_administration": 0.18421052631578946, - "marxism": 0.3333333333333333, - "mao_zedong_thought": 0.20689655172413793, - "education_science": 0.35294117647058826, - "teacher_qualification": 0.46938775510204084, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.45, + "business_administration": 0.3157894736842105, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.6122448979591837, "high_school_politics": 0.5416666666666666, - "high_school_geography": 0.125, - "middle_school_politics": 0.3076923076923077, - "middle_school_geography": 0.17647058823529413, - "modern_chinese_history": 0.25, - "ideological_and_moral_cultivation": 0.25, - "logic": 0.5925925925925926, - "law": 0.3103448275862069, - "chinese_language_and_literature": 0.35714285714285715, - "art_studies": 0.34210526315789475, - "professional_tour_guide": 0.23529411764705882, - "legal_professional": 0.39285714285714285, - "high_school_chinese": 0.25, - "high_school_history": 0.44, - "middle_school_history": 0.4444444444444444, - "civil_servant": 0.4423076923076923, - "sports_science": 0.2916666666666667, - "plant_protection": 0.3333333333333333, - "basic_medicine": 0.16666666666666666, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.5555555555555556, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.5, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.68, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.5384615384615384, + "sports_science": 0.25, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, "clinical_medicine": 0.2962962962962963, - "urban_and_rural_planner": 0.37254901960784315, - "accountant": 0.37037037037037035, - "fire_engineer": 0.2222222222222222, - "environmental_impact_assessment_engineer": 0.3333333333333333, - "tax_accountant": 0.35185185185185186, - "physician": 0.2037037037037037 + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.42592592592592593, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.3888888888888889, + "physician": 0.37037037037037035 } }, "prompt_4": { - "accuracy": 0.3835616438356164, + "accuracy": 0.4066002490660025, "category_acc": { "computer_network": 0.2916666666666667, - "operating_system": 0.25, - "computer_architecture": 0.4230769230769231, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.46153846153846156, "college_programming": 0.40476190476190477, - "college_physics": 0.375, - "college_chemistry": 0.3793103448275862, - "advanced_mathematics": 0.3333333333333333, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, "probability_and_statistics": 0.2608695652173913, - "discrete_mathematics": 0.23809523809523808, - "electrical_engineer": 0.3333333333333333, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, "metrology_engineer": 0.41379310344827586, "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.5, - "high_school_biology": 0.3333333333333333, - "middle_school_mathematics": 0.20833333333333334, - "middle_school_biology": 0.5384615384615384, + "high_school_physics": 0.25, + "high_school_chemistry": 0.5416666666666666, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6923076923076923, "middle_school_physics": 0.5, - "middle_school_chemistry": 0.52, + "middle_school_chemistry": 0.6, "veterinary_medicine": 0.2857142857142857, - "college_economics": 0.35, - "business_administration": 0.3684210526315789, - "marxism": 0.25, - "mao_zedong_thought": 0.4482758620689655, - "education_science": 0.35294117647058826, - "teacher_qualification": 0.5102040816326531, + "college_economics": 0.4, + "business_administration": 0.2894736842105263, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5306122448979592, "high_school_politics": 0.5833333333333334, - "high_school_geography": 0.375, - "middle_school_politics": 0.5769230769230769, - "middle_school_geography": 0.35294117647058826, - "modern_chinese_history": 0.32142857142857145, - "ideological_and_moral_cultivation": 0.5416666666666666, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.5, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.625, "logic": 0.5925925925925926, - "law": 0.41379310344827586, - "chinese_language_and_literature": 0.39285714285714285, - "art_studies": 0.42105263157894735, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.47368421052631576, "professional_tour_guide": 0.2647058823529412, - "legal_professional": 0.39285714285714285, + "legal_professional": 0.4642857142857143, "high_school_chinese": 0.125, - "high_school_history": 0.64, - "middle_school_history": 0.5185185185185185, - "civil_servant": 0.38461538461538464, - "sports_science": 0.2916666666666667, - "plant_protection": 0.37037037037037035, - "basic_medicine": 0.2916666666666667, - "clinical_medicine": 0.2222222222222222, + "high_school_history": 0.6, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.4807692307692308, + "sports_science": 0.3333333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, "urban_and_rural_planner": 0.47058823529411764, - "accountant": 0.42592592592592593, + "accountant": 0.4074074074074074, "fire_engineer": 0.4444444444444444, "environmental_impact_assessment_engineer": 0.4166666666666667, - "tax_accountant": 0.2777777777777778, - "physician": 0.3333333333333333 + "tax_accountant": 0.2962962962962963, + "physician": 0.4444444444444444 } }, "prompt_5": { - "accuracy": 0.31320049813200496, + "accuracy": 0.3642590286425903, "category_acc": { - "computer_network": 0.5, - "operating_system": 0.375, + "computer_network": 0.25, + "operating_system": 0.4583333333333333, "computer_architecture": 0.3076923076923077, - "college_programming": 0.35714285714285715, - "college_physics": 0.3333333333333333, - "college_chemistry": 0.27586206896551724, - "advanced_mathematics": 0.2916666666666667, - "probability_and_statistics": 0.34782608695652173, - "discrete_mathematics": 0.19047619047619047, - "electrical_engineer": 0.3333333333333333, - "metrology_engineer": 0.2413793103448276, - "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.5, - "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.3076923076923077, - "middle_school_physics": 0.375, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.3, - "business_administration": 0.3684210526315789, - "marxism": 0.2916666666666667, - "mao_zedong_thought": 0.10344827586206896, - "education_science": 0.17647058823529413, - "teacher_qualification": 0.3469387755102041, + "college_programming": 0.40476190476190477, + "college_physics": 0.4583333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.375, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.25, + "college_economics": 0.3333333333333333, + "business_administration": 0.3157894736842105, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.4897959183673469, "high_school_politics": 0.625, - "high_school_geography": 0.25, - "middle_school_politics": 0.23076923076923078, - "middle_school_geography": 0.5882352941176471, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.4117647058823529, "modern_chinese_history": 0.21428571428571427, - "ideological_and_moral_cultivation": 0.16666666666666666, + "ideological_and_moral_cultivation": 0.3333333333333333, "logic": 0.5925925925925926, - "law": 0.27586206896551724, - "chinese_language_and_literature": 0.2857142857142857, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, "art_studies": 0.23684210526315788, - "professional_tour_guide": 0.23529411764705882, - "legal_professional": 0.35714285714285715, + "professional_tour_guide": 0.17647058823529413, + "legal_professional": 0.4642857142857143, "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.52, - "middle_school_history": 0.3333333333333333, - "civil_servant": 0.4230769230769231, - "sports_science": 0.16666666666666666, - "plant_protection": 0.07407407407407407, - "basic_medicine": 0.16666666666666666, - "clinical_medicine": 0.2222222222222222, - "urban_and_rural_planner": 0.29411764705882354, - "accountant": 0.37037037037037035, - "fire_engineer": 0.2777777777777778, - "environmental_impact_assessment_engineer": 0.3333333333333333, - "tax_accountant": 0.2962962962962963, - "physician": 0.3148148148148148 + "high_school_history": 0.56, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.4423076923076923, + "sports_science": 0.3333333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.4444444444444444, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.35185185185185186, + "physician": 0.2777777777777778 } } }, "cmmlu": { "prompt_1": { - "accuracy": 0.2939068100358423 + "accuracy": 0.35125448028673834 }, "prompt_2": { - "accuracy": 0.25806451612903225 + "accuracy": 0.3118279569892473 }, "prompt_3": { - "accuracy": 0.25448028673835127 + "accuracy": 0.27956989247311825 }, "prompt_4": { - "accuracy": 0.4050179211469534 + "accuracy": 0.4157706093189964 + }, + "prompt_5": { + "accuracy": 0.34767025089605735 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.3334484544983595, + "category_acc": { + "agronomy": 0.3254437869822485, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.31097560975609756, + "arts": 0.3, + "astronomy": 0.23636363636363636, + "business_ethics": 0.3253588516746411, + "chinese_civil_service_exam": 0.425, + "chinese_driving_rule": 0.37404580152671757, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.48598130841121495, + "chinese_history": 0.5015479876160991, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.4022346368715084, + "clinical_knowledge": 0.3206751054852321, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.2962962962962963, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.2600732600732601, + "computer_science": 0.3431372549019608, + "computer_security": 0.40350877192982454, + "conceptual_physics": 0.4013605442176871, + "construction_project_management": 0.3237410071942446, + "economics": 0.37735849056603776, + "education": 0.2822085889570552, + "electrical_engineering": 0.29651162790697677, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.3235294117647059, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.2814814814814815, + "food_science": 0.3006993006993007, + "genetics": 0.2784090909090909, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.33136094674556216, + "high_school_chemistry": 0.3333333333333333, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.4405594405594406, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2756756756756757, + "journalism": 0.29651162790697677, + "jurisprudence": 0.38929440389294406, + "legal_and_moral_basis": 0.5700934579439252, + "logical": 0.37398373983739835, + "machine_learning": 0.45081967213114754, + "management": 0.3238095238095238, + "marketing": 0.39444444444444443, + "marxist_theory": 0.30687830687830686, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.3793103448275862, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.35428571428571426, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.3620689655172414, + "security_study": 0.34814814814814815, + "sociology": 0.36283185840707965, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.17297297297297298, + "virology": 0.31952662721893493, + "world_history": 0.4658385093167702, + "world_religions": 0.3 + } }, - "prompt_5": { - "accuracy": 0.33691756272401435 - } - }, - "cmmlu_full": { - "prompt_1": { - "accuracy": 0.28501122431359005, + "prompt_2": { + "accuracy": 0.30789155586254535, "category_acc": { - "agronomy": 0.30177514792899407, - "anatomy": 0.22972972972972974, - "ancient_chinese": 0.2926829268292683, - "arts": 0.18125, - "astronomy": 0.2545454545454545, - "business_ethics": 0.24401913875598086, - "chinese_civil_service_exam": 0.38125, - "chinese_driving_rule": 0.31297709923664124, - "chinese_food_culture": 0.25735294117647056, - "chinese_foreign_policy": 0.411214953271028, - "chinese_history": 0.4613003095975232, - "chinese_literature": 0.28921568627450983, - "chinese_teacher_qualification": 0.35195530726256985, - "clinical_knowledge": 0.26582278481012656, + "agronomy": 0.2485207100591716, + "anatomy": 0.31756756756756754, + "ancient_chinese": 0.24390243902439024, + "arts": 0.28125, + "astronomy": 0.24848484848484848, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.48606811145510836, + "chinese_literature": 0.23529411764705882, + "chinese_teacher_qualification": 0.3575418994413408, + "clinical_knowledge": 0.24050632911392406, "college_actuarial_science": 0.29245283018867924, - "college_education": 0.308411214953271, - "college_engineering_hydrology": 0.19811320754716982, + "college_education": 0.38317757009345793, + "college_engineering_hydrology": 0.32075471698113206, "college_law": 0.2777777777777778, - "college_mathematics": 0.2857142857142857, - "college_medical_statistics": 0.2358490566037736, - "college_medicine": 0.2673992673992674, - "computer_science": 0.23529411764705882, - "computer_security": 0.3391812865497076, - "conceptual_physics": 0.2857142857142857, - "construction_project_management": 0.23741007194244604, - "economics": 0.2830188679245283, - "education": 0.2331288343558282, + "college_mathematics": 0.3238095238095238, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.2564102564102564, + "computer_science": 0.28921568627450983, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.2949640287769784, + "economics": 0.33962264150943394, + "education": 0.3006134969325153, "electrical_engineering": 0.26744186046511625, - "elementary_chinese": 0.21428571428571427, - "elementary_commonsense": 0.25757575757575757, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.2676767676767677, "elementary_information_and_technology": 0.28991596638655465, - "elementary_mathematics": 0.24347826086956523, - "ethnology": 0.21481481481481482, - "food_science": 0.26573426573426573, - "genetics": 0.25, - "global_facts": 0.24161073825503357, - "high_school_biology": 0.28994082840236685, - "high_school_chemistry": 0.3560606060606061, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.22962962962962963, + "food_science": 0.2517482517482518, + "genetics": 0.3068181818181818, + "global_facts": 0.3422818791946309, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.25757575757575757, "high_school_geography": 0.3898305084745763, - "high_school_mathematics": 0.17682926829268292, - "high_school_physics": 0.2818181818181818, - "high_school_politics": 0.3776223776223776, - "human_sexuality": 0.2698412698412698, - "international_law": 0.31351351351351353, - "journalism": 0.27325581395348836, - "jurisprudence": 0.29683698296836986, - "legal_and_moral_basis": 0.35514018691588783, - "logical": 0.2764227642276423, - "machine_learning": 0.4180327868852459, - "management": 0.3142857142857143, - "marketing": 0.2777777777777778, - "marxist_theory": 0.2698412698412698, - "modern_chinese": 0.3103448275862069, - "nutrition": 0.2482758620689655, - "philosophy": 0.26666666666666666, - "professional_accounting": 0.26857142857142857, - "professional_law": 0.27488151658767773, - "professional_medicine": 0.27393617021276595, - "professional_psychology": 0.2801724137931034, - "public_relations": 0.2988505747126437, - "security_study": 0.2740740740740741, - "sociology": 0.26991150442477874, - "sports_science": 0.2787878787878788, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.30158730158730157, + "international_law": 0.21621621621621623, + "journalism": 0.313953488372093, + "jurisprudence": 0.31873479318734793, + "legal_and_moral_basis": 0.411214953271028, + "logical": 0.3008130081300813, + "machine_learning": 0.5081967213114754, + "management": 0.3047619047619048, + "marketing": 0.28888888888888886, + "marxist_theory": 0.3862433862433862, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.296551724137931, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.21714285714285714, + "professional_law": 0.2938388625592417, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.3103448275862069, + "public_relations": 0.3505747126436782, + "security_study": 0.2962962962962963, + "sociology": 0.28761061946902655, + "sports_science": 0.24242424242424243, "traditional_chinese_medicine": 0.24324324324324326, - "virology": 0.2485207100591716, - "world_history": 0.38509316770186336, - "world_religions": 0.2375 - } - }, - "prompt_2": { - "accuracy": 0.27784493179070974, - "category_acc": { - "agronomy": 0.28402366863905326, - "anatomy": 0.25675675675675674, - "ancient_chinese": 0.25, - "arts": 0.23125, - "astronomy": 0.24848484848484848, - "business_ethics": 0.3062200956937799, - "chinese_civil_service_exam": 0.29375, - "chinese_driving_rule": 0.22900763358778625, - "chinese_food_culture": 0.29411764705882354, - "chinese_foreign_policy": 0.3644859813084112, - "chinese_history": 0.39009287925696595, - "chinese_literature": 0.19607843137254902, - "chinese_teacher_qualification": 0.3016759776536313, - "clinical_knowledge": 0.2489451476793249, - "college_actuarial_science": 0.2169811320754717, - "college_education": 0.308411214953271, - "college_engineering_hydrology": 0.2641509433962264, - "college_law": 0.25, - "college_mathematics": 0.34285714285714286, - "college_medical_statistics": 0.29245283018867924, - "college_medicine": 0.304029304029304, - "computer_science": 0.2549019607843137, - "computer_security": 0.30409356725146197, - "conceptual_physics": 0.3469387755102041, - "construction_project_management": 0.26618705035971224, - "economics": 0.3333333333333333, - "education": 0.3128834355828221, - "electrical_engineering": 0.3023255813953488, - "elementary_chinese": 0.2619047619047619, - "elementary_commonsense": 0.2828282828282828, - "elementary_information_and_technology": 0.2689075630252101, - "elementary_mathematics": 0.25217391304347825, - "ethnology": 0.3111111111111111, - "food_science": 0.2097902097902098, - "genetics": 0.30113636363636365, - "global_facts": 0.26174496644295303, - "high_school_biology": 0.2485207100591716, - "high_school_chemistry": 0.2803030303030303, - "high_school_geography": 0.3305084745762712, - "high_school_mathematics": 0.2682926829268293, - "high_school_physics": 0.3090909090909091, - "high_school_politics": 0.3986013986013986, - "human_sexuality": 0.23809523809523808, - "international_law": 0.2702702702702703, - "journalism": 0.20930232558139536, - "jurisprudence": 0.26763990267639903, - "legal_and_moral_basis": 0.2897196261682243, - "logical": 0.3089430894308943, - "machine_learning": 0.28688524590163933, - "management": 0.22857142857142856, - "marketing": 0.26666666666666666, - "marxist_theory": 0.26455026455026454, - "modern_chinese": 0.23275862068965517, - "nutrition": 0.2482758620689655, - "philosophy": 0.23809523809523808, - "professional_accounting": 0.2857142857142857, - "professional_law": 0.2796208530805687, - "professional_medicine": 0.25, - "professional_psychology": 0.2672413793103448, - "public_relations": 0.2988505747126437, - "security_study": 0.34814814814814815, - "sociology": 0.2610619469026549, - "sports_science": 0.28484848484848485, - "traditional_chinese_medicine": 0.2918918918918919, - "virology": 0.1893491124260355, - "world_history": 0.35403726708074534, - "world_religions": 0.225 + "virology": 0.2958579881656805, + "world_history": 0.4409937888198758, + "world_religions": 0.3125 } }, "prompt_3": { - "accuracy": 0.28829217751683645, + "accuracy": 0.3489034709031255, "category_acc": { - "agronomy": 0.24260355029585798, - "anatomy": 0.2702702702702703, - "ancient_chinese": 0.27439024390243905, - "arts": 0.24375, - "astronomy": 0.30303030303030304, - "business_ethics": 0.2583732057416268, + "agronomy": 0.30177514792899407, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.3048780487804878, + "arts": 0.29375, + "astronomy": 0.2545454545454545, + "business_ethics": 0.35406698564593303, "chinese_civil_service_exam": 0.3875, - "chinese_driving_rule": 0.21374045801526717, - "chinese_food_culture": 0.27941176470588236, - "chinese_foreign_policy": 0.4205607476635514, - "chinese_history": 0.49226006191950467, - "chinese_literature": 0.28431372549019607, - "chinese_teacher_qualification": 0.36312849162011174, - "clinical_knowledge": 0.25738396624472576, - "college_actuarial_science": 0.20754716981132076, - "college_education": 0.2523364485981308, - "college_engineering_hydrology": 0.2169811320754717, - "college_law": 0.25925925925925924, - "college_mathematics": 0.34285714285714286, - "college_medical_statistics": 0.29245283018867924, - "college_medicine": 0.27106227106227104, - "computer_science": 0.29411764705882354, - "computer_security": 0.391812865497076, - "conceptual_physics": 0.3401360544217687, - "construction_project_management": 0.30935251798561153, - "economics": 0.3584905660377358, - "education": 0.20245398773006135, - "electrical_engineering": 0.2441860465116279, - "elementary_chinese": 0.2896825396825397, - "elementary_commonsense": 0.19696969696969696, - "elementary_information_and_technology": 0.3025210084033613, - "elementary_mathematics": 0.28695652173913044, - "ethnology": 0.28888888888888886, - "food_science": 0.2937062937062937, - "genetics": 0.24431818181818182, - "global_facts": 0.26174496644295303, - "high_school_biology": 0.3609467455621302, - "high_school_chemistry": 0.2878787878787879, - "high_school_geography": 0.2796610169491525, - "high_school_mathematics": 0.23780487804878048, - "high_school_physics": 0.33636363636363636, - "high_school_politics": 0.44755244755244755, - "human_sexuality": 0.23809523809523808, - "international_law": 0.2648648648648649, - "journalism": 0.29069767441860467, - "jurisprudence": 0.3284671532846715, - "legal_and_moral_basis": 0.3364485981308411, - "logical": 0.21951219512195122, - "machine_learning": 0.36065573770491804, - "management": 0.23333333333333334, - "marketing": 0.2611111111111111, - "marxist_theory": 0.2857142857142857, - "modern_chinese": 0.23275862068965517, - "nutrition": 0.3103448275862069, - "philosophy": 0.20952380952380953, - "professional_accounting": 0.29714285714285715, - "professional_law": 0.27488151658767773, - "professional_medicine": 0.2047872340425532, - "professional_psychology": 0.25, - "public_relations": 0.2988505747126437, - "security_study": 0.26666666666666666, - "sociology": 0.2168141592920354, - "sports_science": 0.2787878787878788, - "traditional_chinese_medicine": 0.2702702702702703, - "virology": 0.27218934911242604, - "world_history": 0.391304347826087, - "world_religions": 0.2375 + "chinese_driving_rule": 0.42748091603053434, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.4672897196261682, + "chinese_history": 0.5046439628482973, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.39664804469273746, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.2962962962962963, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.29304029304029305, + "computer_science": 0.37745098039215685, + "computer_security": 0.4619883040935672, + "conceptual_physics": 0.4421768707482993, + "construction_project_management": 0.34532374100719426, + "economics": 0.39622641509433965, + "education": 0.294478527607362, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.3907563025210084, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.31851851851851853, + "food_science": 0.27972027972027974, + "genetics": 0.3352272727272727, + "global_facts": 0.348993288590604, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.4406779661016949, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.4195804195804196, + "human_sexuality": 0.373015873015873, + "international_law": 0.32432432432432434, + "journalism": 0.313953488372093, + "jurisprudence": 0.41119221411192214, + "legal_and_moral_basis": 0.5093457943925234, + "logical": 0.35772357723577236, + "machine_learning": 0.45081967213114754, + "management": 0.34285714285714286, + "marketing": 0.35, + "marxist_theory": 0.32275132275132273, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.3448275862068966, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.30857142857142855, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.3058510638297872, + "professional_psychology": 0.3577586206896552, + "public_relations": 0.3218390804597701, + "security_study": 0.32592592592592595, + "sociology": 0.3230088495575221, + "sports_science": 0.3515151515151515, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.34911242603550297, + "world_history": 0.4720496894409938, + "world_religions": 0.325 } }, "prompt_4": { - "accuracy": 0.36988430322914867, + "accuracy": 0.4011396995337593, "category_acc": { - "agronomy": 0.3668639053254438, - "anatomy": 0.32432432432432434, - "ancient_chinese": 0.29878048780487804, - "arts": 0.4125, - "astronomy": 0.2727272727272727, - "business_ethics": 0.3923444976076555, - "chinese_civil_service_exam": 0.39375, - "chinese_driving_rule": 0.45038167938931295, - "chinese_food_culture": 0.4117647058823529, - "chinese_foreign_policy": 0.48598130841121495, - "chinese_history": 0.48606811145510836, - "chinese_literature": 0.3088235294117647, - "chinese_teacher_qualification": 0.4692737430167598, - "clinical_knowledge": 0.32489451476793246, - "college_actuarial_science": 0.29245283018867924, - "college_education": 0.4392523364485981, - "college_engineering_hydrology": 0.4339622641509434, - "college_law": 0.3425925925925926, + "agronomy": 0.35502958579881655, + "anatomy": 0.33783783783783783, + "ancient_chinese": 0.32926829268292684, + "arts": 0.3875, + "astronomy": 0.30303030303030304, + "business_ethics": 0.47368421052631576, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.4198473282442748, + "chinese_food_culture": 0.39705882352941174, + "chinese_foreign_policy": 0.5420560747663551, + "chinese_history": 0.5015479876160991, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.49162011173184356, + "clinical_knowledge": 0.3206751054852321, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.3611111111111111, "college_mathematics": 0.19047619047619047, - "college_medical_statistics": 0.3867924528301887, - "college_medicine": 0.29304029304029305, - "computer_science": 0.25980392156862747, + "college_medical_statistics": 0.44339622641509435, + "college_medicine": 0.3076923076923077, + "computer_science": 0.4068627450980392, "computer_security": 0.5321637426900585, - "conceptual_physics": 0.43537414965986393, - "construction_project_management": 0.35251798561151076, - "economics": 0.44654088050314467, - "education": 0.3619631901840491, - "electrical_engineering": 0.42441860465116277, - "elementary_chinese": 0.3055555555555556, - "elementary_commonsense": 0.35353535353535354, - "elementary_information_and_technology": 0.4957983193277311, - "elementary_mathematics": 0.2391304347826087, - "ethnology": 0.3111111111111111, - "food_science": 0.40559440559440557, - "genetics": 0.3693181818181818, - "global_facts": 0.436241610738255, - "high_school_biology": 0.3727810650887574, - "high_school_chemistry": 0.32575757575757575, - "high_school_geography": 0.3898305084745763, - "high_school_mathematics": 0.2682926829268293, - "high_school_physics": 0.33636363636363636, - "high_school_politics": 0.3986013986013986, - "human_sexuality": 0.36507936507936506, - "international_law": 0.33513513513513515, - "journalism": 0.3081395348837209, - "jurisprudence": 0.40145985401459855, - "legal_and_moral_basis": 0.5514018691588785, - "logical": 0.3252032520325203, - "machine_learning": 0.4262295081967213, - "management": 0.3904761904761905, - "marketing": 0.4222222222222222, - "marxist_theory": 0.35978835978835977, - "modern_chinese": 0.31896551724137934, - "nutrition": 0.3931034482758621, - "philosophy": 0.3238095238095238, - "professional_accounting": 0.36, - "professional_law": 0.2796208530805687, - "professional_medicine": 0.324468085106383, - "professional_psychology": 0.33189655172413796, - "public_relations": 0.3275862068965517, - "security_study": 0.4444444444444444, - "sociology": 0.3938053097345133, - "sports_science": 0.3333333333333333, - "traditional_chinese_medicine": 0.2702702702702703, - "virology": 0.38461538461538464, - "world_history": 0.42857142857142855, - "world_religions": 0.39375 + "conceptual_physics": 0.4421768707482993, + "construction_project_management": 0.33093525179856115, + "economics": 0.4716981132075472, + "education": 0.5153374233128835, + "electrical_engineering": 0.4127906976744186, + "elementary_chinese": 0.3253968253968254, + "elementary_commonsense": 0.36363636363636365, + "elementary_information_and_technology": 0.5588235294117647, + "elementary_mathematics": 0.21304347826086956, + "ethnology": 0.26666666666666666, + "food_science": 0.38461538461538464, + "genetics": 0.4090909090909091, + "global_facts": 0.5167785234899329, + "high_school_biology": 0.3905325443786982, + "high_school_chemistry": 0.3333333333333333, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.42857142857142855, + "international_law": 0.372972972972973, + "journalism": 0.4418604651162791, + "jurisprudence": 0.40875912408759124, + "legal_and_moral_basis": 0.7009345794392523, + "logical": 0.3821138211382114, + "machine_learning": 0.4918032786885246, + "management": 0.48095238095238096, + "marketing": 0.4722222222222222, + "marxist_theory": 0.41798941798941797, + "modern_chinese": 0.3448275862068966, + "nutrition": 0.4413793103448276, + "philosophy": 0.4, + "professional_accounting": 0.3657142857142857, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.31648936170212766, + "professional_psychology": 0.39655172413793105, + "public_relations": 0.39655172413793105, + "security_study": 0.45185185185185184, + "sociology": 0.4336283185840708, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.4260355029585799, + "world_history": 0.484472049689441, + "world_religions": 0.41875 } }, "prompt_5": { - "accuracy": 0.2940770160594025, + "accuracy": 0.31972025556898637, "category_acc": { - "agronomy": 0.2485207100591716, - "anatomy": 0.24324324324324326, - "ancient_chinese": 0.25609756097560976, - "arts": 0.23125, - "astronomy": 0.23030303030303031, - "business_ethics": 0.31100478468899523, - "chinese_civil_service_exam": 0.33125, - "chinese_driving_rule": 0.2824427480916031, - "chinese_food_culture": 0.33088235294117646, - "chinese_foreign_policy": 0.42990654205607476, - "chinese_history": 0.4520123839009288, + "agronomy": 0.31952662721893493, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2865853658536585, + "arts": 0.2625, + "astronomy": 0.28484848484848485, + "business_ethics": 0.28708133971291866, + "chinese_civil_service_exam": 0.3375, + "chinese_driving_rule": 0.3511450381679389, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.4766355140186916, + "chinese_history": 0.48297213622291024, "chinese_literature": 0.2647058823529412, - "chinese_teacher_qualification": 0.329608938547486, - "clinical_knowledge": 0.25316455696202533, - "college_actuarial_science": 0.22641509433962265, - "college_education": 0.32710280373831774, - "college_engineering_hydrology": 0.3018867924528302, - "college_law": 0.35185185185185186, - "college_mathematics": 0.23809523809523808, - "college_medical_statistics": 0.39622641509433965, - "college_medicine": 0.2673992673992674, - "computer_science": 0.25, - "computer_security": 0.34502923976608185, - "conceptual_physics": 0.40816326530612246, - "construction_project_management": 0.302158273381295, - "economics": 0.3018867924528302, - "education": 0.3006134969325153, - "electrical_engineering": 0.27325581395348836, - "elementary_chinese": 0.2777777777777778, - "elementary_commonsense": 0.26262626262626265, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.32407407407407407, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.27106227106227104, + "computer_science": 0.35294117647058826, + "computer_security": 0.391812865497076, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.2733812949640288, + "economics": 0.29559748427672955, + "education": 0.3496932515337423, + "electrical_engineering": 0.313953488372093, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.3333333333333333, "elementary_information_and_technology": 0.3487394957983193, - "elementary_mathematics": 0.20869565217391303, - "ethnology": 0.21481481481481482, - "food_science": 0.2937062937062937, - "genetics": 0.2784090909090909, - "global_facts": 0.24161073825503357, - "high_school_biology": 0.31952662721893493, - "high_school_chemistry": 0.2803030303030303, - "high_school_geography": 0.3559322033898305, - "high_school_mathematics": 0.1951219512195122, - "high_school_physics": 0.2727272727272727, - "high_school_politics": 0.3356643356643357, - "human_sexuality": 0.2777777777777778, - "international_law": 0.3081081081081081, - "journalism": 0.23255813953488372, - "jurisprudence": 0.3381995133819951, - "legal_and_moral_basis": 0.4252336448598131, - "logical": 0.3008130081300813, - "machine_learning": 0.32786885245901637, - "management": 0.2904761904761905, - "marketing": 0.25, - "marxist_theory": 0.32275132275132273, - "modern_chinese": 0.25862068965517243, - "nutrition": 0.2689655172413793, - "philosophy": 0.26666666666666666, - "professional_accounting": 0.2857142857142857, - "professional_law": 0.3080568720379147, - "professional_medicine": 0.21808510638297873, - "professional_psychology": 0.30603448275862066, - "public_relations": 0.29310344827586204, - "security_study": 0.2814814814814815, - "sociology": 0.26991150442477874, - "sports_science": 0.2909090909090909, - "traditional_chinese_medicine": 0.23243243243243245, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.24444444444444444, + "food_science": 0.20279720279720279, + "genetics": 0.32386363636363635, + "global_facts": 0.3288590604026846, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.38181818181818183, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.3412698412698413, + "international_law": 0.2702702702702703, + "journalism": 0.28488372093023256, + "jurisprudence": 0.3722627737226277, + "legal_and_moral_basis": 0.4953271028037383, + "logical": 0.2845528455284553, + "machine_learning": 0.4098360655737705, + "management": 0.2761904761904762, + "marketing": 0.3111111111111111, + "marxist_theory": 0.31746031746031744, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.2827586206896552, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.3485714285714286, + "professional_law": 0.2938388625592417, + "professional_medicine": 0.27393617021276595, + "professional_psychology": 0.29310344827586204, + "public_relations": 0.25287356321839083, + "security_study": 0.34074074074074073, + "sociology": 0.26548672566371684, + "sports_science": 0.3212121212121212, + "traditional_chinese_medicine": 0.22702702702702704, "virology": 0.28402366863905326, - "world_history": 0.39751552795031053, - "world_religions": 0.28125 + "world_history": 0.4409937888198758, + "world_religions": 0.3125 } } }, "zbench": { "prompt_1": { - "accuracy": 0.24242424242424243 + "accuracy": 0.18181818181818182 }, "prompt_2": { - "accuracy": 0.12121212121212122 + "accuracy": 0.2727272727272727 }, "prompt_3": { - "accuracy": 0.15151515151515152 + "accuracy": 0.2727272727272727 }, "prompt_4": { - "accuracy": 0.30303030303030304 + "accuracy": 0.18181818181818182 }, "prompt_5": { - "accuracy": 0.18181818181818182 + "accuracy": 0.24242424242424243 } }, "ind_emotion": { "prompt_1": { - "accuracy": 0.625 + "accuracy": 0.6136363636363636 }, "prompt_2": { - "accuracy": 0.6386363636363637 + "accuracy": 0.6431818181818182 }, "prompt_3": { - "accuracy": 0.6136363636363636 + "accuracy": 0.5636363636363636 }, "prompt_4": { - "accuracy": 0.5409090909090909 + "accuracy": 0.5113636363636364 }, "prompt_5": { - "accuracy": 0.6204545454545455 + "accuracy": 0.6113636363636363 } }, "ocnli": { "prompt_1": { - "accuracy": 0.46915254237288134 + "accuracy": 0.4549152542372881 }, "prompt_2": { - "accuracy": 0.45389830508474577 + "accuracy": 0.4535593220338983 }, "prompt_3": { - "accuracy": 0.4908474576271186 + "accuracy": 0.48542372881355933 }, "prompt_4": { - "accuracy": 0.41559322033898305 + "accuracy": 0.4386440677966102 }, "prompt_5": { - "accuracy": 0.4216949152542373 + "accuracy": 0.43559322033898307 } }, "c3": { "prompt_1": { - "accuracy": 0.7816005983545251 + "accuracy": 0.8118922961854899 }, "prompt_2": { - "accuracy": 0.5523560209424084 + "accuracy": 0.8070306656694092 }, "prompt_3": { - "accuracy": 0.8006731488406881 + "accuracy": 0.8167539267015707 }, "prompt_4": { - "accuracy": 0.6851159311892296 + "accuracy": 0.8137621540762902 }, "prompt_5": { - "accuracy": 0.7916978309648467 + "accuracy": 0.8133881824981302 } }, "dream": { "prompt_1": { - "accuracy": 0.8809407153356198 + "accuracy": 0.876531112199902 }, "prompt_2": { - "accuracy": 0.8892699657030867 + "accuracy": 0.8897599216070554 }, "prompt_3": { - "accuracy": 0.8784909358157765 + "accuracy": 0.8799608035276825 }, "prompt_4": { - "accuracy": 0.8794708476237139 + "accuracy": 0.8775110240078393 }, "prompt_5": { - "accuracy": 0.8755512003919648 + "accuracy": 0.8701616854483096 } }, "samsum": { "prompt_1": { - "rouge1": 0.3992689686335395, - "rouge2": 0.1649072829138515, - "rougeL": 0.31251190740179147, - "avg_rouge": 0.29222938631639417 + "rouge1": 0.399413822788364, + "rouge2": 0.1644959950171181, + "rougeL": 0.31249179402812377, + "avg_rouge": 0.29213387061120194 }, "prompt_2": { - "rouge1": 0.4149958909508306, - "rouge2": 0.17332643372666745, - "rougeL": 0.3219141360236752, - "avg_rouge": 0.30341215356705775 + "rouge1": 0.41342004119217496, + "rouge2": 0.17408447013547854, + "rougeL": 0.32312748398860974, + "avg_rouge": 0.3035439984387544 }, "prompt_3": { - "rouge1": 0.3922099270623417, - "rouge2": 0.15755329731140477, - "rougeL": 0.30426789092009127, - "avg_rouge": 0.2846770384312793 + "rouge1": 0.3858609377139331, + "rouge2": 0.15362039680923922, + "rougeL": 0.2971140286848332, + "avg_rouge": 0.2788651210693352 }, "prompt_4": { - "rouge1": 0.3875422557422527, - "rouge2": 0.15520209191882986, - "rougeL": 0.29887378320084235, - "avg_rouge": 0.28053937695397496 + "rouge1": 0.38992922020602294, + "rouge2": 0.1570489435408073, + "rougeL": 0.30238141296268606, + "avg_rouge": 0.2831198589031721 }, "prompt_5": { - "rouge1": 0.41424382573544144, - "rouge2": 0.16273788460008867, - "rougeL": 0.3231591977429508, - "avg_rouge": 0.30004696935949365 + "rouge1": 0.4121653715915391, + "rouge2": 0.16065262497539948, + "rougeL": 0.3230860764110063, + "avg_rouge": 0.2986346909926483 } }, "dialogsum": { "prompt_1": { - "rouge1": 0.3471676316551315, - "rouge2": 0.12804934249301983, - "rougeL": 0.26693426071297793, - "avg_rouge": 0.24738374495370974 + "rouge1": 0.34817420710916464, + "rouge2": 0.1274144928230791, + "rougeL": 0.2660456685070806, + "avg_rouge": 0.24721145614644144 }, "prompt_2": { - "rouge1": 0.3536462719638873, - "rouge2": 0.12849295704954358, - "rougeL": 0.27132344139721254, - "avg_rouge": 0.2511542234702145 + "rouge1": 0.355401233230698, + "rouge2": 0.13136484574034482, + "rougeL": 0.27391593655739904, + "avg_rouge": 0.253560671842814 }, "prompt_3": { - "rouge1": 0.3486625789053131, - "rouge2": 0.1312013920008153, - "rougeL": 0.26913111205422957, - "avg_rouge": 0.24966502765345266 + "rouge1": 0.3519868168185481, + "rouge2": 0.13083814192154605, + "rougeL": 0.27211843276428616, + "avg_rouge": 0.25164779716812674 }, "prompt_4": { - "rouge1": 0.34210585871053295, - "rouge2": 0.1238699268921856, - "rougeL": 0.2625347098391768, - "avg_rouge": 0.24283683181396512 + "rouge1": 0.34597063608751577, + "rouge2": 0.12633009934757794, + "rougeL": 0.2654539549583252, + "avg_rouge": 0.24591823013113964 }, "prompt_5": { - "rouge1": 0.3683250571772577, - "rouge2": 0.1288145261489626, - "rougeL": 0.28486316877157086, - "avg_rouge": 0.2606675840325971 + "rouge1": 0.3718198708621898, + "rouge2": 0.13176117054123318, + "rougeL": 0.28825278307993496, + "avg_rouge": 0.2639446081611193 } }, "sst2": { "prompt_1": { - "accuracy": 0.908256880733945 + "accuracy": 0.9139908256880734 }, "prompt_2": { - "accuracy": 0.908256880733945 + "accuracy": 0.9197247706422018 }, "prompt_3": { - "accuracy": 0.9185779816513762 + "accuracy": 0.9231651376146789 }, "prompt_4": { - "accuracy": 0.9105504587155964 + "accuracy": 0.9174311926605505 }, "prompt_5": { - "accuracy": 0.6032110091743119 + "accuracy": 0.5263761467889908 } }, "cola": { "prompt_1": { - "accuracy": 0.8092042186001918 + "accuracy": 0.8149568552253116 }, "prompt_2": { - "accuracy": 0.8034515819750719 + "accuracy": 0.8092042186001918 }, "prompt_3": { - "accuracy": 0.8139980824544583 + "accuracy": 0.8187919463087249 }, "prompt_4": { - "accuracy": 0.8293384467881112 + "accuracy": 0.8312559923298178 }, "prompt_5": { - "accuracy": 0.8082454458293384 + "accuracy": 0.8178331735378715 } }, "qqp": { "prompt_1": { - "accuracy": 0.725 + "accuracy": 0.7295 }, "prompt_2": { - "accuracy": 0.814 + "accuracy": 0.8105 }, "prompt_3": { - "accuracy": 0.815 + "accuracy": 0.7955 }, "prompt_4": { - "accuracy": 0.7515 + "accuracy": 0.746 }, "prompt_5": { - "accuracy": 0.7795 + "accuracy": 0.7745 } }, "mnli": { @@ -97226,13 +97226,13 @@ "accuracy": 0.8 }, "prompt_2": { - "accuracy": 0.9 + "accuracy": 1.0 }, "prompt_3": { "accuracy": 0.8 }, "prompt_4": { - "accuracy": 0.6 + "accuracy": 0.9 }, "prompt_5": { "accuracy": 0.8 @@ -97240,13 +97240,13 @@ }, "wnli": { "prompt_1": { - "accuracy": 0.7 + "accuracy": 0.6 }, "prompt_2": { "accuracy": 0.9 }, "prompt_3": { - "accuracy": 0.7 + "accuracy": 0.6 }, "prompt_4": { "accuracy": 0.4 @@ -97269,7 +97269,7 @@ "accuracy": 0.8 }, "prompt_5": { - "accuracy": 0.8 + "accuracy": 0.9 } }, "mrpc": {