diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -89355,3427 +89355,4934 @@ } } }, - "hf_sg_regional_llama3_8b": { - "model_size": "8B", - "model_link": "https://seaeval.github.io/", + "qwen_1_5_7b": { + "model_size": "7B", + "model_link": "https://huggingface.co/Qwen/Qwen1.5-7B", "zero_shot": { + "cross_xquad": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cross_mmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cross_logiqa": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sg_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cn_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "us_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ph_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sing2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "indommlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_ind2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_vie2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_zho2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "zbench": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ind_emotion": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ocnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c3": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "dream": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "samsum": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "dialogsum": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sst2": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cola": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "qqp": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "qnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "wnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "rte": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mrpc": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + } + }, + "five_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.8909663865546219, + "overall_acc": 0.9334033613445378, "language_acc": { - "Spanish": 0.9042016806722689, - "English": 0.9168067226890756, - "Chinese": 0.8705882352941177, - "Vietnamese": 0.8722689075630252 - }, - "consistency_score_2": 0.8523809523809525, - "consistency_score_3": 0.7873949579831933, - "consistency_score_4": 0.738655462184874, + "Spanish": 0.9252100840336135, + "English": 0.9445378151260504, + "Chinese": 0.9361344537815126, + "Vietnamese": 0.9277310924369748 + }, + "consistency_score_2": 0.9215686274509806, + "consistency_score_3": 0.8852941176470589, + "consistency_score_4": 0.8596638655462185, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8857142857142857, - "Spanish,Chinese": 0.8344537815126051, - "Spanish,Vietnamese": 0.8495798319327731, - "English,Chinese": 0.8529411764705882, - "English,Vietnamese": 0.8605042016806723, - "Chinese,Vietnamese": 0.83109243697479 + "Spanish,English": 0.9378151260504202, + "Spanish,Chinese": 0.9134453781512605, + "Spanish,Vietnamese": 0.9168067226890756, + "English,Chinese": 0.9252100840336135, + "English,Vietnamese": 0.9243697478991597, + "Chinese,Vietnamese": 0.9117647058823529 }, "3_combine": { - "Spanish,English,Chinese": 0.7924369747899159, - "Spanish,English,Vietnamese": 0.8058823529411765, - "Spanish,Chinese,Vietnamese": 0.7689075630252101, - "English,Chinese,Vietnamese": 0.7823529411764706 + "Spanish,English,Chinese": 0.8915966386554622, + "Spanish,English,Vietnamese": 0.8907563025210085, + "Spanish,Chinese,Vietnamese": 0.8739495798319328, + "English,Chinese,Vietnamese": 0.8848739495798319 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.738655462184874 + "Spanish,English,Chinese,Vietnamese": 0.8596638655462185 } }, - "AC3_2": 0.871246664512616, - "AC3_3": 0.8359849835042579, - "AC3_4": 0.8076931326373135 - }, - "prompt_2": { - "overall_acc": 0.8836134453781512, - "language_acc": { - "Spanish": 0.8773109243697479, - "English": 0.9218487394957983, - "Chinese": 0.8773109243697479, - "Vietnamese": 0.8579831932773109 - }, - "consistency_score_2": 0.8379551820728292, - "consistency_score_3": 0.7653361344537815, - "consistency_score_4": 0.7109243697478992, - "detailed_consistency_score": { - "2_combine": { - "Spanish,English": 0.8621848739495799, - "Spanish,Chinese": 0.8134453781512605, - "Spanish,Vietnamese": 0.8210084033613445, - "English,Chinese": 0.8563025210084033, - "English,Vietnamese": 0.8588235294117647, - "Chinese,Vietnamese": 0.8159663865546218 - }, - "3_combine": { - "Spanish,English,Chinese": 0.7722689075630252, - "Spanish,English,Vietnamese": 0.7781512605042017, - "Spanish,Chinese,Vietnamese": 0.7369747899159664, - "English,Chinese,Vietnamese": 0.7739495798319328 - }, - "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7109243697478992 - } - }, - "AC3_2": 0.8601788550911803, - "AC3_3": 0.8202328401878939, - "AC3_4": 0.7879177599399577 - }, - "prompt_3": { - "overall_acc": 0.8510504201680673, - "language_acc": { - "Spanish": 0.8092436974789916, - "English": 0.9016806722689076, - "Chinese": 0.8428571428571429, - "Vietnamese": 0.8504201680672269 - }, - "consistency_score_2": 0.7725490196078431, - "consistency_score_3": 0.6758403361344538, - "consistency_score_4": 0.6033613445378151, - "detailed_consistency_score": { - "2_combine": { - "Spanish,English": 0.7756302521008404, - "Spanish,Chinese": 0.7344537815126051, - "Spanish,Vietnamese": 0.7344537815126051, - "English,Chinese": 0.8033613445378152, - "English,Vietnamese": 0.819327731092437, - "Chinese,Vietnamese": 0.7680672268907563 - }, - "3_combine": { - "Spanish,English,Chinese": 0.6722689075630253, - "Spanish,English,Vietnamese": 0.6840336134453782, - "Spanish,Chinese,Vietnamese": 0.6369747899159663, - "English,Chinese,Vietnamese": 0.7100840336134454 - }, - "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.6033613445378151 - } - }, - "AC3_2": 0.8099019395916194, - "AC3_3": 0.7533927356910746, - "AC3_4": 0.706114923170485 - }, - "prompt_4": { - "overall_acc": 0.8863445378151261, + "AC3_2": 0.9274482415064308, + "AC3_3": 0.908712432595497, + "AC3_4": 0.8950173531127907 + } + }, + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.5504761904761905, "language_acc": { - "Spanish": 0.8932773109243698, - "English": 0.9285714285714286, - "Chinese": 0.8672268907563025, - "Vietnamese": 0.8563025210084033 + "Filipino": 0.4, + "Vietnamese": 0.52, + "Chinese": 0.64, + "Spanish": 0.6, + "Malay": 0.49333333333333335, + "Indonesian": 0.4866666666666667, + "English": 0.7133333333333334 }, - "consistency_score_2": 0.8393557422969188, - "consistency_score_3": 0.7674369747899159, - "consistency_score_4": 0.7151260504201681, + "consistency_score_2": 0.5803174603174602, + "consistency_score_3": 0.4139047619047619, + "consistency_score_4": 0.3243809523809523, + "consistency_score_5": 0.26920634920634917, + "consistency_score_6": 0.23047619047619047, + "consistency_score_7": 0.2, "detailed_consistency_score": { "2_combine": { - "Spanish,English": 0.8873949579831932, - "Spanish,Chinese": 0.8176470588235294, - "Spanish,Vietnamese": 0.83109243697479, - "English,Chinese": 0.8487394957983193, - "English,Vietnamese": 0.8470588235294118, - "Chinese,Vietnamese": 0.8042016806722689 + "Filipino,Vietnamese": 0.46, + "Filipino,Chinese": 0.48, + "Filipino,Spanish": 0.48, + "Filipino,Malay": 0.5733333333333334, + "Filipino,Indonesian": 0.5466666666666666, + "Filipino,English": 0.5333333333333333, + "Vietnamese,Chinese": 0.5066666666666667, + "Vietnamese,Spanish": 0.6066666666666667, + "Vietnamese,Malay": 0.52, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,English": 0.6533333333333333, + "Chinese,Spanish": 0.6533333333333333, + "Chinese,Malay": 0.5133333333333333, + "Chinese,Indonesian": 0.5866666666666667, + "Chinese,English": 0.6866666666666666, + "Spanish,Malay": 0.6, + "Spanish,Indonesian": 0.6733333333333333, + "Spanish,English": 0.7333333333333333, + "Malay,Indonesian": 0.6133333333333333, + "Malay,English": 0.5866666666666667, + "Indonesian,English": 0.62 }, "3_combine": { - "Spanish,English,Chinese": 0.7831932773109244, - "Spanish,English,Vietnamese": 0.7907563025210084, - "Spanish,Chinese,Vietnamese": 0.7369747899159664, - "English,Chinese,Vietnamese": 0.7588235294117647 + "Filipino,Vietnamese,Chinese": 0.28, + "Filipino,Vietnamese,Spanish": 0.32666666666666666, + "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Indonesian": 0.34, + "Filipino,Vietnamese,English": 0.36, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,Malay": 0.3466666666666667, + "Filipino,Chinese,Indonesian": 0.36666666666666664, + "Filipino,Chinese,English": 0.4066666666666667, + "Filipino,Spanish,Malay": 0.38, + "Filipino,Spanish,Indonesian": 0.3933333333333333, + "Filipino,Spanish,English": 0.3933333333333333, + "Filipino,Malay,Indonesian": 0.42, + "Filipino,Malay,English": 0.4, + "Filipino,Indonesian,English": 0.3933333333333333, + "Vietnamese,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese,Malay": 0.3333333333333333, + "Vietnamese,Chinese,Indonesian": 0.38, + "Vietnamese,Chinese,English": 0.44666666666666666, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.46, + "Vietnamese,Spanish,English": 0.52, + "Vietnamese,Malay,Indonesian": 0.4, + "Vietnamese,Malay,English": 0.4266666666666667, + "Vietnamese,Indonesian,English": 0.4533333333333333, + "Chinese,Spanish,Malay": 0.41333333333333333, + "Chinese,Spanish,Indonesian": 0.4866666666666667, + "Chinese,Spanish,English": 0.5466666666666666, + "Chinese,Malay,Indonesian": 0.4, + "Chinese,Malay,English": 0.43333333333333335, + "Chinese,Indonesian,English": 0.4866666666666667, + "Spanish,Malay,Indonesian": 0.48, + "Spanish,Malay,English": 0.4866666666666667, + "Spanish,Indonesian,English": 0.54, + "Malay,Indonesian,English": 0.44666666666666666 }, "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.7151260504201681 - } - }, - "AC3_2": 0.8622104150985256, - "AC3_3": 0.8226160051938194, - "AC3_4": 0.7915825282787072 - }, - "prompt_5": { - "overall_acc": 0.8676470588235294, - "language_acc": { - "Spanish": 0.8663865546218488, - "English": 0.9100840336134454, - "Chinese": 0.8470588235294118, - "Vietnamese": 0.8470588235294118 - }, - "consistency_score_2": 0.8091036414565825, - "consistency_score_3": 0.7239495798319329, - "consistency_score_4": 0.6596638655462185, - "detailed_consistency_score": { - "2_combine": { - "Spanish,English": 0.8521008403361344, - "Spanish,Chinese": 0.7781512605042017, - "Spanish,Vietnamese": 0.8016806722689076, - "English,Chinese": 0.8243697478991596, - "English,Vietnamese": 0.8243697478991596, - "Chinese,Vietnamese": 0.7739495798319328 + "Filipino,Vietnamese,Chinese,Spanish": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Indonesian": 0.28, + "Filipino,Vietnamese,Spanish,English": 0.29333333333333333, + "Filipino,Vietnamese,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Malay,English": 0.3, + "Filipino,Vietnamese,Indonesian,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,Malay": 0.2866666666666667, + "Filipino,Chinese,Spanish,Indonesian": 0.3, + "Filipino,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.2866666666666667, + "Filipino,Chinese,Malay,English": 0.30666666666666664, + "Filipino,Chinese,Indonesian,English": 0.32666666666666666, + "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Spanish,Malay,English": 0.32666666666666666, + "Filipino,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.38, + "Vietnamese,Chinese,Malay,Indonesian": 0.3, + "Vietnamese,Chinese,Malay,English": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian,English": 0.35333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Spanish,Malay,English": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.42, + "Vietnamese,Malay,Indonesian,English": 0.36, + "Chinese,Spanish,Malay,Indonesian": 0.35333333333333333, + "Chinese,Spanish,Malay,English": 0.38, + "Chinese,Spanish,Indonesian,English": 0.4266666666666667, + "Chinese,Malay,Indonesian,English": 0.36, + "Spanish,Malay,Indonesian,English": 0.4 }, - "3_combine": { - "Spanish,English,Chinese": 0.7361344537815127, - "Spanish,English,Vietnamese": 0.7453781512605042, - "Spanish,Chinese,Vietnamese": 0.6907563025210084, - "English,Chinese,Vietnamese": 0.7235294117647059 + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.22, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.24, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Malay,English": 0.24, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.24, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.26, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.26, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.25333333333333335, + "Filipino,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.28, + "Filipino,Chinese,Malay,Indonesian,English": 0.26666666666666666, + "Filipino,Spanish,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.28, + "Vietnamese,Chinese,Spanish,Malay,English": 0.3, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.32666666666666666 }, - "4_combine": { - "Spanish,English,Chinese,Vietnamese": 0.6596638655462185 + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.22, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.22, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2 } }, - "AC3_2": 0.837353334200567, - "AC3_3": 0.7893114478664273, - "AC3_4": 0.7494942956059156 + "AC3_2": 0.5650030747341315, + "AC3_3": 0.4725201508120582, + "AC3_4": 0.4082129119415617, + "AC3_5": 0.36158312119818753, + "AC3_6": 0.3249152148248294, + "AC3_7": 0.29340101518933104 } }, - "cross_mmlu": { + "cross_logiqa": { "prompt_1": { - "overall_acc": 0.48857142857142855, + "overall_acc": 0.4967532467532467, "language_acc": { - "Filipino": 0.44, - "Vietnamese": 0.4666666666666667, - "Chinese": 0.5266666666666666, - "Spanish": 0.5, - "Malay": 0.44666666666666666, - "Indonesian": 0.4533333333333333, - "English": 0.5866666666666667 + "Indonesian": 0.5227272727272727, + "English": 0.5397727272727273, + "Filipino": 0.3522727272727273, + "Spanish": 0.5170454545454546, + "Chinese": 0.625, + "Malay": 0.44886363636363635, + "Vietnamese": 0.4715909090909091 }, - "consistency_score_2": 0.46253968253968253, - "consistency_score_3": 0.2767619047619047, - "consistency_score_4": 0.18952380952380954, - "consistency_score_5": 0.14253968253968252, - "consistency_score_6": 0.11619047619047619, - "consistency_score_7": 0.1, + "consistency_score_2": 0.5395021645021645, + "consistency_score_3": 0.36071428571428565, + "consistency_score_4": 0.26720779220779217, + "consistency_score_5": 0.21022727272727273, + "consistency_score_6": 0.17126623376623376, + "consistency_score_7": 0.14204545454545456, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.4, - "Filipino,Chinese": 0.43333333333333335, - "Filipino,Spanish": 0.4533333333333333, - "Filipino,Malay": 0.44666666666666666, - "Filipino,Indonesian": 0.43333333333333335, - "Filipino,English": 0.43333333333333335, - "Vietnamese,Chinese": 0.4066666666666667, - "Vietnamese,Spanish": 0.5133333333333333, - "Vietnamese,Malay": 0.48, - "Vietnamese,Indonesian": 0.4866666666666667, - "Vietnamese,English": 0.49333333333333335, - "Chinese,Spanish": 0.46, - "Chinese,Malay": 0.43333333333333335, - "Chinese,Indonesian": 0.47333333333333333, - "Chinese,English": 0.46, - "Spanish,Malay": 0.44666666666666666, - "Spanish,Indonesian": 0.49333333333333335, - "Spanish,English": 0.5066666666666667, - "Malay,Indonesian": 0.5333333333333333, - "Malay,English": 0.46, - "Indonesian,English": 0.4666666666666667 + "Indonesian,English": 0.5681818181818182, + "Indonesian,Filipino": 0.45454545454545453, + "Indonesian,Spanish": 0.5738636363636364, + "Indonesian,Chinese": 0.5227272727272727, + "Indonesian,Malay": 0.5965909090909091, + "Indonesian,Vietnamese": 0.5454545454545454, + "English,Filipino": 0.3977272727272727, + "English,Spanish": 0.6704545454545454, + "English,Chinese": 0.6761363636363636, + "English,Malay": 0.5454545454545454, + "English,Vietnamese": 0.625, + "Filipino,Spanish": 0.48295454545454547, + "Filipino,Chinese": 0.42613636363636365, + "Filipino,Malay": 0.4431818181818182, + "Filipino,Vietnamese": 0.42045454545454547, + "Spanish,Chinese": 0.5738636363636364, + "Spanish,Malay": 0.5681818181818182, + "Spanish,Vietnamese": 0.5568181818181818, + "Chinese,Malay": 0.5284090909090909, + "Chinese,Vietnamese": 0.5397727272727273, + "Malay,Vietnamese": 0.6136363636363636 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.22, - "Filipino,Vietnamese,Spanish": 0.26666666666666666, - "Filipino,Vietnamese,Malay": 0.22666666666666666, - "Filipino,Vietnamese,Indonesian": 0.24, - "Filipino,Vietnamese,English": 0.24666666666666667, - "Filipino,Chinese,Spanish": 0.2866666666666667, - "Filipino,Chinese,Malay": 0.25333333333333335, - "Filipino,Chinese,Indonesian": 0.2733333333333333, - "Filipino,Chinese,English": 0.26, - "Filipino,Spanish,Malay": 0.25333333333333335, - "Filipino,Spanish,Indonesian": 0.3, - "Filipino,Spanish,English": 0.2733333333333333, - "Filipino,Malay,Indonesian": 0.2733333333333333, - "Filipino,Malay,English": 0.24666666666666667, - "Filipino,Indonesian,English": 0.24, - "Vietnamese,Chinese,Spanish": 0.28, - "Vietnamese,Chinese,Malay": 0.23333333333333334, - "Vietnamese,Chinese,Indonesian": 0.26, - "Vietnamese,Chinese,English": 0.28, - "Vietnamese,Spanish,Malay": 0.29333333333333333, - "Vietnamese,Spanish,Indonesian": 0.32666666666666666, - "Vietnamese,Spanish,English": 0.32666666666666666, - "Vietnamese,Malay,Indonesian": 0.30666666666666664, - "Vietnamese,Malay,English": 0.3, - "Vietnamese,Indonesian,English": 0.3, - "Chinese,Spanish,Malay": 0.29333333333333333, - "Chinese,Spanish,Indonesian": 0.3, - "Chinese,Spanish,English": 0.30666666666666664, - "Chinese,Malay,Indonesian": 0.29333333333333333, - "Chinese,Malay,English": 0.26, - "Chinese,Indonesian,English": 0.28, - "Spanish,Malay,Indonesian": 0.31333333333333335, - "Spanish,Malay,English": 0.28, - "Spanish,Indonesian,English": 0.32, - "Malay,Indonesian,English": 0.2733333333333333 - }, - "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Malay": 0.14, - "Filipino,Vietnamese,Chinese,Indonesian": 0.16, - "Filipino,Vietnamese,Chinese,English": 0.16, - "Filipino,Vietnamese,Spanish,Malay": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Indonesian": 0.2, - "Filipino,Vietnamese,Spanish,English": 0.2, - "Filipino,Vietnamese,Malay,Indonesian": 0.16, - "Filipino,Vietnamese,Malay,English": 0.16, - "Filipino,Vietnamese,Indonesian,English": 0.17333333333333334, - "Filipino,Chinese,Spanish,Malay": 0.18, - "Filipino,Chinese,Spanish,Indonesian": 0.21333333333333335, - "Filipino,Chinese,Spanish,English": 0.19333333333333333, - "Filipino,Chinese,Malay,Indonesian": 0.18666666666666668, - "Filipino,Chinese,Malay,English": 0.17333333333333334, - "Filipino,Chinese,Indonesian,English": 0.17333333333333334, - "Filipino,Spanish,Malay,Indonesian": 0.2, - "Filipino,Spanish,Malay,English": 0.17333333333333334, - "Filipino,Spanish,Indonesian,English": 0.19333333333333333, - "Filipino,Malay,Indonesian,English": 0.15333333333333332, - "Vietnamese,Chinese,Spanish,Malay": 0.18666666666666668, - "Vietnamese,Chinese,Spanish,Indonesian": 0.2, - "Vietnamese,Chinese,Spanish,English": 0.22, - "Vietnamese,Chinese,Malay,Indonesian": 0.19333333333333333, - "Vietnamese,Chinese,Malay,English": 0.16, - "Vietnamese,Chinese,Indonesian,English": 0.2, - "Vietnamese,Spanish,Malay,Indonesian": 0.23333333333333334, - "Vietnamese,Spanish,Malay,English": 0.22, - "Vietnamese,Spanish,Indonesian,English": 0.22666666666666666, - "Vietnamese,Malay,Indonesian,English": 0.2, - "Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, - "Chinese,Spanish,Malay,English": 0.20666666666666667, - "Chinese,Spanish,Indonesian,English": 0.22666666666666666, - "Chinese,Malay,Indonesian,English": 0.19333333333333333, - "Spanish,Malay,Indonesian,English": 0.20666666666666667 + "Indonesian,English,Filipino": 0.26704545454545453, + "Indonesian,English,Spanish": 0.4318181818181818, + "Indonesian,English,Chinese": 0.4090909090909091, + "Indonesian,English,Malay": 0.39204545454545453, + "Indonesian,English,Vietnamese": 0.4034090909090909, + "Indonesian,Filipino,Spanish": 0.32386363636363635, + "Indonesian,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.3068181818181818, + "Indonesian,Filipino,Vietnamese": 0.2840909090909091, + "Indonesian,Spanish,Chinese": 0.375, + "Indonesian,Spanish,Malay": 0.4147727272727273, + "Indonesian,Spanish,Vietnamese": 0.3693181818181818, + "Indonesian,Chinese,Malay": 0.375, + "Indonesian,Chinese,Vietnamese": 0.3693181818181818, + "Indonesian,Malay,Vietnamese": 0.42613636363636365, + "English,Filipino,Spanish": 0.32954545454545453, + "English,Filipino,Chinese": 0.3068181818181818, + "English,Filipino,Malay": 0.2727272727272727, + "English,Filipino,Vietnamese": 0.2727272727272727, + "English,Spanish,Chinese": 0.4943181818181818, + "English,Spanish,Malay": 0.42613636363636365, + "English,Spanish,Vietnamese": 0.44886363636363635, + "English,Chinese,Malay": 0.4147727272727273, + "English,Chinese,Vietnamese": 0.45454545454545453, + "English,Malay,Vietnamese": 0.4318181818181818, + "Filipino,Spanish,Chinese": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.32386363636363635, + "Filipino,Spanish,Vietnamese": 0.30113636363636365, + "Filipino,Chinese,Malay": 0.2840909090909091, + "Filipino,Chinese,Vietnamese": 0.26704545454545453, + "Filipino,Malay,Vietnamese": 0.3125, + "Spanish,Chinese,Malay": 0.39204545454545453, + "Spanish,Chinese,Vietnamese": 0.3806818181818182, + "Spanish,Malay,Vietnamese": 0.4147727272727273, + "Chinese,Malay,Vietnamese": 0.3806818181818182 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.22727272727272727, + "Indonesian,English,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Malay": 0.2215909090909091, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.32954545454545453, + "Indonesian,English,Spanish,Malay": 0.32386363636363635, + "Indonesian,English,Spanish,Vietnamese": 0.3125, + "Indonesian,English,Chinese,Malay": 0.3125, + "Indonesian,English,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,English,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.25, + "Indonesian,Filipino,Spanish,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Chinese,Malay": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Spanish,Chinese,Malay": 0.29545454545454547, + "Indonesian,Spanish,Chinese,Vietnamese": 0.26704545454545453, + "Indonesian,Spanish,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish,Chinese": 0.2556818181818182, + "English,Filipino,Spanish,Malay": 0.23295454545454544, + "English,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "English,Filipino,Chinese,Malay": 0.22727272727272727, + "English,Filipino,Chinese,Vietnamese": 0.22727272727272727, + "English,Filipino,Malay,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese,Malay": 0.3465909090909091, + "English,Spanish,Chinese,Vietnamese": 0.35795454545454547, + "English,Spanish,Malay,Vietnamese": 0.3465909090909091, + "English,Chinese,Malay,Vietnamese": 0.32954545454545453, + "Filipino,Spanish,Chinese,Malay": 0.2215909090909091, + "Filipino,Spanish,Chinese,Vietnamese": 0.2215909090909091, + "Filipino,Spanish,Malay,Vietnamese": 0.25, + "Filipino,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Spanish,Chinese,Malay,Vietnamese": 0.29545454545454547 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.14, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.14, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Malay,English": 0.10666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.13333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.14, - "Filipino,Vietnamese,Spanish,Malay,English": 0.14, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.12, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.16, - "Filipino,Chinese,Spanish,Malay,English": 0.13333333333333333, - "Filipino,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Malay,Indonesian,English": 0.12666666666666668, - "Filipino,Spanish,Malay,Indonesian,English": 0.13333333333333333, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.15333333333333332, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.16666666666666666, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.14666666666666667, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.16666666666666666, - "Chinese,Spanish,Malay,Indonesian,English": 0.17333333333333334 + "Indonesian,English,Filipino,Spanish,Chinese": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Malay": 0.1875, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Malay": 0.1875, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.19886363636363635, + "Indonesian,English,Spanish,Chinese,Malay": 0.26704545454545453, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.25, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, + "English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "English,Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "English,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "English,Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.1, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.12, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.10666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.11333333333333333, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.14 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.1 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456 } }, - "AC3_2": 0.4751993133202454, - "AC3_3": 0.35335703614809827, - "AC3_4": 0.27310593896454, - "AC3_5": 0.22069272776037455, - "AC3_6": 0.18773453315230937, - "AC3_7": 0.16601941744752097 - }, - "prompt_2": { - "overall_acc": 0.5247619047619047, - "language_acc": { - "Filipino": 0.4866666666666667, - "Vietnamese": 0.56, - "Chinese": 0.56, - "Spanish": 0.5533333333333333, - "Malay": 0.4533333333333333, - "Indonesian": 0.4533333333333333, - "English": 0.6066666666666667 - }, - "consistency_score_2": 0.480952380952381, - "consistency_score_3": 0.298095238095238, - "consistency_score_4": 0.21142857142857144, - "consistency_score_5": 0.1634920634920635, - "consistency_score_6": 0.13333333333333336, - "consistency_score_7": 0.11333333333333333, - "detailed_consistency_score": { - "2_combine": { - "Filipino,Vietnamese": 0.47333333333333333, - "Filipino,Chinese": 0.49333333333333335, - "Filipino,Spanish": 0.4533333333333333, - "Filipino,Malay": 0.52, - "Filipino,Indonesian": 0.4866666666666667, - "Filipino,English": 0.4266666666666667, - "Vietnamese,Chinese": 0.5133333333333333, - "Vietnamese,Spanish": 0.5066666666666667, - "Vietnamese,Malay": 0.42, - "Vietnamese,Indonesian": 0.4666666666666667, - "Vietnamese,English": 0.48, - "Chinese,Spanish": 0.5066666666666667, - "Chinese,Malay": 0.4666666666666667, - "Chinese,Indonesian": 0.47333333333333333, - "Chinese,English": 0.48, - "Spanish,Malay": 0.46, - "Spanish,Indonesian": 0.49333333333333335, - "Spanish,English": 0.5666666666666667, - "Malay,Indonesian": 0.46, - "Malay,English": 0.43333333333333335, - "Indonesian,English": 0.52 - }, - "3_combine": { - "Filipino,Vietnamese,Chinese": 0.3333333333333333, - "Filipino,Vietnamese,Spanish": 0.30666666666666664, - "Filipino,Vietnamese,Malay": 0.28, - "Filipino,Vietnamese,Indonesian": 0.29333333333333333, - "Filipino,Vietnamese,English": 0.2866666666666667, - "Filipino,Chinese,Spanish": 0.31333333333333335, - "Filipino,Chinese,Malay": 0.30666666666666664, - "Filipino,Chinese,Indonesian": 0.30666666666666664, - "Filipino,Chinese,English": 0.3, - "Filipino,Spanish,Malay": 0.3, - "Filipino,Spanish,Indonesian": 0.28, - "Filipino,Spanish,English": 0.29333333333333333, - "Filipino,Malay,Indonesian": 0.31333333333333335, - "Filipino,Malay,English": 0.2733333333333333, - "Filipino,Indonesian,English": 0.2866666666666667, - "Vietnamese,Chinese,Spanish": 0.34, - "Vietnamese,Chinese,Malay": 0.2733333333333333, - "Vietnamese,Chinese,Indonesian": 0.3, - "Vietnamese,Chinese,English": 0.30666666666666664, - "Vietnamese,Spanish,Malay": 0.29333333333333333, - "Vietnamese,Spanish,Indonesian": 0.31333333333333335, - "Vietnamese,Spanish,English": 0.3466666666666667, - "Vietnamese,Malay,Indonesian": 0.26, - "Vietnamese,Malay,English": 0.25333333333333335, - "Vietnamese,Indonesian,English": 0.3, - "Chinese,Spanish,Malay": 0.29333333333333333, - "Chinese,Spanish,Indonesian": 0.31333333333333335, - "Chinese,Spanish,English": 0.32666666666666666, - "Chinese,Malay,Indonesian": 0.29333333333333333, - "Chinese,Malay,English": 0.26, - "Chinese,Indonesian,English": 0.3, - "Spanish,Malay,Indonesian": 0.2733333333333333, - "Spanish,Malay,English": 0.2866666666666667, - "Spanish,Indonesian,English": 0.36, - "Malay,Indonesian,English": 0.26666666666666666 - }, - "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.24, - "Filipino,Vietnamese,Chinese,Malay": 0.20666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian": 0.23333333333333334, - "Filipino,Vietnamese,Chinese,English": 0.23333333333333334, - "Filipino,Vietnamese,Spanish,Malay": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,Indonesian": 0.22666666666666666, - "Filipino,Vietnamese,Spanish,English": 0.22666666666666666, - "Filipino,Vietnamese,Malay,Indonesian": 0.2, - "Filipino,Vietnamese,Malay,English": 0.18666666666666668, - "Filipino,Vietnamese,Indonesian,English": 0.2, - "Filipino,Chinese,Spanish,Malay": 0.2, - "Filipino,Chinese,Spanish,Indonesian": 0.20666666666666667, - "Filipino,Chinese,Spanish,English": 0.21333333333333335, - "Filipino,Chinese,Malay,Indonesian": 0.23333333333333334, - "Filipino,Chinese,Malay,English": 0.2, - "Filipino,Chinese,Indonesian,English": 0.22, - "Filipino,Spanish,Malay,Indonesian": 0.22666666666666666, - "Filipino,Spanish,Malay,English": 0.21333333333333335, - "Filipino,Spanish,Indonesian,English": 0.2, - "Filipino,Malay,Indonesian,English": 0.2, - "Vietnamese,Chinese,Spanish,Malay": 0.22, - "Vietnamese,Chinese,Spanish,Indonesian": 0.24, - "Vietnamese,Chinese,Spanish,English": 0.24, - "Vietnamese,Chinese,Malay,Indonesian": 0.18666666666666668, - "Vietnamese,Chinese,Malay,English": 0.17333333333333334, - "Vietnamese,Chinese,Indonesian,English": 0.20666666666666667, - "Vietnamese,Spanish,Malay,Indonesian": 0.2, - "Vietnamese,Spanish,Malay,English": 0.2, - "Vietnamese,Spanish,Indonesian,English": 0.23333333333333334, - "Vietnamese,Malay,Indonesian,English": 0.18666666666666668, - "Chinese,Spanish,Malay,Indonesian": 0.21333333333333335, - "Chinese,Spanish,Malay,English": 0.2, - "Chinese,Spanish,Indonesian,English": 0.22666666666666666, - "Chinese,Malay,Indonesian,English": 0.19333333333333333, - "Spanish,Malay,Indonesian,English": 0.18666666666666668 - }, - "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,Malay,English": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.17333333333333334, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Spanish,Malay,English": 0.16666666666666666, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.16666666666666666, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.14666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,English": 0.15333333333333332, - "Filipino,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Chinese,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Spanish,Malay,Indonesian,English": 0.16, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Malay,English": 0.16, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.17333333333333334, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.14, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.14666666666666667, - "Chinese,Spanish,Malay,Indonesian,English": 0.15333333333333332 + "AC3_2": 0.517245939389236, + "AC3_3": 0.41794233781029455, + "AC3_4": 0.347495046344588, + "AC3_5": 0.2954284521030827, + "AC3_6": 0.25471430145624463, + "AC3_7": 0.22091948708566592 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.6699029126213593 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.8 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.6915887850467289 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.5, + "culture": 0.7, + "film": 0.3, + "law": 0.5, + "geography": 0.7 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.324299850031996 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.4615795446959076, + "category_acc": { + "History": 0.43373493975903615, + "Geography": 0.463265306122449, + "Lampungic": 0.2653061224489796, + "Social science": 0.6828046744574291, + "Balinese": 0.2823779193205945, + "Makassarese": 0.3548387096774194, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.3284671532846715, + "Biology": 0.39644970414201186, + "Science": 0.5882352941176471, + "Christian religion": 0.5422885572139303, + "Art": 0.5457570715474209, + "Islam religion": 0.5305832147937412, + "Hindu religion": 0.44666666666666666, + "Madurese": 0.2745762711864407, + "Sport": 0.5, + "Indonesian language": 0.5438978829389788, + "Physics": 0.4080808080808081, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.3486238532110092, + "Sociology": 0.4717741935483871, + "Economy": 0.4426229508196721, + "Sundanese": 0.3526361279170268, + "Javanese": 0.3034274193548387, + "Civic education": 0.5565092989985694 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.358063538041651 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.30566338247234903 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.25377859749436343 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.3455666218492509 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.5997666277712952 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.5871290668573471, + "category_acc": { + "high_school_european_history": 0.725609756097561, + "business_ethics": 0.6767676767676768, + "clinical_knowledge": 0.6818181818181818, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.4, + "high_school_world_history": 0.7838983050847458, + "virology": 0.4666666666666667, + "high_school_microeconomics": 0.6371308016877637, + "econometrics": 0.415929203539823, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7119741100323624, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.45907473309608543, + "philosophy": 0.6709677419354839, + "professional_medicine": 0.6014760147601476, + "nutrition": 0.6688524590163935, + "global_facts": 0.40404040404040403, + "machine_learning": 0.43243243243243246, + "security_studies": 0.7131147540983607, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.5793780687397708, + "prehistory": 0.6934984520123839, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.7153846153846154, + "college_medicine": 0.622093023255814, + "high_school_government_and_politics": 0.8177083333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6481481481481481, + "high_school_geography": 0.7817258883248731, + "elementary_mathematics": 0.46949602122015915, + "human_aging": 0.6306306306306306, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.7959558823529411, + "formal_logic": 0.376, + "high_school_statistics": 0.4558139534883721, + "international_law": 0.7583333333333333, + "high_school_mathematics": 0.3271375464684015, + "high_school_computer_science": 0.6464646464646465, + "conceptual_physics": 0.5213675213675214, + "miscellaneous": 0.7851662404092071, + "high_school_chemistry": 0.5693069306930693, + "marketing": 0.8369098712446352, + "professional_law": 0.42204827136333983, + "management": 0.803921568627451, + "college_physics": 0.43564356435643564, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.7529411764705882, + "sociology": 0.79, + "us_foreign_policy": 0.8383838383838383, + "high_school_macroeconomics": 0.5655526992287918, + "computer_security": 0.7373737373737373, + "moral_scenarios": 0.2841163310961969, + "moral_disputes": 0.6434782608695652, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.6225165562913907, + "college_biology": 0.6433566433566433 + } + } + }, + "c_eval": { + "prompt_1": { + "accuracy": 0.6946508172362555 + } + }, + "c_eval_full": { + "prompt_1": { + "accuracy": 0.7235367372353674, + "category_acc": { + "computer_network": 0.6666666666666666, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.6153846153846154, + "college_programming": 0.6666666666666666, + "college_physics": 0.625, + "college_chemistry": 0.5517241379310345, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.5217391304347826, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.5238095238095238, + "metrology_engineer": 0.7241379310344828, + "high_school_mathematics": 0.5217391304347826, + "high_school_physics": 0.8333333333333334, + "high_school_chemistry": 0.7083333333333334, + "high_school_biology": 0.9166666666666666, + "middle_school_mathematics": 0.7083333333333334, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.875, + "middle_school_chemistry": 0.96, + "veterinary_medicine": 0.8214285714285714, + "college_economics": 0.6, + "business_administration": 0.7631578947368421, + "marxism": 0.875, + "mao_zedong_thought": 0.896551724137931, + "education_science": 0.7941176470588235, + "teacher_qualification": 0.8571428571428571, + "high_school_politics": 0.9583333333333334, + "high_school_geography": 0.9166666666666666, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8928571428571429, + "ideological_and_moral_cultivation": 1.0, + "logic": 0.5185185185185185, + "law": 0.6896551724137931, + "chinese_language_and_literature": 0.6785714285714286, + "art_studies": 0.7105263157894737, + "professional_tour_guide": 0.7941176470588235, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.8333333333333334, + "high_school_history": 0.92, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.6346153846153846, + "sports_science": 0.625, + "plant_protection": 0.8888888888888888, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.6470588235294118, + "accountant": 0.7037037037037037, + "fire_engineer": 0.6388888888888888, + "environmental_impact_assessment_engineer": 0.6666666666666666, + "tax_accountant": 0.6666666666666666, + "physician": 0.6851851851851852 + } + } + }, + "cmmlu": { + "prompt_1": { + "accuracy": 0.7204301075268817 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.7157658435503367, + "category_acc": { + "agronomy": 0.5976331360946746, + "anatomy": 0.7027027027027027, + "ancient_chinese": 0.42073170731707316, + "arts": 0.88125, + "astronomy": 0.4, + "business_ethics": 0.6411483253588517, + "chinese_civil_service_exam": 0.675, + "chinese_driving_rule": 0.9007633587786259, + "chinese_food_culture": 0.6691176470588235, + "chinese_foreign_policy": 0.8504672897196262, + "chinese_history": 0.9411764705882353, + "chinese_literature": 0.5980392156862745, + "chinese_teacher_qualification": 0.8938547486033519, + "clinical_knowledge": 0.6919831223628692, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.822429906542056, + "college_engineering_hydrology": 0.5566037735849056, + "college_law": 0.6759259259259259, + "college_mathematics": 0.4, + "college_medical_statistics": 0.6132075471698113, + "college_medicine": 0.7509157509157509, + "computer_science": 0.7401960784313726, + "computer_security": 0.8362573099415205, + "conceptual_physics": 0.8503401360544217, + "construction_project_management": 0.60431654676259, + "economics": 0.6792452830188679, + "education": 0.7607361963190185, + "electrical_engineering": 0.7383720930232558, + "elementary_chinese": 0.7222222222222222, + "elementary_commonsense": 0.696969696969697, + "elementary_information_and_technology": 0.8823529411764706, + "elementary_mathematics": 0.5304347826086957, + "ethnology": 0.6518518518518519, + "food_science": 0.6083916083916084, + "genetics": 0.5568181818181818, + "global_facts": 0.7449664429530202, + "high_school_biology": 0.8994082840236687, + "high_school_chemistry": 0.7727272727272727, + "high_school_geography": 0.7966101694915254, + "high_school_mathematics": 0.5, + "high_school_physics": 0.6818181818181818, + "high_school_politics": 0.8321678321678322, + "human_sexuality": 0.6111111111111112, + "international_law": 0.6162162162162163, + "journalism": 0.6337209302325582, + "jurisprudence": 0.7712895377128953, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.6016260162601627, + "machine_learning": 0.6229508196721312, + "management": 0.8333333333333334, + "marketing": 0.7444444444444445, + "marxist_theory": 0.9153439153439153, + "modern_chinese": 0.5689655172413793, + "nutrition": 0.7379310344827587, + "philosophy": 0.7047619047619048, + "professional_accounting": 0.8228571428571428, + "professional_law": 0.6350710900473934, + "professional_medicine": 0.6382978723404256, + "professional_psychology": 0.8318965517241379, + "public_relations": 0.6896551724137931, + "security_study": 0.7851851851851852, + "sociology": 0.6769911504424779, + "sports_science": 0.6909090909090909, + "traditional_chinese_medicine": 0.7675675675675676, + "virology": 0.7337278106508875, + "world_history": 0.9130434782608695, + "world_religions": 0.70625 + } + } + }, + "zbench": { + "prompt_1": { + "accuracy": 0.45454545454545453 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.634090909090909 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.615593220338983 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.8975317875841436 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.8990690837824595 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.40619506364326374, + "rouge2": 0.17465809475148847, + "rougeL": 0.3178215644999177, + "avg_rouge": 0.29955824096489 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.3588273806142116, + "rouge2": 0.1380414207543411, + "rougeL": 0.27862569710756147, + "avg_rouge": 0.25849816615870475 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.9438073394495413 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.7996164908916586 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.774 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.8 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 0.8 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.5 + } + }, + "rte": { + "prompt_1": { + "accuracy": 1.0 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.9 + } + } + } + }, + "qwen_1_5_7b_chat": { + "model_size": "7B", + "model_link": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat", + "zero_shot": { + "cross_xquad": { + "prompt_1": { + "overall_acc": 0.9256302521008403, + "language_acc": { + "Spanish": 0.9243697478991597, + "English": 0.9369747899159664, + "Chinese": 0.9252100840336135, + "Vietnamese": 0.9159663865546218 + }, + "consistency_score_2": 0.9183473389355744, + "consistency_score_3": 0.8817226890756302, + "consistency_score_4": 0.8588235294117647, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9285714285714286, + "Spanish,Chinese": 0.9084033613445378, + "Spanish,Vietnamese": 0.9176470588235294, + "English,Chinese": 0.926890756302521, + "English,Vietnamese": 0.9226890756302522, + "Chinese,Vietnamese": 0.9058823529411765 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8857142857142857, + "Spanish,English,Vietnamese": 0.8899159663865546, + "Spanish,Chinese,Vietnamese": 0.8697478991596639, + "English,Chinese,Vietnamese": 0.8815126050420168 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8588235294117647 + } + }, + "AC3_2": 0.9219744132912145, + "AC3_3": 0.9031431286417014, + "AC3_4": 0.8909763292621103 + }, + "prompt_2": { + "overall_acc": 0.9292016806722688, + "language_acc": { + "Spanish": 0.9319327731092437, + "English": 0.9436974789915966, + "Chinese": 0.9294117647058824, + "Vietnamese": 0.9117647058823529 + }, + "consistency_score_2": 0.9148459383753503, + "consistency_score_3": 0.8760504201680672, + "consistency_score_4": 0.8478991596638655, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9277310924369748, + "Spanish,Chinese": 0.9134453781512605, + "Spanish,Vietnamese": 0.9117647058823529, + "English,Chinese": 0.9260504201680673, + "English,Vietnamese": 0.9109243697478991, + "Chinese,Vietnamese": 0.8991596638655462 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8857142857142857, + "Spanish,English,Vietnamese": 0.8789915966386554, + "Spanish,Chinese,Vietnamese": 0.8672268907563025, + "English,Chinese,Vietnamese": 0.8722689075630252 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8478991596638655 + } + }, + "AC3_2": 0.9219679304024787, + "AC3_3": 0.9018435955286216, + "AC3_4": 0.8866906213458434 + }, + "prompt_3": { + "overall_acc": 0.917016806722689, + "language_acc": { + "Spanish": 0.9134453781512605, + "English": 0.9327731092436975, + "Chinese": 0.915126050420168, + "Vietnamese": 0.9067226890756303 + }, + "consistency_score_2": 0.9012605042016807, + "consistency_score_3": 0.8581932773109244, + "consistency_score_4": 0.826890756302521, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9134453781512605, + "Spanish,Chinese": 0.8865546218487395, + "Spanish,Vietnamese": 0.8966386554621849, + "English,Chinese": 0.9100840336134454, + "English,Vietnamese": 0.9092436974789916, + "Chinese,Vietnamese": 0.8915966386554622 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8596638655462185, + "Spanish,English,Vietnamese": 0.8647058823529412, + "Spanish,Chinese,Vietnamese": 0.8453781512605042, + "English,Chinese,Vietnamese": 0.8630252100840337 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.826890756302521 + } + }, + "AC3_2": 0.909070387203692, + "AC3_3": 0.8866304509470218, + "AC3_4": 0.8696249008682165 + }, + "prompt_4": { + "overall_acc": 0.9235294117647058, + "language_acc": { + "Spanish": 0.9277310924369748, + "English": 0.9336134453781513, + "Chinese": 0.9210084033613445, + "Vietnamese": 0.9117647058823529 + }, + "consistency_score_2": 0.9123249299719888, + "consistency_score_3": 0.873109243697479, + "consistency_score_4": 0.8470588235294118, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9302521008403362, + "Spanish,Chinese": 0.9050420168067227, + "Spanish,Vietnamese": 0.9176470588235294, + "English,Chinese": 0.9126050420168067, + "English,Vietnamese": 0.9134453781512605, + "Chinese,Vietnamese": 0.8949579831932774 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8756302521008403, + "Spanish,English,Vietnamese": 0.8848739495798319, + "Spanish,Chinese,Vietnamese": 0.8647058823529412, + "English,Chinese,Vietnamese": 0.8672268907563025 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8470588235294118 + } + }, + "AC3_2": 0.9178929795403732, + "AC3_3": 0.8976118416932849, + "AC3_4": 0.8836427593794464 + }, + "prompt_5": { + "overall_acc": 0.9077731092436975, + "language_acc": { + "Spanish": 0.907563025210084, + "English": 0.915126050420168, + "Chinese": 0.9142857142857143, + "Vietnamese": 0.8941176470588236 + }, + "consistency_score_2": 0.892577030812325, + "consistency_score_3": 0.8441176470588235, + "consistency_score_4": 0.8117647058823529, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9084033613445378, + "Spanish,Chinese": 0.8899159663865546, + "Spanish,Vietnamese": 0.8873949579831932, + "English,Chinese": 0.8974789915966387, + "English,Vietnamese": 0.8915966386554622, + "Chinese,Vietnamese": 0.880672268907563 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8529411764705882, + "Spanish,English,Vietnamese": 0.8478991596638655, + "Spanish,Chinese,Vietnamese": 0.8344537815126051, + "English,Chinese,Vietnamese": 0.8411764705882353 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.8117647058823529 + } + }, + "AC3_2": 0.9001109377865719, + "AC3_3": 0.8747889081983395, + "AC3_4": 0.8570886484822626 + } + }, + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.5399999999999999, + "language_acc": { + "Filipino": 0.4533333333333333, + "Vietnamese": 0.5133333333333333, + "Chinese": 0.62, + "Spanish": 0.5733333333333334, + "Malay": 0.44666666666666666, + "Indonesian": 0.48, + "English": 0.6933333333333334 + }, + "consistency_score_2": 0.5114285714285715, + "consistency_score_3": 0.3398095238095238, + "consistency_score_4": 0.252952380952381, + "consistency_score_5": 0.19904761904761903, + "consistency_score_6": 0.16190476190476186, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.41333333333333333, + "Filipino,Chinese": 0.4533333333333333, + "Filipino,Spanish": 0.47333333333333333, + "Filipino,Malay": 0.46, + "Filipino,Indonesian": 0.4533333333333333, + "Filipino,English": 0.5133333333333333, + "Vietnamese,Chinese": 0.4866666666666667, + "Vietnamese,Spanish": 0.5333333333333333, + "Vietnamese,Malay": 0.48, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,English": 0.5466666666666666, + "Chinese,Spanish": 0.56, + "Chinese,Malay": 0.47333333333333333, + "Chinese,Indonesian": 0.5066666666666667, + "Chinese,English": 0.64, + "Spanish,Malay": 0.44, + "Spanish,Indonesian": 0.56, + "Spanish,English": 0.6466666666666666, + "Malay,Indonesian": 0.5533333333333333, + "Malay,English": 0.47333333333333333, + "Indonesian,English": 0.58 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.26, + "Filipino,Vietnamese,Spanish": 0.31333333333333335, + "Filipino,Vietnamese,Malay": 0.2733333333333333, + "Filipino,Vietnamese,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,English": 0.30666666666666664, + "Filipino,Chinese,Spanish": 0.32, + "Filipino,Chinese,Malay": 0.2866666666666667, + "Filipino,Chinese,Indonesian": 0.2866666666666667, + "Filipino,Chinese,English": 0.36, + "Filipino,Spanish,Malay": 0.29333333333333333, + "Filipino,Spanish,Indonesian": 0.32, + "Filipino,Spanish,English": 0.37333333333333335, + "Filipino,Malay,Indonesian": 0.32, + "Filipino,Malay,English": 0.32, + "Filipino,Indonesian,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Malay": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,English": 0.4, + "Vietnamese,Spanish,Malay": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian": 0.36, + "Vietnamese,Spanish,English": 0.41333333333333333, + "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Malay,English": 0.34, + "Vietnamese,Indonesian,English": 0.38666666666666666, + "Chinese,Spanish,Malay": 0.31333333333333335, + "Chinese,Spanish,Indonesian": 0.37333333333333335, + "Chinese,Spanish,English": 0.4666666666666667, + "Chinese,Malay,Indonesian": 0.3333333333333333, + "Chinese,Malay,English": 0.34, + "Chinese,Indonesian,English": 0.4, + "Spanish,Malay,Indonesian": 0.32666666666666666, + "Spanish,Malay,English": 0.35333333333333333, + "Spanish,Indonesian,English": 0.44666666666666666, + "Malay,Indonesian,English": 0.36 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.18666666666666668, + "Filipino,Vietnamese,Chinese,Indonesian": 0.18666666666666668, + "Filipino,Vietnamese,Chinese,English": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,Malay": 0.24, + "Filipino,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,English": 0.26, + "Filipino,Vietnamese,Malay,Indonesian": 0.23333333333333334, + "Filipino,Vietnamese,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Indonesian,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Malay": 0.21333333333333335, + "Filipino,Chinese,Spanish,Indonesian": 0.22666666666666666, + "Filipino,Chinese,Spanish,English": 0.2733333333333333, + "Filipino,Chinese,Malay,Indonesian": 0.21333333333333335, + "Filipino,Chinese,Malay,English": 0.22666666666666666, + "Filipino,Chinese,Indonesian,English": 0.24666666666666667, + "Filipino,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Spanish,Indonesian,English": 0.2866666666666667, + "Filipino,Malay,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Malay": 0.24, + "Vietnamese,Chinese,Spanish,Indonesian": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.31333333333333335, + "Vietnamese,Chinese,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,Chinese,Malay,English": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.2866666666666667, + "Vietnamese,Spanish,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,Spanish,Malay,English": 0.28, + "Vietnamese,Spanish,Indonesian,English": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.2866666666666667, + "Chinese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Chinese,Spanish,Malay,English": 0.28, + "Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Chinese,Malay,Indonesian,English": 0.26, + "Spanish,Malay,Indonesian,English": 0.29333333333333333 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.19333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.15333333333333332, + "Filipino,Vietnamese,Chinese,Malay,English": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.17333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.2, + "Filipino,Vietnamese,Spanish,Malay,English": 0.20666666666666667, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.2, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.20666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.17333333333333334, + "Filipino,Chinese,Spanish,Malay,English": 0.18666666666666668, + "Filipino,Chinese,Spanish,Indonesian,English": 0.20666666666666667, + "Filipino,Chinese,Malay,Indonesian,English": 0.18666666666666668, + "Filipino,Spanish,Malay,Indonesian,English": 0.22666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.2, + "Vietnamese,Chinese,Spanish,Malay,English": 0.22, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.21333333333333335, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.24, + "Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.14, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.15333333333333332, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.15333333333333332, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.14666666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.18, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.16666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.19333333333333333 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.13333333333333333 + } + }, + "AC3_2": 0.5253260869065587, + "AC3_3": 0.4171292487077305, + "AC3_4": 0.3445207782414429, + "AC3_5": 0.2908762886204355, + "AC3_6": 0.24911804609747973, + "AC3_7": 0.2138613861068523 + }, + "prompt_2": { + "overall_acc": 0.5152380952380952, + "language_acc": { + "Filipino": 0.3933333333333333, + "Vietnamese": 0.46, + "Chinese": 0.6266666666666667, + "Spanish": 0.6, + "Malay": 0.4, + "Indonesian": 0.49333333333333335, + "English": 0.6333333333333333 + }, + "consistency_score_2": 0.4695238095238095, + "consistency_score_3": 0.28476190476190477, + "consistency_score_4": 0.19238095238095235, + "consistency_score_5": 0.13555555555555557, + "consistency_score_6": 0.09619047619047619, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.42, + "Filipino,Chinese": 0.4533333333333333, + "Filipino,Spanish": 0.41333333333333333, + "Filipino,Malay": 0.42, + "Filipino,Indonesian": 0.4, + "Filipino,English": 0.4066666666666667, + "Vietnamese,Chinese": 0.44666666666666666, + "Vietnamese,Spanish": 0.46, + "Vietnamese,Malay": 0.49333333333333335, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,English": 0.47333333333333333, + "Chinese,Spanish": 0.5666666666666667, + "Chinese,Malay": 0.46, + "Chinese,Indonesian": 0.43333333333333335, + "Chinese,English": 0.5533333333333333, + "Spanish,Malay": 0.4666666666666667, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,English": 0.5866666666666667, + "Malay,Indonesian": 0.5133333333333333, + "Malay,English": 0.4533333333333333, + "Indonesian,English": 0.47333333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.22666666666666666, + "Filipino,Vietnamese,Spanish": 0.24666666666666667, + "Filipino,Vietnamese,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,English": 0.25333333333333335, + "Filipino,Chinese,Spanish": 0.2866666666666667, + "Filipino,Chinese,Malay": 0.26, + "Filipino,Chinese,Indonesian": 0.23333333333333334, + "Filipino,Chinese,English": 0.28, + "Filipino,Spanish,Malay": 0.23333333333333334, + "Filipino,Spanish,Indonesian": 0.22666666666666666, + "Filipino,Spanish,English": 0.28, + "Filipino,Malay,Indonesian": 0.26666666666666666, + "Filipino,Malay,English": 0.22666666666666666, + "Filipino,Indonesian,English": 0.22666666666666666, + "Vietnamese,Chinese,Spanish": 0.3, + "Vietnamese,Chinese,Malay": 0.26666666666666666, + "Vietnamese,Chinese,Indonesian": 0.26, + "Vietnamese,Chinese,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Vietnamese,Spanish,English": 0.32666666666666666, + "Vietnamese,Malay,Indonesian": 0.32, + "Vietnamese,Malay,English": 0.29333333333333333, + "Vietnamese,Indonesian,English": 0.28, + "Chinese,Spanish,Malay": 0.32, + "Chinese,Spanish,Indonesian": 0.31333333333333335, + "Chinese,Spanish,English": 0.41333333333333333, + "Chinese,Malay,Indonesian": 0.30666666666666664, + "Chinese,Malay,English": 0.30666666666666664, + "Chinese,Indonesian,English": 0.32, + "Spanish,Malay,Indonesian": 0.30666666666666664, + "Spanish,Malay,English": 0.31333333333333335, + "Spanish,Indonesian,English": 0.3466666666666667, + "Malay,Indonesian,English": 0.30666666666666664 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.15333333333333332, + "Filipino,Vietnamese,Chinese,Indonesian": 0.14, + "Filipino,Vietnamese,Chinese,English": 0.18, + "Filipino,Vietnamese,Spanish,Malay": 0.16666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "Filipino,Vietnamese,Spanish,English": 0.16666666666666666, + "Filipino,Vietnamese,Malay,Indonesian": 0.18666666666666668, + "Filipino,Vietnamese,Malay,English": 0.16, + "Filipino,Vietnamese,Indonesian,English": 0.15333333333333332, + "Filipino,Chinese,Spanish,Malay": 0.16666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.16666666666666666, + "Filipino,Chinese,Spanish,English": 0.21333333333333335, + "Filipino,Chinese,Malay,Indonesian": 0.18, + "Filipino,Chinese,Malay,English": 0.18, + "Filipino,Chinese,Indonesian,English": 0.18, + "Filipino,Spanish,Malay,Indonesian": 0.16, + "Filipino,Spanish,Malay,English": 0.16, + "Filipino,Spanish,Indonesian,English": 0.16, + "Filipino,Malay,Indonesian,English": 0.17333333333333334, + "Vietnamese,Chinese,Spanish,Malay": 0.20666666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.18666666666666668, + "Vietnamese,Chinese,Spanish,English": 0.26, + "Vietnamese,Chinese,Malay,Indonesian": 0.20666666666666667, + "Vietnamese,Chinese,Malay,English": 0.21333333333333335, + "Vietnamese,Chinese,Indonesian,English": 0.21333333333333335, + "Vietnamese,Spanish,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,Spanish,Malay,English": 0.22, + "Vietnamese,Spanish,Indonesian,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.22666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, + "Chinese,Spanish,Malay,English": 0.24666666666666667, + "Chinese,Spanish,Indonesian,English": 0.26, + "Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Spanish,Malay,Indonesian,English": 0.24 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.10666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.1, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.12, + "Filipino,Vietnamese,Chinese,Malay,English": 0.12, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.12, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.12, + "Filipino,Vietnamese,Spanish,Malay,English": 0.10666666666666667, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.10666666666666667, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.13333333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.12, + "Filipino,Chinese,Spanish,Malay,English": 0.12666666666666668, + "Filipino,Chinese,Spanish,Indonesian,English": 0.13333333333333333, + "Filipino,Chinese,Malay,Indonesian,English": 0.14, + "Filipino,Spanish,Malay,Indonesian,English": 0.12, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.15333333333333332, + "Vietnamese,Chinese,Spanish,Malay,English": 0.17333333333333334, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.17333333333333334, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.17333333333333334, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.17333333333333334, + "Chinese,Spanish,Malay,Indonesian,English": 0.19333333333333333 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.08, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.08, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.08666666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.1, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.08666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.1, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.14 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.06666666666666667 + } + }, + "AC3_2": 0.49131988573807855, + "AC3_3": 0.3668004534688892, + "AC3_4": 0.28015638014370664, + "AC3_5": 0.2146406503735227, + "AC3_6": 0.16211541311693634, + "AC3_7": 0.1180578286758384 + }, + "prompt_3": { + "overall_acc": 0.4752380952380952, + "language_acc": { + "Filipino": 0.35333333333333333, + "Vietnamese": 0.46, + "Chinese": 0.6133333333333333, + "Spanish": 0.5133333333333333, + "Malay": 0.38, + "Indonesian": 0.46, + "English": 0.5466666666666666 + }, + "consistency_score_2": 0.4641269841269841, + "consistency_score_3": 0.2773333333333333, + "consistency_score_4": 0.18647619047619043, + "consistency_score_5": 0.1346031746031746, + "consistency_score_6": 0.10380952380952381, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.38666666666666666, + "Filipino,Chinese": 0.42, + "Filipino,Spanish": 0.4533333333333333, + "Filipino,Malay": 0.36666666666666664, + "Filipino,Indonesian": 0.38666666666666666, + "Filipino,English": 0.43333333333333335, + "Vietnamese,Chinese": 0.4266666666666667, + "Vietnamese,Spanish": 0.47333333333333333, + "Vietnamese,Malay": 0.4266666666666667, + "Vietnamese,Indonesian": 0.4266666666666667, + "Vietnamese,English": 0.4866666666666667, + "Chinese,Spanish": 0.5466666666666666, + "Chinese,Malay": 0.38666666666666666, + "Chinese,Indonesian": 0.52, + "Chinese,English": 0.56, + "Spanish,Malay": 0.46, + "Spanish,Indonesian": 0.54, + "Spanish,English": 0.5666666666666667, + "Malay,Indonesian": 0.5, + "Malay,English": 0.4666666666666667, + "Indonesian,English": 0.5133333333333333 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.21333333333333335, + "Filipino,Vietnamese,Spanish": 0.23333333333333334, + "Filipino,Vietnamese,Malay": 0.18666666666666668, + "Filipino,Vietnamese,Indonesian": 0.20666666666666667, + "Filipino,Vietnamese,English": 0.24666666666666667, + "Filipino,Chinese,Spanish": 0.3, + "Filipino,Chinese,Malay": 0.18666666666666668, + "Filipino,Chinese,Indonesian": 0.25333333333333335, + "Filipino,Chinese,English": 0.3, + "Filipino,Spanish,Malay": 0.22, + "Filipino,Spanish,Indonesian": 0.2733333333333333, + "Filipino,Spanish,English": 0.29333333333333333, + "Filipino,Malay,Indonesian": 0.21333333333333335, + "Filipino,Malay,English": 0.20666666666666667, + "Filipino,Indonesian,English": 0.24666666666666667, + "Vietnamese,Chinese,Spanish": 0.28, + "Vietnamese,Chinese,Malay": 0.22666666666666666, + "Vietnamese,Chinese,Indonesian": 0.26666666666666666, + "Vietnamese,Chinese,English": 0.32, + "Vietnamese,Spanish,Malay": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Vietnamese,Spanish,English": 0.32666666666666666, + "Vietnamese,Malay,Indonesian": 0.28, + "Vietnamese,Malay,English": 0.26666666666666666, + "Vietnamese,Indonesian,English": 0.3, + "Chinese,Spanish,Malay": 0.2733333333333333, + "Chinese,Spanish,Indonesian": 0.3466666666666667, + "Chinese,Spanish,English": 0.4, + "Chinese,Malay,Indonesian": 0.29333333333333333, + "Chinese,Malay,English": 0.2733333333333333, + "Chinese,Indonesian,English": 0.3466666666666667, + "Spanish,Malay,Indonesian": 0.31333333333333335, + "Spanish,Malay,English": 0.32, + "Spanish,Indonesian,English": 0.38, + "Malay,Indonesian,English": 0.3333333333333333 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.16, + "Filipino,Vietnamese,Chinese,Malay": 0.12666666666666668, + "Filipino,Vietnamese,Chinese,Indonesian": 0.14666666666666667, + "Filipino,Vietnamese,Chinese,English": 0.17333333333333334, + "Filipino,Vietnamese,Spanish,Malay": 0.13333333333333333, + "Filipino,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Filipino,Vietnamese,Spanish,English": 0.18, + "Filipino,Vietnamese,Malay,Indonesian": 0.14, + "Filipino,Vietnamese,Malay,English": 0.12666666666666668, + "Filipino,Vietnamese,Indonesian,English": 0.16, + "Filipino,Chinese,Spanish,Malay": 0.15333333333333332, + "Filipino,Chinese,Spanish,Indonesian": 0.2, + "Filipino,Chinese,Spanish,English": 0.23333333333333334, + "Filipino,Chinese,Malay,Indonesian": 0.15333333333333332, + "Filipino,Chinese,Malay,English": 0.15333333333333332, + "Filipino,Chinese,Indonesian,English": 0.2, + "Filipino,Spanish,Malay,Indonesian": 0.16, + "Filipino,Spanish,Malay,English": 0.16666666666666666, + "Filipino,Spanish,Indonesian,English": 0.2, + "Filipino,Malay,Indonesian,English": 0.15333333333333332, + "Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, + "Vietnamese,Chinese,Spanish,Indonesian": 0.2, + "Vietnamese,Chinese,Spanish,English": 0.22666666666666666, + "Vietnamese,Chinese,Malay,Indonesian": 0.19333333333333333, + "Vietnamese,Chinese,Malay,English": 0.17333333333333334, + "Vietnamese,Chinese,Indonesian,English": 0.20666666666666667, + "Vietnamese,Spanish,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,Spanish,Malay,English": 0.2, + "Vietnamese,Spanish,Indonesian,English": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,English": 0.21333333333333335, + "Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, + "Chinese,Spanish,Malay,English": 0.22666666666666666, + "Chinese,Spanish,Indonesian,English": 0.2733333333333333, + "Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Spanish,Malay,Indonesian,English": 0.26666666666666666 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.1, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.11333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.11333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.1, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.12, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.10666666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.1, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.12666666666666668, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.10666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.13333333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.14, + "Filipino,Chinese,Spanish,Indonesian,English": 0.16666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.13333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.14, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.15333333333333332, + "Vietnamese,Chinese,Spanish,Malay,English": 0.14, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.16666666666666666, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.15333333333333332, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.18, + "Chinese,Spanish,Malay,Indonesian,English": 0.2 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.09333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.08666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.1, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.09333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.09333333333333334, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.13333333333333333 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.08666666666666667 + } + }, + "AC3_2": 0.4696168267415549, + "AC3_3": 0.35026406812185384, + "AC3_4": 0.26785152283695113, + "AC3_5": 0.20978756100771556, + "AC3_6": 0.1703978696447582, + "AC3_7": 0.14659887003040753 + }, + "prompt_4": { + "overall_acc": 0.5304761904761904, + "language_acc": { + "Filipino": 0.4066666666666667, + "Vietnamese": 0.5, + "Chinese": 0.64, + "Spanish": 0.5533333333333333, + "Malay": 0.4266666666666667, + "Indonesian": 0.5, + "English": 0.6866666666666666 + }, + "consistency_score_2": 0.49777777777777776, + "consistency_score_3": 0.3182857142857144, + "consistency_score_4": 0.22514285714285714, + "consistency_score_5": 0.1688888888888889, + "consistency_score_6": 0.13238095238095238, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.4, + "Filipino,Chinese": 0.5, + "Filipino,Spanish": 0.4533333333333333, + "Filipino,Malay": 0.41333333333333333, + "Filipino,Indonesian": 0.4533333333333333, + "Filipino,English": 0.44, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Malay": 0.43333333333333335, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,English": 0.5066666666666667, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,Malay": 0.4266666666666667, + "Chinese,Indonesian": 0.5333333333333333, + "Chinese,English": 0.6133333333333333, + "Spanish,Malay": 0.5, + "Spanish,Indonesian": 0.5533333333333333, + "Spanish,English": 0.64, + "Malay,Indonesian": 0.4866666666666667, + "Malay,English": 0.44666666666666666, + "Indonesian,English": 0.54 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.2866666666666667, + "Filipino,Vietnamese,Spanish": 0.26666666666666666, + "Filipino,Vietnamese,Malay": 0.23333333333333334, + "Filipino,Vietnamese,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,English": 0.24, + "Filipino,Chinese,Spanish": 0.32666666666666666, + "Filipino,Chinese,Malay": 0.28, + "Filipino,Chinese,Indonesian": 0.3333333333333333, + "Filipino,Chinese,English": 0.3466666666666667, + "Filipino,Spanish,Malay": 0.2866666666666667, + "Filipino,Spanish,Indonesian": 0.31333333333333335, + "Filipino,Spanish,English": 0.3333333333333333, + "Filipino,Malay,Indonesian": 0.2733333333333333, + "Filipino,Malay,English": 0.22666666666666666, + "Filipino,Indonesian,English": 0.28, + "Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Chinese,Malay": 0.2733333333333333, + "Vietnamese,Chinese,Indonesian": 0.35333333333333333, + "Vietnamese,Chinese,English": 0.37333333333333335, + "Vietnamese,Spanish,Malay": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,English": 0.38666666666666666, + "Vietnamese,Malay,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,English": 0.28, + "Vietnamese,Indonesian,English": 0.3466666666666667, + "Chinese,Spanish,Malay": 0.32666666666666666, + "Chinese,Spanish,Indonesian": 0.38, + "Chinese,Spanish,English": 0.46, + "Chinese,Malay,Indonesian": 0.29333333333333333, + "Chinese,Malay,English": 0.30666666666666664, + "Chinese,Indonesian,English": 0.38, + "Spanish,Malay,Indonesian": 0.32666666666666666, + "Spanish,Malay,English": 0.3333333333333333, + "Spanish,Indonesian,English": 0.42, + "Malay,Indonesian,English": 0.3 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.2, + "Filipino,Vietnamese,Chinese,Malay": 0.18, + "Filipino,Vietnamese,Chinese,Indonesian": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,English": 0.20666666666666667, + "Filipino,Vietnamese,Spanish,Malay": 0.17333333333333334, + "Filipino,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Filipino,Vietnamese,Spanish,English": 0.20666666666666667, + "Filipino,Vietnamese,Malay,Indonesian": 0.18, + "Filipino,Vietnamese,Malay,English": 0.14666666666666667, + "Filipino,Vietnamese,Indonesian,English": 0.19333333333333333, + "Filipino,Chinese,Spanish,Malay": 0.21333333333333335, + "Filipino,Chinese,Spanish,Indonesian": 0.24666666666666667, + "Filipino,Chinese,Spanish,English": 0.26666666666666666, + "Filipino,Chinese,Malay,Indonesian": 0.22, + "Filipino,Chinese,Malay,English": 0.18666666666666668, + "Filipino,Chinese,Indonesian,English": 0.24, + "Filipino,Spanish,Malay,Indonesian": 0.22, + "Filipino,Spanish,Malay,English": 0.18666666666666668, + "Filipino,Spanish,Indonesian,English": 0.24, + "Filipino,Malay,Indonesian,English": 0.17333333333333334, + "Vietnamese,Chinese,Spanish,Malay": 0.22, + "Vietnamese,Chinese,Spanish,Indonesian": 0.26, + "Vietnamese,Chinese,Spanish,English": 0.3, + "Vietnamese,Chinese,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,Chinese,Malay,English": 0.22666666666666666, + "Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, + "Vietnamese,Spanish,Malay,Indonesian": 0.24, + "Vietnamese,Spanish,Malay,English": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,English": 0.22666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.25333333333333335, + "Chinese,Spanish,Malay,English": 0.25333333333333335, + "Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Spanish,Malay,Indonesian,English": 0.24666666666666667 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.14, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.17333333333333334, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.14666666666666667, + "Filipino,Vietnamese,Chinese,Malay,English": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.16666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.16, + "Filipino,Vietnamese,Spanish,Malay,English": 0.12666666666666668, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.16666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.13333333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.18666666666666668, + "Filipino,Chinese,Spanish,Malay,English": 0.15333333333333332, + "Filipino,Chinese,Spanish,Indonesian,English": 0.2, + "Filipino,Chinese,Malay,Indonesian,English": 0.16, + "Filipino,Spanish,Malay,Indonesian,English": 0.15333333333333332, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.18666666666666668, + "Vietnamese,Chinese,Spanish,Malay,English": 0.19333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.22, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.18666666666666668, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.19333333333333333, + "Chinese,Spanish,Malay,Indonesian,English": 0.20666666666666667 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.12666666666666668, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.11333333333333333, "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.14, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.12666666666666668, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.13333333333333333, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668 + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.12, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.12, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.14, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.16666666666666666 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.11333333333333333 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.10666666666666667 } }, - "AC3_2": 0.501902958103053, - "AC3_3": 0.38020943557989617, - "AC3_4": 0.30141563477702077, - "AC3_5": 0.24931031449549318, - "AC3_6": 0.21263868785963283, - "AC3_7": 0.18640796016979178 + "AC3_2": 0.5136070803246517, + "AC3_3": 0.39785714281026796, + "AC3_4": 0.31611941369290436, + "AC3_5": 0.25620820090148033, + "AC3_6": 0.2118856047846738, + "AC3_7": 0.17761833579673655 }, - "prompt_3": { - "overall_acc": 0.5066666666666667, + "prompt_5": { + "overall_acc": 0.5323809523809524, "language_acc": { - "Filipino": 0.44666666666666666, + "Filipino": 0.4, "Vietnamese": 0.49333333333333335, - "Chinese": 0.54, + "Chinese": 0.62, "Spanish": 0.56, - "Malay": 0.44666666666666666, - "Indonesian": 0.48, - "English": 0.58 + "Malay": 0.4266666666666667, + "Indonesian": 0.54, + "English": 0.6866666666666666 }, - "consistency_score_2": 0.4904761904761905, - "consistency_score_3": 0.315047619047619, - "consistency_score_4": 0.23371428571428576, - "consistency_score_5": 0.18984126984126984, - "consistency_score_6": 0.16380952380952382, - "consistency_score_7": 0.14666666666666667, + "consistency_score_2": 0.4990476190476191, + "consistency_score_3": 0.3165714285714285, + "consistency_score_4": 0.2232380952380953, + "consistency_score_5": 0.16666666666666669, + "consistency_score_6": 0.13047619047619047, + "consistency_score_7": 0.10666666666666667, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.49333333333333335, - "Filipino,Chinese": 0.48, - "Filipino,Spanish": 0.4666666666666667, + "Filipino,Vietnamese": 0.4066666666666667, + "Filipino,Chinese": 0.41333333333333333, + "Filipino,Spanish": 0.46, "Filipino,Malay": 0.44, - "Filipino,Indonesian": 0.5133333333333333, - "Filipino,English": 0.43333333333333335, - "Vietnamese,Chinese": 0.44666666666666666, - "Vietnamese,Spanish": 0.52, + "Filipino,Indonesian": 0.37333333333333335, + "Filipino,English": 0.44666666666666666, + "Vietnamese,Chinese": 0.49333333333333335, + "Vietnamese,Spanish": 0.5266666666666666, "Vietnamese,Malay": 0.48, - "Vietnamese,Indonesian": 0.5133333333333333, - "Vietnamese,English": 0.5266666666666666, - "Chinese,Spanish": 0.48, - "Chinese,Malay": 0.47333333333333333, - "Chinese,Indonesian": 0.5133333333333333, - "Chinese,English": 0.43333333333333335, - "Spanish,Malay": 0.5066666666666667, - "Spanish,Indonesian": 0.5133333333333333, - "Spanish,English": 0.58, - "Malay,Indonesian": 0.54, - "Malay,English": 0.44, - "Indonesian,English": 0.5066666666666667 + "Vietnamese,Indonesian": 0.52, + "Vietnamese,English": 0.5933333333333334, + "Chinese,Spanish": 0.5466666666666666, + "Chinese,Malay": 0.44666666666666666, + "Chinese,Indonesian": 0.5, + "Chinese,English": 0.5933333333333334, + "Spanish,Malay": 0.49333333333333335, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,English": 0.6133333333333333, + "Malay,Indonesian": 0.5533333333333333, + "Malay,English": 0.49333333333333335, + "Indonesian,English": 0.54 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.29333333333333333, - "Filipino,Vietnamese,Spanish": 0.3, - "Filipino,Vietnamese,Malay": 0.30666666666666664, - "Filipino,Vietnamese,Indonesian": 0.34, - "Filipino,Vietnamese,English": 0.3, + "Filipino,Vietnamese,Chinese": 0.23333333333333334, + "Filipino,Vietnamese,Spanish": 0.28, + "Filipino,Vietnamese,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Indonesian": 0.24, + "Filipino,Vietnamese,English": 0.29333333333333333, "Filipino,Chinese,Spanish": 0.2866666666666667, - "Filipino,Chinese,Malay": 0.2733333333333333, - "Filipino,Chinese,Indonesian": 0.31333333333333335, - "Filipino,Chinese,English": 0.2866666666666667, - "Filipino,Spanish,Malay": 0.2866666666666667, - "Filipino,Spanish,Indonesian": 0.32666666666666666, - "Filipino,Spanish,English": 0.3333333333333333, - "Filipino,Malay,Indonesian": 0.30666666666666664, - "Filipino,Malay,English": 0.26666666666666666, - "Filipino,Indonesian,English": 0.3333333333333333, - "Vietnamese,Chinese,Spanish": 0.3, - "Vietnamese,Chinese,Malay": 0.28, - "Vietnamese,Chinese,Indonesian": 0.3333333333333333, - "Vietnamese,Chinese,English": 0.3, - "Vietnamese,Spanish,Malay": 0.3, - "Vietnamese,Spanish,Indonesian": 0.3466666666666667, - "Vietnamese,Spanish,English": 0.38, - "Vietnamese,Malay,Indonesian": 0.34, - "Vietnamese,Malay,English": 0.31333333333333335, - "Vietnamese,Indonesian,English": 0.3466666666666667, - "Chinese,Spanish,Malay": 0.30666666666666664, - "Chinese,Spanish,Indonesian": 0.32666666666666666, - "Chinese,Spanish,English": 0.32666666666666666, - "Chinese,Malay,Indonesian": 0.34, - "Chinese,Malay,English": 0.28, - "Chinese,Indonesian,English": 0.3, - "Spanish,Malay,Indonesian": 0.3333333333333333, - "Spanish,Malay,English": 0.32666666666666666, - "Spanish,Indonesian,English": 0.38, - "Malay,Indonesian,English": 0.31333333333333335 + "Filipino,Chinese,Malay": 0.22666666666666666, + "Filipino,Chinese,Indonesian": 0.22666666666666666, + "Filipino,Chinese,English": 0.3, + "Filipino,Spanish,Malay": 0.2733333333333333, + "Filipino,Spanish,Indonesian": 0.26, + "Filipino,Spanish,English": 0.32666666666666666, + "Filipino,Malay,Indonesian": 0.26666666666666666, + "Filipino,Malay,English": 0.26, + "Filipino,Indonesian,English": 0.26, + "Vietnamese,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Chinese,Malay": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,English": 0.38666666666666666, + "Vietnamese,Spanish,Malay": 0.3466666666666667, + "Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,English": 0.42, + "Vietnamese,Malay,Indonesian": 0.35333333333333333, + "Vietnamese,Malay,English": 0.36, + "Vietnamese,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay": 0.31333333333333335, + "Chinese,Spanish,Indonesian": 0.3466666666666667, + "Chinese,Spanish,English": 0.42, + "Chinese,Malay,Indonesian": 0.31333333333333335, + "Chinese,Malay,English": 0.31333333333333335, + "Chinese,Indonesian,English": 0.37333333333333335, + "Spanish,Malay,Indonesian": 0.36, + "Spanish,Malay,English": 0.36666666666666664, + "Spanish,Indonesian,English": 0.4, + "Malay,Indonesian,English": 0.36 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.20666666666666667, - "Filipino,Vietnamese,Chinese,Malay": 0.2, - "Filipino,Vietnamese,Chinese,Indonesian": 0.24, - "Filipino,Vietnamese,Chinese,English": 0.22, - "Filipino,Vietnamese,Spanish,Malay": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.25333333333333335, - "Filipino,Vietnamese,Spanish,English": 0.24, - "Filipino,Vietnamese,Malay,Indonesian": 0.24666666666666667, - "Filipino,Vietnamese,Malay,English": 0.21333333333333335, - "Filipino,Vietnamese,Indonesian,English": 0.26, - "Filipino,Chinese,Spanish,Malay": 0.21333333333333335, - "Filipino,Chinese,Spanish,Indonesian": 0.22666666666666666, - "Filipino,Chinese,Spanish,English": 0.23333333333333334, - "Filipino,Chinese,Malay,Indonesian": 0.22, - "Filipino,Chinese,Malay,English": 0.20666666666666667, - "Filipino,Chinese,Indonesian,English": 0.22666666666666666, - "Filipino,Spanish,Malay,Indonesian": 0.21333333333333335, - "Filipino,Spanish,Malay,English": 0.22666666666666666, - "Filipino,Spanish,Indonesian,English": 0.28, - "Filipino,Malay,Indonesian,English": 0.23333333333333334, - "Vietnamese,Chinese,Spanish,Malay": 0.20666666666666667, - "Vietnamese,Chinese,Spanish,Indonesian": 0.24666666666666667, - "Vietnamese,Chinese,Spanish,English": 0.24666666666666667, - "Vietnamese,Chinese,Malay,Indonesian": 0.24666666666666667, - "Vietnamese,Chinese,Malay,English": 0.22, - "Vietnamese,Chinese,Indonesian,English": 0.24, - "Vietnamese,Spanish,Malay,Indonesian": 0.23333333333333334, - "Vietnamese,Spanish,Malay,English": 0.24666666666666667, - "Vietnamese,Spanish,Indonesian,English": 0.28, - "Vietnamese,Malay,Indonesian,English": 0.24666666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.23333333333333334, - "Chinese,Spanish,Malay,English": 0.23333333333333334, - "Chinese,Spanish,Indonesian,English": 0.25333333333333335, - "Chinese,Malay,Indonesian,English": 0.22, - "Spanish,Malay,Indonesian,English": 0.26 + "Filipino,Vietnamese,Chinese,Spanish": 0.18666666666666668, + "Filipino,Vietnamese,Chinese,Malay": 0.14666666666666667, + "Filipino,Vietnamese,Chinese,Indonesian": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,English": 0.20666666666666667, + "Filipino,Vietnamese,Spanish,Malay": 0.2, + "Filipino,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Filipino,Vietnamese,Spanish,English": 0.23333333333333334, + "Filipino,Vietnamese,Malay,Indonesian": 0.2, + "Filipino,Vietnamese,Malay,English": 0.19333333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.19333333333333333, + "Filipino,Chinese,Spanish,Malay": 0.18666666666666668, + "Filipino,Chinese,Spanish,Indonesian": 0.19333333333333333, + "Filipino,Chinese,Spanish,English": 0.24666666666666667, + "Filipino,Chinese,Malay,Indonesian": 0.16, + "Filipino,Chinese,Malay,English": 0.17333333333333334, + "Filipino,Chinese,Indonesian,English": 0.19333333333333333, + "Filipino,Spanish,Malay,Indonesian": 0.20666666666666667, + "Filipino,Spanish,Malay,English": 0.20666666666666667, + "Filipino,Spanish,Indonesian,English": 0.20666666666666667, + "Filipino,Malay,Indonesian,English": 0.18666666666666668, + "Vietnamese,Chinese,Spanish,Malay": 0.23333333333333334, + "Vietnamese,Chinese,Spanish,Indonesian": 0.24, + "Vietnamese,Chinese,Spanish,English": 0.2866666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,Chinese,Malay,English": 0.24, + "Vietnamese,Chinese,Indonesian,English": 0.26666666666666666, + "Vietnamese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,Spanish,Malay,English": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,English": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,English": 0.2866666666666667, + "Chinese,Spanish,Malay,Indonesian": 0.24, + "Chinese,Spanish,Malay,English": 0.26, + "Chinese,Spanish,Indonesian,English": 0.29333333333333333, + "Chinese,Malay,Indonesian,English": 0.24, + "Spanish,Malay,Indonesian,English": 0.2733333333333333 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.19333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.18666666666666668, - "Filipino,Vietnamese,Chinese,Malay,English": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.19333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Vietnamese,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.22, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.2, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.18, - "Filipino,Chinese,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Chinese,Spanish,Indonesian,English": 0.2, - "Filipino,Chinese,Malay,Indonesian,English": 0.18, - "Filipino,Spanish,Malay,Indonesian,English": 0.2, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.18, - "Vietnamese,Chinese,Spanish,Malay,English": 0.18666666666666668, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.20666666666666667, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.18666666666666668, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.20666666666666667, - "Chinese,Spanish,Malay,Indonesian,English": 0.18666666666666668 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.14, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.17333333333333334, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.13333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.14666666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.16666666666666666, + "Filipino,Vietnamese,Spanish,Malay,English": 0.16, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.15333333333333332, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.16, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.14666666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.15333333333333332, + "Filipino,Chinese,Spanish,Indonesian,English": 0.16666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.13333333333333333, + "Filipino,Spanish,Malay,Indonesian,English": 0.15333333333333332, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.19333333333333333, + "Vietnamese,Chinese,Spanish,Malay,English": 0.20666666666666667, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.21333333333333335, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.2, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Chinese,Spanish,Malay,Indonesian,English": 0.2 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.16, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.17333333333333334, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.16, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.17333333333333334, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.16666666666666666, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.16 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.12, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.12, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.12666666666666668, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.12, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.13333333333333333, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.17333333333333334 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.14666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.10666666666666667 } }, - "AC3_2": 0.4984399872152154, - "AC3_3": 0.3885149126400934, - "AC3_4": 0.31987651140521356, - "AC3_5": 0.2761956851624119, - "AC3_6": 0.24757575753883224, - "AC3_7": 0.22748299316246012 - }, - "prompt_4": { - "overall_acc": 0.499047619047619, + "AC3_2": 0.5151756583944087, + "AC3_3": 0.3970460591217941, + "AC3_4": 0.3145704442187555, + "AC3_5": 0.25386012712081474, + "AC3_6": 0.20958675421030795, + "AC3_7": 0.17772478884451898 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.4602272727272728, "language_acc": { - "Filipino": 0.4066666666666667, - "Vietnamese": 0.5066666666666667, - "Chinese": 0.54, - "Spanish": 0.49333333333333335, - "Malay": 0.44, - "Indonesian": 0.52, - "English": 0.5866666666666667 + "Indonesian": 0.45454545454545453, + "English": 0.5511363636363636, + "Filipino": 0.3465909090909091, + "Spanish": 0.45454545454545453, + "Chinese": 0.6079545454545454, + "Malay": 0.3806818181818182, + "Vietnamese": 0.42613636363636365 }, - "consistency_score_2": 0.4444444444444445, - "consistency_score_3": 0.2510476190476191, - "consistency_score_4": 0.16133333333333338, - "consistency_score_5": 0.11333333333333331, - "consistency_score_6": 0.08476190476190477, - "consistency_score_7": 0.06666666666666667, + "consistency_score_2": 0.5227272727272727, + "consistency_score_3": 0.35211038961038965, + "consistency_score_4": 0.2597402597402597, + "consistency_score_5": 0.20021645021645015, + "consistency_score_6": 0.15746753246753248, + "consistency_score_7": 0.125, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.42, - "Filipino,Chinese": 0.4266666666666667, - "Filipino,Spanish": 0.3333333333333333, - "Filipino,Malay": 0.38, - "Filipino,Indonesian": 0.48, - "Filipino,English": 0.38, - "Vietnamese,Chinese": 0.52, - "Vietnamese,Spanish": 0.52, - "Vietnamese,Malay": 0.4, - "Vietnamese,Indonesian": 0.47333333333333333, - "Vietnamese,English": 0.43333333333333335, - "Chinese,Spanish": 0.4866666666666667, - "Chinese,Malay": 0.3933333333333333, - "Chinese,Indonesian": 0.47333333333333333, - "Chinese,English": 0.5, - "Spanish,Malay": 0.4266666666666667, - "Spanish,Indonesian": 0.42, - "Spanish,English": 0.49333333333333335, - "Malay,Indonesian": 0.5, - "Malay,English": 0.4066666666666667, - "Indonesian,English": 0.4666666666666667 + "Indonesian,English": 0.5454545454545454, + "Indonesian,Filipino": 0.42613636363636365, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Chinese": 0.5568181818181818, + "Indonesian,Malay": 0.5227272727272727, + "Indonesian,Vietnamese": 0.5681818181818182, + "English,Filipino": 0.4375, + "English,Spanish": 0.5738636363636364, + "English,Chinese": 0.6931818181818182, + "English,Malay": 0.45454545454545453, + "English,Vietnamese": 0.5852272727272727, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Chinese": 0.48295454545454547, + "Filipino,Malay": 0.4943181818181818, + "Filipino,Vietnamese": 0.5170454545454546, + "Spanish,Chinese": 0.5397727272727273, + "Spanish,Malay": 0.4943181818181818, + "Spanish,Vietnamese": 0.5340909090909091, + "Chinese,Malay": 0.4772727272727273, + "Chinese,Vietnamese": 0.5795454545454546, + "Malay,Vietnamese": 0.5056818181818182 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.26666666666666666, - "Filipino,Vietnamese,Spanish": 0.2, - "Filipino,Vietnamese,Malay": 0.2, - "Filipino,Vietnamese,Indonesian": 0.26, - "Filipino,Vietnamese,English": 0.20666666666666667, - "Filipino,Chinese,Spanish": 0.22, - "Filipino,Chinese,Malay": 0.20666666666666667, - "Filipino,Chinese,Indonesian": 0.29333333333333333, - "Filipino,Chinese,English": 0.23333333333333334, - "Filipino,Spanish,Malay": 0.17333333333333334, - "Filipino,Spanish,Indonesian": 0.22666666666666666, - "Filipino,Spanish,English": 0.20666666666666667, - "Filipino,Malay,Indonesian": 0.25333333333333335, - "Filipino,Malay,English": 0.19333333333333333, - "Filipino,Indonesian,English": 0.25333333333333335, - "Vietnamese,Chinese,Spanish": 0.3333333333333333, - "Vietnamese,Chinese,Malay": 0.24, - "Vietnamese,Chinese,Indonesian": 0.32666666666666666, - "Vietnamese,Chinese,English": 0.2866666666666667, - "Vietnamese,Spanish,Malay": 0.25333333333333335, - "Vietnamese,Spanish,Indonesian": 0.2733333333333333, - "Vietnamese,Spanish,English": 0.29333333333333333, - "Vietnamese,Malay,Indonesian": 0.26, - "Vietnamese,Malay,English": 0.22, - "Vietnamese,Indonesian,English": 0.26666666666666666, - "Chinese,Spanish,Malay": 0.24666666666666667, - "Chinese,Spanish,Indonesian": 0.28, - "Chinese,Spanish,English": 0.30666666666666664, - "Chinese,Malay,Indonesian": 0.25333333333333335, - "Chinese,Malay,English": 0.22, - "Chinese,Indonesian,English": 0.3, - "Spanish,Malay,Indonesian": 0.25333333333333335, - "Spanish,Malay,English": 0.25333333333333335, - "Spanish,Indonesian,English": 0.2733333333333333, - "Malay,Indonesian,English": 0.25333333333333335 + "Indonesian,English,Filipino": 0.3181818181818182, + "Indonesian,English,Spanish": 0.3806818181818182, + "Indonesian,English,Chinese": 0.4318181818181818, + "Indonesian,English,Malay": 0.32954545454545453, + "Indonesian,English,Vietnamese": 0.4147727272727273, + "Indonesian,Filipino,Spanish": 0.29545454545454547, + "Indonesian,Filipino,Chinese": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.2897727272727273, + "Indonesian,Filipino,Vietnamese": 0.3465909090909091, + "Indonesian,Spanish,Chinese": 0.375, + "Indonesian,Spanish,Malay": 0.32954545454545453, + "Indonesian,Spanish,Vietnamese": 0.38636363636363635, + "Indonesian,Chinese,Malay": 0.3522727272727273, + "Indonesian,Chinese,Vietnamese": 0.42045454545454547, + "Indonesian,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Spanish": 0.32386363636363635, + "English,Filipino,Chinese": 0.36363636363636365, + "English,Filipino,Malay": 0.2556818181818182, + "English,Filipino,Vietnamese": 0.36363636363636365, + "English,Spanish,Chinese": 0.44886363636363635, + "English,Spanish,Malay": 0.3352272727272727, + "English,Spanish,Vietnamese": 0.4034090909090909, + "English,Chinese,Malay": 0.35795454545454547, + "English,Chinese,Vietnamese": 0.4772727272727273, + "English,Malay,Vietnamese": 0.3181818181818182, + "Filipino,Spanish,Chinese": 0.3181818181818182, + "Filipino,Spanish,Malay": 0.3068181818181818, + "Filipino,Spanish,Vietnamese": 0.3352272727272727, + "Filipino,Chinese,Malay": 0.2897727272727273, + "Filipino,Chinese,Vietnamese": 0.375, + "Filipino,Malay,Vietnamese": 0.3125, + "Spanish,Chinese,Malay": 0.32386363636363635, + "Spanish,Chinese,Vietnamese": 0.3977272727272727, + "Spanish,Malay,Vietnamese": 0.3352272727272727, + "Chinese,Malay,Vietnamese": 0.32954545454545453 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.16, - "Filipino,Vietnamese,Chinese,Malay": 0.14, - "Filipino,Vietnamese,Chinese,Indonesian": 0.21333333333333335, - "Filipino,Vietnamese,Chinese,English": 0.14, - "Filipino,Vietnamese,Spanish,Malay": 0.10666666666666667, - "Filipino,Vietnamese,Spanish,Indonesian": 0.15333333333333332, - "Filipino,Vietnamese,Spanish,English": 0.14, - "Filipino,Vietnamese,Malay,Indonesian": 0.14666666666666667, - "Filipino,Vietnamese,Malay,English": 0.11333333333333333, - "Filipino,Vietnamese,Indonesian,English": 0.14666666666666667, - "Filipino,Chinese,Spanish,Malay": 0.13333333333333333, - "Filipino,Chinese,Spanish,Indonesian": 0.18666666666666668, - "Filipino,Chinese,Spanish,English": 0.16, - "Filipino,Chinese,Malay,Indonesian": 0.18, - "Filipino,Chinese,Malay,English": 0.12666666666666668, - "Filipino,Chinese,Indonesian,English": 0.18, - "Filipino,Spanish,Malay,Indonesian": 0.14666666666666667, - "Filipino,Spanish,Malay,English": 0.12, - "Filipino,Spanish,Indonesian,English": 0.15333333333333332, - "Filipino,Malay,Indonesian,English": 0.14, - "Vietnamese,Chinese,Spanish,Malay": 0.17333333333333334, - "Vietnamese,Chinese,Spanish,Indonesian": 0.22, - "Vietnamese,Chinese,Spanish,English": 0.21333333333333335, - "Vietnamese,Chinese,Malay,Indonesian": 0.18, - "Vietnamese,Chinese,Malay,English": 0.14666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.2, - "Vietnamese,Spanish,Malay,Indonesian": 0.16, - "Vietnamese,Spanish,Malay,English": 0.16, - "Vietnamese,Spanish,Indonesian,English": 0.18666666666666668, - "Vietnamese,Malay,Indonesian,English": 0.14666666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.17333333333333334, - "Chinese,Spanish,Malay,English": 0.16, - "Chinese,Spanish,Indonesian,English": 0.2, - "Chinese,Malay,Indonesian,English": 0.16666666666666666, - "Spanish,Malay,Indonesian,English": 0.17333333333333334 - }, - "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.1, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.12, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Malay,English": 0.08, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.12, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.09333333333333334, - "Filipino,Vietnamese,Spanish,Malay,English": 0.08, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.11333333333333333, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.08666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.12666666666666668, - "Filipino,Chinese,Spanish,Malay,English": 0.1, - "Filipino,Chinese,Spanish,Indonesian,English": 0.13333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.11333333333333333, - "Filipino,Spanish,Malay,Indonesian,English": 0.1, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.12666666666666668, - "Vietnamese,Chinese,Spanish,Malay,English": 0.11333333333333333, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.15333333333333332, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.11333333333333333, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.10666666666666667, - "Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668 + "Indonesian,English,Filipino,Spanish": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese": 0.2727272727272727, + "Indonesian,English,Filipino,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Vietnamese": 0.2840909090909091, + "Indonesian,English,Spanish,Chinese": 0.3125, + "Indonesian,English,Spanish,Malay": 0.25, + "Indonesian,English,Spanish,Vietnamese": 0.3181818181818182, + "Indonesian,English,Chinese,Malay": 0.2727272727272727, + "Indonesian,English,Chinese,Vietnamese": 0.3409090909090909, + "Indonesian,English,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Chinese": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.2159090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,Filipino,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Chinese,Malay": 0.26136363636363635, + "Indonesian,Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Indonesian,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.26704545454545453, + "English,Filipino,Spanish,Chinese": 0.2840909090909091, + "English,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Vietnamese": 0.2784090909090909, + "English,Filipino,Chinese,Malay": 0.20454545454545456, + "English,Filipino,Chinese,Vietnamese": 0.3181818181818182, + "English,Filipino,Malay,Vietnamese": 0.19886363636363635, + "English,Spanish,Chinese,Malay": 0.2727272727272727, + "English,Spanish,Chinese,Vietnamese": 0.3352272727272727, + "English,Spanish,Malay,Vietnamese": 0.26136363636363635, + "English,Chinese,Malay,Vietnamese": 0.25, + "Filipino,Spanish,Chinese,Malay": 0.21022727272727273, + "Filipino,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Filipino,Spanish,Malay,Vietnamese": 0.2215909090909091, + "Filipino,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.2215909090909091, + "Indonesian,English,Filipino,Chinese,Malay": 0.16477272727272727, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,English,Spanish,Chinese,Malay": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2215909090909091, + "English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.25, + "English,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "English,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.09333333333333334, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.07333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.10666666666666667, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.07333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.06666666666666667, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.09333333333333334, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.08666666666666667 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.14204545454545456, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.06666666666666667 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 } }, - "AC3_2": 0.47016599366932404, - "AC3_3": 0.3340501583631659, - "AC3_4": 0.2438380924541828, - "AC3_5": 0.1847174701616454, - "AC3_6": 0.1449110541195115, - "AC3_7": 0.11762065093319275 + "AC3_2": 0.4894902784578217, + "AC3_3": 0.3989740025669747, + "AC3_4": 0.332069283545145, + "AC3_5": 0.27903988674036473, + "AC3_6": 0.23464938474280295, + "AC3_7": 0.1966019417139787 }, - "prompt_5": { - "overall_acc": 0.5, + "prompt_2": { + "overall_acc": 0.46185064935064934, "language_acc": { - "Filipino": 0.5266666666666666, - "Vietnamese": 0.49333333333333335, - "Chinese": 0.48, - "Spanish": 0.47333333333333333, - "Malay": 0.4666666666666667, - "Indonesian": 0.46, - "English": 0.6 + "Indonesian": 0.45454545454545453, + "English": 0.5681818181818182, + "Filipino": 0.3352272727272727, + "Spanish": 0.4659090909090909, + "Chinese": 0.5738636363636364, + "Malay": 0.42613636363636365, + "Vietnamese": 0.4090909090909091 }, - "consistency_score_2": 0.4634920634920635, - "consistency_score_3": 0.28304761904761905, - "consistency_score_4": 0.2001904761904762, - "consistency_score_5": 0.1542857142857143, - "consistency_score_6": 0.12571428571428572, - "consistency_score_7": 0.10666666666666667, + "consistency_score_2": 0.5094696969696969, + "consistency_score_3": 0.33068181818181813, + "consistency_score_4": 0.24090909090909085, + "consistency_score_5": 0.18695887445887452, + "consistency_score_6": 0.15097402597402598, + "consistency_score_7": 0.125, "detailed_consistency_score": { "2_combine": { - "Filipino,Vietnamese": 0.44666666666666666, - "Filipino,Chinese": 0.47333333333333333, - "Filipino,Spanish": 0.43333333333333335, - "Filipino,Malay": 0.4066666666666667, - "Filipino,Indonesian": 0.43333333333333335, - "Filipino,English": 0.49333333333333335, - "Vietnamese,Chinese": 0.4866666666666667, - "Vietnamese,Spanish": 0.5, - "Vietnamese,Malay": 0.4866666666666667, - "Vietnamese,Indonesian": 0.52, - "Vietnamese,English": 0.49333333333333335, - "Chinese,Spanish": 0.43333333333333335, - "Chinese,Malay": 0.41333333333333333, - "Chinese,Indonesian": 0.44, - "Chinese,English": 0.47333333333333333, - "Spanish,Malay": 0.44666666666666666, - "Spanish,Indonesian": 0.41333333333333333, - "Spanish,English": 0.49333333333333335, - "Malay,Indonesian": 0.5, - "Malay,English": 0.48, - "Indonesian,English": 0.4666666666666667 + "Indonesian,English": 0.5227272727272727, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.5397727272727273, + "Indonesian,Chinese": 0.5625, + "Indonesian,Malay": 0.5170454545454546, + "Indonesian,Vietnamese": 0.5511363636363636, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.6022727272727273, + "English,Chinese": 0.5965909090909091, + "English,Malay": 0.4318181818181818, + "English,Vietnamese": 0.4943181818181818, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Chinese": 0.4659090909090909, + "Filipino,Malay": 0.4659090909090909, + "Filipino,Vietnamese": 0.4715909090909091, + "Spanish,Chinese": 0.5340909090909091, + "Spanish,Malay": 0.5170454545454546, + "Spanish,Vietnamese": 0.5625, + "Chinese,Malay": 0.4943181818181818, + "Chinese,Vietnamese": 0.4943181818181818, + "Malay,Vietnamese": 0.4772727272727273 }, "3_combine": { - "Filipino,Vietnamese,Chinese": 0.3, - "Filipino,Vietnamese,Spanish": 0.28, - "Filipino,Vietnamese,Malay": 0.26666666666666666, - "Filipino,Vietnamese,Indonesian": 0.26, - "Filipino,Vietnamese,English": 0.2866666666666667, - "Filipino,Chinese,Spanish": 0.26, - "Filipino,Chinese,Malay": 0.24, - "Filipino,Chinese,Indonesian": 0.26, - "Filipino,Chinese,English": 0.28, - "Filipino,Spanish,Malay": 0.25333333333333335, - "Filipino,Spanish,Indonesian": 0.25333333333333335, - "Filipino,Spanish,English": 0.29333333333333333, - "Filipino,Malay,Indonesian": 0.26, - "Filipino,Malay,English": 0.26, - "Filipino,Indonesian,English": 0.2733333333333333, - "Vietnamese,Chinese,Spanish": 0.2866666666666667, - "Vietnamese,Chinese,Malay": 0.29333333333333333, - "Vietnamese,Chinese,Indonesian": 0.30666666666666664, - "Vietnamese,Chinese,English": 0.2866666666666667, - "Vietnamese,Spanish,Malay": 0.32666666666666666, - "Vietnamese,Spanish,Indonesian": 0.3, - "Vietnamese,Spanish,English": 0.34, - "Vietnamese,Malay,Indonesian": 0.3333333333333333, - "Vietnamese,Malay,English": 0.32, - "Vietnamese,Indonesian,English": 0.32, - "Chinese,Spanish,Malay": 0.24666666666666667, - "Chinese,Spanish,Indonesian": 0.24, - "Chinese,Spanish,English": 0.2866666666666667, - "Chinese,Malay,Indonesian": 0.2866666666666667, - "Chinese,Malay,English": 0.28, - "Chinese,Indonesian,English": 0.28, - "Spanish,Malay,Indonesian": 0.2866666666666667, - "Spanish,Malay,English": 0.29333333333333333, - "Spanish,Indonesian,English": 0.26, - "Malay,Indonesian,English": 0.30666666666666664 + "Indonesian,English,Filipino": 0.3068181818181818, + "Indonesian,English,Spanish": 0.38636363636363635, + "Indonesian,English,Chinese": 0.3806818181818182, + "Indonesian,English,Malay": 0.3068181818181818, + "Indonesian,English,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.3068181818181818, + "Indonesian,Filipino,Malay": 0.29545454545454547, + "Indonesian,Filipino,Vietnamese": 0.32386363636363635, + "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Spanish,Malay": 0.3352272727272727, + "Indonesian,Spanish,Vietnamese": 0.3806818181818182, + "Indonesian,Chinese,Malay": 0.3465909090909091, + "Indonesian,Chinese,Vietnamese": 0.36363636363636365, + "Indonesian,Malay,Vietnamese": 0.3409090909090909, + "English,Filipino,Spanish": 0.3465909090909091, + "English,Filipino,Chinese": 0.32954545454545453, + "English,Filipino,Malay": 0.23863636363636365, + "English,Filipino,Vietnamese": 0.30113636363636365, + "English,Spanish,Chinese": 0.42613636363636365, + "English,Spanish,Malay": 0.3409090909090909, + "English,Spanish,Vietnamese": 0.4034090909090909, + "English,Chinese,Malay": 0.32954545454545453, + "English,Chinese,Vietnamese": 0.36363636363636365, + "English,Malay,Vietnamese": 0.2727272727272727, + "Filipino,Spanish,Chinese": 0.3068181818181818, + "Filipino,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Vietnamese": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.2727272727272727, + "Filipino,Chinese,Vietnamese": 0.30113636363636365, + "Filipino,Malay,Vietnamese": 0.2784090909090909, + "Spanish,Chinese,Malay": 0.32954545454545453, + "Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Spanish,Malay,Vietnamese": 0.32386363636363635, + "Chinese,Malay,Vietnamese": 0.3125 }, "4_combine": { - "Filipino,Vietnamese,Chinese,Spanish": 0.2, - "Filipino,Vietnamese,Chinese,Malay": 0.19333333333333333, - "Filipino,Vietnamese,Chinese,Indonesian": 0.18, - "Filipino,Vietnamese,Chinese,English": 0.20666666666666667, - "Filipino,Vietnamese,Spanish,Malay": 0.19333333333333333, - "Filipino,Vietnamese,Spanish,Indonesian": 0.19333333333333333, - "Filipino,Vietnamese,Spanish,English": 0.22666666666666666, - "Filipino,Vietnamese,Malay,Indonesian": 0.19333333333333333, - "Filipino,Vietnamese,Malay,English": 0.2, - "Filipino,Vietnamese,Indonesian,English": 0.2, - "Filipino,Chinese,Spanish,Malay": 0.16666666666666666, - "Filipino,Chinese,Spanish,Indonesian": 0.16666666666666666, - "Filipino,Chinese,Spanish,English": 0.20666666666666667, - "Filipino,Chinese,Malay,Indonesian": 0.18, - "Filipino,Chinese,Malay,English": 0.19333333333333333, - "Filipino,Chinese,Indonesian,English": 0.18666666666666668, - "Filipino,Spanish,Malay,Indonesian": 0.18666666666666668, - "Filipino,Spanish,Malay,English": 0.18666666666666668, - "Filipino,Spanish,Indonesian,English": 0.19333333333333333, - "Filipino,Malay,Indonesian,English": 0.19333333333333333, - "Vietnamese,Chinese,Spanish,Malay": 0.21333333333333335, - "Vietnamese,Chinese,Spanish,Indonesian": 0.19333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.20666666666666667, - "Vietnamese,Chinese,Malay,Indonesian": 0.22666666666666666, - "Vietnamese,Chinese,Malay,English": 0.20666666666666667, - "Vietnamese,Chinese,Indonesian,English": 0.20666666666666667, - "Vietnamese,Spanish,Malay,Indonesian": 0.24, - "Vietnamese,Spanish,Malay,English": 0.24, - "Vietnamese,Spanish,Indonesian,English": 0.21333333333333335, - "Vietnamese,Malay,Indonesian,English": 0.24666666666666667, - "Chinese,Spanish,Malay,Indonesian": 0.19333333333333333, - "Chinese,Spanish,Malay,English": 0.19333333333333333, - "Chinese,Spanish,Indonesian,English": 0.17333333333333334, - "Chinese,Malay,Indonesian,English": 0.20666666666666667, - "Spanish,Malay,Indonesian,English": 0.2 + "Indonesian,English,Filipino,Spanish": 0.26136363636363635, + "Indonesian,English,Filipino,Chinese": 0.23295454545454544, + "Indonesian,English,Filipino,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Vietnamese": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese": 0.30113636363636365, + "Indonesian,English,Spanish,Malay": 0.25, + "Indonesian,English,Spanish,Vietnamese": 0.30113636363636365, + "Indonesian,English,Chinese,Malay": 0.26136363636363635, + "Indonesian,English,Chinese,Vietnamese": 0.2840909090909091, + "Indonesian,English,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Chinese": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Vietnamese": 0.25, + "Indonesian,Filipino,Chinese,Malay": 0.20454545454545456, + "Indonesian,Filipino,Chinese,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Chinese,Malay": 0.26136363636363635, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2840909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.2556818181818182, + "English,Filipino,Spanish,Chinese": 0.2727272727272727, + "English,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Filipino,Spanish,Vietnamese": 0.26704545454545453, + "English,Filipino,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Chinese,Vietnamese": 0.23863636363636365, + "English,Filipino,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay": 0.26704545454545453, + "English,Spanish,Chinese,Vietnamese": 0.30113636363636365, + "English,Spanish,Malay,Vietnamese": 0.23295454545454544, + "English,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Filipino,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Filipino,Spanish,Malay,Vietnamese": 0.19886363636363635, + "Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727 }, "5_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.14666666666666667, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.13333333333333333, - "Filipino,Vietnamese,Chinese,Spanish,English": 0.16666666666666666, - "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Malay,English": 0.15333333333333332, - "Filipino,Vietnamese,Chinese,Indonesian,English": 0.14666666666666667, - "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.16, - "Filipino,Vietnamese,Spanish,Malay,English": 0.16, - "Filipino,Vietnamese,Spanish,Indonesian,English": 0.16, - "Filipino,Vietnamese,Malay,Indonesian,English": 0.16666666666666666, - "Filipino,Chinese,Spanish,Malay,Indonesian": 0.14, - "Filipino,Chinese,Spanish,Malay,English": 0.14666666666666667, - "Filipino,Chinese,Spanish,Indonesian,English": 0.13333333333333333, - "Filipino,Chinese,Malay,Indonesian,English": 0.15333333333333332, - "Filipino,Spanish,Malay,Indonesian,English": 0.14666666666666667, - "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.17333333333333334, - "Vietnamese,Chinese,Spanish,Malay,English": 0.16, - "Vietnamese,Chinese,Spanish,Indonesian,English": 0.14, - "Vietnamese,Chinese,Malay,Indonesian,English": 0.17333333333333334, - "Vietnamese,Spanish,Malay,Indonesian,English": 0.18, - "Chinese,Spanish,Malay,Indonesian,English": 0.14666666666666667 + "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "Indonesian,English,Filipino,Chinese,Malay": 0.16477272727272727, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,English,Spanish,Chinese,Malay": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.16477272727272727, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.2159090909090909, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 }, "6_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.12666666666666668, - "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.11333333333333333, - "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.13333333333333333, - "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.13333333333333333, - "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.12, - "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.12666666666666668 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13636363636363635, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13068181818181818 }, "7_combine": { - "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.10666666666666667 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 } }, - "AC3_2": 0.4810543656831855, - "AC3_3": 0.3614692288519165, - "AC3_4": 0.28590859625949344, - "AC3_5": 0.23580786022596822, - "AC3_6": 0.200913241977023, - "AC3_7": 0.17582417579519383 - } - }, - "cross_logiqa": { - "prompt_1": { - "overall_acc": 0.4155844155844156, + "AC3_2": 0.4844929095474865, + "AC3_3": 0.38541162335459483, + "AC3_4": 0.3166488166037587, + "AC3_5": 0.26617080788332814, + "AC3_6": 0.22756084970914708, + "AC3_7": 0.19674965418500753 + }, + "prompt_3": { + "overall_acc": 0.40909090909090917, "language_acc": { - "Indonesian": 0.39204545454545453, - "English": 0.48295454545454547, - "Filipino": 0.30113636363636365, - "Spanish": 0.4034090909090909, - "Chinese": 0.4715909090909091, - "Malay": 0.42045454545454547, - "Vietnamese": 0.4375 + "Indonesian": 0.3693181818181818, + "English": 0.5, + "Filipino": 0.2784090909090909, + "Spanish": 0.44886363636363635, + "Chinese": 0.5511363636363636, + "Malay": 0.3352272727272727, + "Vietnamese": 0.3806818181818182 }, - "consistency_score_2": 0.4266774891774892, - "consistency_score_3": 0.23327922077922075, - "consistency_score_4": 0.14886363636363642, - "consistency_score_5": 0.10551948051948054, + "consistency_score_2": 0.4334415584415584, + "consistency_score_3": 0.2407467532467533, + "consistency_score_4": 0.1530844155844156, + "consistency_score_5": 0.10660173160173161, "consistency_score_6": 0.07954545454545454, "consistency_score_7": 0.0625, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4715909090909091, - "Indonesian,Filipino": 0.38636363636363635, - "Indonesian,Spanish": 0.4943181818181818, - "Indonesian,Chinese": 0.36363636363636365, - "Indonesian,Malay": 0.5056818181818182, - "Indonesian,Vietnamese": 0.4431818181818182, - "English,Filipino": 0.32954545454545453, - "English,Spanish": 0.5, - "English,Chinese": 0.4602272727272727, - "English,Malay": 0.4659090909090909, - "English,Vietnamese": 0.4147727272727273, + "Indonesian,English": 0.5056818181818182, + "Indonesian,Filipino": 0.3977272727272727, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Chinese": 0.4602272727272727, + "Indonesian,Malay": 0.4090909090909091, + "Indonesian,Vietnamese": 0.44886363636363635, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.48863636363636365, + "English,Chinese": 0.4943181818181818, + "English,Malay": 0.36363636363636365, + "English,Vietnamese": 0.4602272727272727, "Filipino,Spanish": 0.3522727272727273, - "Filipino,Chinese": 0.3522727272727273, - "Filipino,Malay": 0.38636363636363635, - "Filipino,Vietnamese": 0.3409090909090909, - "Spanish,Chinese": 0.4318181818181818, - "Spanish,Malay": 0.48295454545454547, - "Spanish,Vietnamese": 0.48295454545454547, - "Chinese,Malay": 0.4431818181818182, - "Chinese,Vietnamese": 0.4090909090909091, - "Malay,Vietnamese": 0.4431818181818182 + "Filipino,Chinese": 0.3977272727272727, + "Filipino,Malay": 0.4772727272727273, + "Filipino,Vietnamese": 0.39204545454545453, + "Spanish,Chinese": 0.4943181818181818, + "Spanish,Malay": 0.42613636363636365, + "Spanish,Vietnamese": 0.4602272727272727, + "Chinese,Malay": 0.3977272727272727, + "Chinese,Vietnamese": 0.45454545454545453, + "Malay,Vietnamese": 0.4318181818181818 }, "3_combine": { - "Indonesian,English,Filipino": 0.17613636363636365, + "Indonesian,English,Filipino": 0.2159090909090909, "Indonesian,English,Spanish": 0.3068181818181818, - "Indonesian,English,Chinese": 0.26136363636363635, - "Indonesian,English,Malay": 0.29545454545454547, - "Indonesian,English,Vietnamese": 0.26136363636363635, - "Indonesian,Filipino,Spanish": 0.21022727272727273, - "Indonesian,Filipino,Chinese": 0.1590909090909091, - "Indonesian,Filipino,Malay": 0.23863636363636365, - "Indonesian,Filipino,Vietnamese": 0.19886363636363635, - "Indonesian,Spanish,Chinese": 0.25, - "Indonesian,Spanish,Malay": 0.32386363636363635, - "Indonesian,Spanish,Vietnamese": 0.2897727272727273, - "Indonesian,Chinese,Malay": 0.26704545454545453, - "Indonesian,Chinese,Vietnamese": 0.20454545454545456, - "Indonesian,Malay,Vietnamese": 0.2784090909090909, - "English,Filipino,Spanish": 0.17045454545454544, - "English,Filipino,Chinese": 0.18181818181818182, - "English,Filipino,Malay": 0.19318181818181818, - "English,Filipino,Vietnamese": 0.17045454545454544, - "English,Spanish,Chinese": 0.2727272727272727, - "English,Spanish,Malay": 0.30113636363636365, + "Indonesian,English,Chinese": 0.3068181818181818, + "Indonesian,English,Malay": 0.23295454545454544, + "Indonesian,English,Vietnamese": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.19886363636363635, + "Indonesian,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.24431818181818182, + "Indonesian,Filipino,Vietnamese": 0.22727272727272727, + "Indonesian,Spanish,Chinese": 0.29545454545454547, + "Indonesian,Spanish,Malay": 0.23295454545454544, + "Indonesian,Spanish,Vietnamese": 0.2784090909090909, + "Indonesian,Chinese,Malay": 0.23295454545454544, + "Indonesian,Chinese,Vietnamese": 0.25, + "Indonesian,Malay,Vietnamese": 0.2215909090909091, + "English,Filipino,Spanish": 0.17613636363636365, + "English,Filipino,Chinese": 0.19886363636363635, + "English,Filipino,Malay": 0.1875, + "English,Filipino,Vietnamese": 0.19886363636363635, + "English,Spanish,Chinese": 0.3465909090909091, + "English,Spanish,Malay": 0.2159090909090909, "English,Spanish,Vietnamese": 0.2897727272727273, - "English,Chinese,Malay": 0.2727272727272727, - "English,Chinese,Vietnamese": 0.23863636363636365, - "English,Malay,Vietnamese": 0.24431818181818182, - "Filipino,Spanish,Chinese": 0.16477272727272727, - "Filipino,Spanish,Malay": 0.20454545454545456, - "Filipino,Spanish,Vietnamese": 0.18181818181818182, - "Filipino,Chinese,Malay": 0.19318181818181818, - "Filipino,Chinese,Vietnamese": 0.16477272727272727, - "Filipino,Malay,Vietnamese": 0.18181818181818182, - "Spanish,Chinese,Malay": 0.26704545454545453, - "Spanish,Chinese,Vietnamese": 0.23863636363636365, - "Spanish,Malay,Vietnamese": 0.2840909090909091, - "Chinese,Malay,Vietnamese": 0.22727272727272727 + "English,Chinese,Malay": 0.21022727272727273, + "English,Chinese,Vietnamese": 0.29545454545454547, + "English,Malay,Vietnamese": 0.21022727272727273, + "Filipino,Spanish,Chinese": 0.23295454545454544, + "Filipino,Spanish,Malay": 0.21022727272727273, + "Filipino,Spanish,Vietnamese": 0.20454545454545456, + "Filipino,Chinese,Malay": 0.23295454545454544, + "Filipino,Chinese,Vietnamese": 0.23295454545454544, + "Filipino,Malay,Vietnamese": 0.2556818181818182, + "Spanish,Chinese,Malay": 0.24431818181818182, + "Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Spanish,Malay,Vietnamese": 0.2215909090909091, + "Chinese,Malay,Vietnamese": 0.21022727272727273 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.11931818181818182, - "Indonesian,English,Filipino,Chinese": 0.11363636363636363, - "Indonesian,English,Filipino,Malay": 0.11931818181818182, - "Indonesian,English,Filipino,Vietnamese": 0.125, - "Indonesian,English,Spanish,Chinese": 0.19886363636363635, - "Indonesian,English,Spanish,Malay": 0.22727272727272727, - "Indonesian,English,Spanish,Vietnamese": 0.21022727272727273, - "Indonesian,English,Chinese,Malay": 0.19318181818181818, - "Indonesian,English,Chinese,Vietnamese": 0.1534090909090909, - "Indonesian,English,Malay,Vietnamese": 0.17613636363636365, - "Indonesian,Filipino,Spanish,Chinese": 0.11363636363636363, - "Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Vietnamese": 0.14772727272727273, - "Indonesian,Filipino,Chinese,Malay": 0.125, - "Indonesian,Filipino,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,Spanish,Chinese,Malay": 0.20454545454545456, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Malay,Vietnamese": 0.2215909090909091, - "Indonesian,Chinese,Malay,Vietnamese": 0.1590909090909091, - "English,Filipino,Spanish,Chinese": 0.11363636363636363, - "English,Filipino,Spanish,Malay": 0.10795454545454546, - "English,Filipino,Spanish,Vietnamese": 0.11363636363636363, + "Indonesian,English,Filipino,Spanish": 0.13068181818181818, + "Indonesian,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Malay": 0.14772727272727273, + "Indonesian,English,Filipino,Vietnamese": 0.14772727272727273, + "Indonesian,English,Spanish,Chinese": 0.23295454545454544, + "Indonesian,English,Spanish,Malay": 0.14772727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.20454545454545456, + "Indonesian,English,Chinese,Malay": 0.1590909090909091, + "Indonesian,English,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,English,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,Filipino,Spanish,Chinese": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Vietnamese": 0.13636363636363635, + "Indonesian,Filipino,Chinese,Malay": 0.1534090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,Filipino,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,Spanish,Chinese,Malay": 0.17045454545454544, + "Indonesian,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,Spanish,Malay,Vietnamese": 0.13636363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.125, + "English,Filipino,Spanish,Chinese": 0.16477272727272727, + "English,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Filipino,Spanish,Vietnamese": 0.13068181818181818, "English,Filipino,Chinese,Malay": 0.125, - "English,Filipino,Chinese,Vietnamese": 0.10795454545454546, - "English,Filipino,Malay,Vietnamese": 0.11363636363636363, - "English,Spanish,Chinese,Malay": 0.19886363636363635, - "English,Spanish,Chinese,Vietnamese": 0.18181818181818182, - "English,Spanish,Malay,Vietnamese": 0.19886363636363635, - "English,Chinese,Malay,Vietnamese": 0.17045454545454544, - "Filipino,Spanish,Chinese,Malay": 0.11363636363636363, - "Filipino,Spanish,Chinese,Vietnamese": 0.10227272727272728, + "English,Filipino,Chinese,Vietnamese": 0.14204545454545456, + "English,Filipino,Malay,Vietnamese": 0.13068181818181818, + "English,Spanish,Chinese,Malay": 0.16477272727272727, + "English,Spanish,Chinese,Vietnamese": 0.22727272727272727, + "English,Spanish,Malay,Vietnamese": 0.13636363636363635, + "English,Chinese,Malay,Vietnamese": 0.13068181818181818, + "Filipino,Spanish,Chinese,Malay": 0.13636363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, "Filipino,Spanish,Malay,Vietnamese": 0.13636363636363635, - "Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 + "Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.125 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.125, + "Indonesian,English,Filipino,Spanish,Malay": 0.09090909090909091, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.10227272727272728, + "Indonesian,English,Filipino,Chinese,Malay": 0.10795454545454546, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.11931818181818182, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.10227272727272728, + "Indonesian,English,Spanish,Chinese,Malay": 0.125, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.16477272727272727, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.09659090909090909, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.09659090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.09090909090909091, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.09659090909090909, + "English,Filipino,Spanish,Chinese,Malay": 0.10227272727272728, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, + "English,Filipino,Spanish,Malay,Vietnamese": 0.08522727272727272, + "English,Filipino,Chinese,Malay,Vietnamese": 0.09090909090909091, + "English,Spanish,Chinese,Malay,Vietnamese": 0.09659090909090909, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.08522727272727272, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.09659090909090909, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.06818181818181818, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07954545454545454, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07386363636363637 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.0625 + } + }, + "AC3_2": 0.4209143457198789, + "AC3_3": 0.30311357386383175, + "AC3_4": 0.22279683927467855, + "AC3_5": 0.16913097392503967, + "AC3_6": 0.1331923889790845, + "AC3_7": 0.10843373491676586 + }, + "prompt_4": { + "overall_acc": 0.4642857142857143, + "language_acc": { + "Indonesian": 0.4659090909090909, + "English": 0.5397727272727273, + "Filipino": 0.36363636363636365, + "Spanish": 0.4431818181818182, + "Chinese": 0.5852272727272727, + "Malay": 0.4147727272727273, + "Vietnamese": 0.4375 + }, + "consistency_score_2": 0.507034632034632, + "consistency_score_3": 0.33003246753246757, + "consistency_score_4": 0.24237012987012982, + "consistency_score_5": 0.1915584415584416, + "consistency_score_6": 0.1590909090909091, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.5625, + "Indonesian,Filipino": 0.42045454545454547, + "Indonesian,Spanish": 0.5284090909090909, + "Indonesian,Chinese": 0.5454545454545454, + "Indonesian,Malay": 0.5625, + "Indonesian,Vietnamese": 0.5113636363636364, + "English,Filipino": 0.45454545454545453, + "English,Spanish": 0.5795454545454546, + "English,Chinese": 0.6193181818181818, + "English,Malay": 0.4772727272727273, + "English,Vietnamese": 0.5454545454545454, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Chinese": 0.45454545454545453, + "Filipino,Malay": 0.48295454545454547, + "Filipino,Vietnamese": 0.4375, + "Spanish,Chinese": 0.5056818181818182, + "Spanish,Malay": 0.4943181818181818, + "Spanish,Vietnamese": 0.48863636363636365, + "Chinese,Malay": 0.5113636363636364, + "Chinese,Vietnamese": 0.5454545454545454, + "Malay,Vietnamese": 0.5170454545454546 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.29545454545454547, + "Indonesian,English,Spanish": 0.3977272727272727, + "Indonesian,English,Chinese": 0.42045454545454547, + "Indonesian,English,Malay": 0.36363636363636365, + "Indonesian,English,Vietnamese": 0.375, + "Indonesian,Filipino,Spanish": 0.2727272727272727, + "Indonesian,Filipino,Chinese": 0.30113636363636365, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Filipino,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Chinese": 0.35795454545454547, + "Indonesian,Spanish,Malay": 0.3522727272727273, + "Indonesian,Spanish,Vietnamese": 0.3125, + "Indonesian,Chinese,Malay": 0.375, + "Indonesian,Chinese,Vietnamese": 0.375, + "Indonesian,Malay,Vietnamese": 0.3522727272727273, + "English,Filipino,Spanish": 0.30113636363636365, + "English,Filipino,Chinese": 0.32954545454545453, + "English,Filipino,Malay": 0.29545454545454547, + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Spanish,Chinese": 0.4034090909090909, + "English,Spanish,Malay": 0.32954545454545453, + "English,Spanish,Vietnamese": 0.3465909090909091, + "English,Chinese,Malay": 0.3522727272727273, + "English,Chinese,Vietnamese": 0.4147727272727273, + "English,Malay,Vietnamese": 0.3352272727272727, + "Filipino,Spanish,Chinese": 0.2727272727272727, + "Filipino,Spanish,Malay": 0.2727272727272727, + "Filipino,Spanish,Vietnamese": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.3068181818181818, + "Filipino,Chinese,Vietnamese": 0.2897727272727273, + "Filipino,Malay,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Malay": 0.3352272727272727, + "Spanish,Chinese,Vietnamese": 0.32386363636363635, + "Spanish,Malay,Vietnamese": 0.3125, + "Chinese,Malay,Vietnamese": 0.35795454545454547 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.22727272727272727, + "Indonesian,English,Filipino,Chinese": 0.25, + "Indonesian,English,Filipino,Malay": 0.22727272727272727, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.3181818181818182, + "Indonesian,English,Spanish,Malay": 0.2840909090909091, + "Indonesian,English,Spanish,Vietnamese": 0.26704545454545453, + "Indonesian,English,Chinese,Malay": 0.29545454545454547, + "Indonesian,English,Chinese,Vietnamese": 0.3181818181818182, + "Indonesian,English,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.24431818181818182, + "Indonesian,Filipino,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,Filipino,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Chinese,Malay": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,Chinese,Malay,Vietnamese": 0.2784090909090909, + "English,Filipino,Spanish,Chinese": 0.24431818181818182, + "English,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "English,Filipino,Chinese,Malay": 0.23295454545454544, + "English,Filipino,Chinese,Vietnamese": 0.24431818181818182, + "English,Filipino,Malay,Vietnamese": 0.21022727272727273, + "English,Spanish,Chinese,Malay": 0.2727272727272727, + "English,Spanish,Chinese,Vietnamese": 0.2727272727272727, + "English,Spanish,Malay,Vietnamese": 0.25, + "English,Chinese,Malay,Vietnamese": 0.2727272727272727, + "Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Filipino,Spanish,Chinese,Vietnamese": 0.1875, + "Filipino,Spanish,Malay,Vietnamese": 0.1875, + "Filipino,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Spanish,Chinese,Malay,Vietnamese": 0.25 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.09090909090909091, - "Indonesian,English,Filipino,Spanish,Malay": 0.08522727272727272, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.10227272727272728, - "Indonesian,English,Filipino,Chinese,Malay": 0.08522727272727272, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.07954545454545454, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.08522727272727272, - "Indonesian,English,Spanish,Chinese,Malay": 0.16477272727272727, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.14204545454545456, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.125, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.08522727272727272, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.11931818181818182, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, - "English,Filipino,Spanish,Chinese,Malay": 0.08522727272727272, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.08522727272727272, - "English,Filipino,Spanish,Malay,Vietnamese": 0.09090909090909091, - "English,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, - "English,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.08522727272727272 + "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,English,Spanish,Chinese,Malay": 0.24431818181818182, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.1875, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.06818181818181818, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.07386363636363637, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.0625, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.125, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07386363636363637, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.07386363636363637 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.0625 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635 } }, - "AC3_2": 0.42105790125204745, - "AC3_3": 0.29882151869040185, - "AC3_4": 0.21920673512993588, - "AC3_5": 0.1683052150018892, - "AC3_6": 0.13353204170179395, - "AC3_7": 0.1086587436105488 + "AC3_2": 0.48471945876426314, + "AC3_3": 0.38581355284039504, + "AC3_4": 0.31848314786605947, + "AC3_5": 0.2712164073136675, + "AC3_6": 0.23697916662865126, + "AC3_7": 0.21081081077571362 }, - "prompt_2": { - "overall_acc": 0.4204545454545454, + "prompt_5": { + "overall_acc": 0.45860389610389607, "language_acc": { - "Indonesian": 0.3806818181818182, - "English": 0.5170454545454546, - "Filipino": 0.3181818181818182, - "Spanish": 0.42045454545454547, - "Chinese": 0.44886363636363635, - "Malay": 0.39204545454545453, - "Vietnamese": 0.4659090909090909 + "Indonesian": 0.4659090909090909, + "English": 0.5568181818181818, + "Filipino": 0.35795454545454547, + "Spanish": 0.4318181818181818, + "Chinese": 0.5909090909090909, + "Malay": 0.3693181818181818, + "Vietnamese": 0.4375 }, - "consistency_score_2": 0.44778138528138534, - "consistency_score_3": 0.25925324675324674, - "consistency_score_4": 0.1754870129870129, - "consistency_score_5": 0.13392857142857142, - "consistency_score_6": 0.11120129870129869, - "consistency_score_7": 0.09659090909090909, + "consistency_score_2": 0.4802489177489177, + "consistency_score_3": 0.29740259740259756, + "consistency_score_4": 0.21087662337662336, + "consistency_score_5": 0.16152597402597405, + "consistency_score_6": 0.12987012987012989, + "consistency_score_7": 0.10795454545454546, "detailed_consistency_score": { "2_combine": { - "Indonesian,English": 0.4602272727272727, - "Indonesian,Filipino": 0.4034090909090909, - "Indonesian,Spanish": 0.45454545454545453, - "Indonesian,Chinese": 0.45454545454545453, - "Indonesian,Malay": 0.4659090909090909, - "Indonesian,Vietnamese": 0.4318181818181818, - "English,Filipino": 0.35795454545454547, - "English,Spanish": 0.5, - "English,Chinese": 0.4318181818181818, - "English,Malay": 0.5, - "English,Vietnamese": 0.5340909090909091, - "Filipino,Spanish": 0.4147727272727273, - "Filipino,Chinese": 0.32386363636363635, - "Filipino,Malay": 0.45454545454545453, - "Filipino,Vietnamese": 0.3806818181818182, - "Spanish,Chinese": 0.4659090909090909, - "Spanish,Malay": 0.5284090909090909, - "Spanish,Vietnamese": 0.48863636363636365, + "Indonesian,English": 0.5284090909090909, + "Indonesian,Filipino": 0.4090909090909091, + "Indonesian,Spanish": 0.5, + "Indonesian,Chinese": 0.5681818181818182, + "Indonesian,Malay": 0.5227272727272727, + "Indonesian,Vietnamese": 0.5284090909090909, + "English,Filipino": 0.3693181818181818, + "English,Spanish": 0.5454545454545454, + "English,Chinese": 0.5909090909090909, + "English,Malay": 0.48863636363636365, + "English,Vietnamese": 0.48295454545454547, + "Filipino,Spanish": 0.4375, + "Filipino,Chinese": 0.4034090909090909, + "Filipino,Malay": 0.42613636363636365, + "Filipino,Vietnamese": 0.4318181818181818, + "Spanish,Chinese": 0.4943181818181818, + "Spanish,Malay": 0.4715909090909091, + "Spanish,Vietnamese": 0.5, "Chinese,Malay": 0.4431818181818182, - "Chinese,Vietnamese": 0.4602272727272727, - "Malay,Vietnamese": 0.44886363636363635 + "Chinese,Vietnamese": 0.48863636363636365, + "Malay,Vietnamese": 0.45454545454545453 }, "3_combine": { - "Indonesian,English,Filipino": 0.20454545454545456, - "Indonesian,English,Spanish": 0.3068181818181818, - "Indonesian,English,Chinese": 0.26136363636363635, - "Indonesian,English,Malay": 0.29545454545454547, - "Indonesian,English,Vietnamese": 0.3125, - "Indonesian,Filipino,Spanish": 0.23295454545454544, - "Indonesian,Filipino,Chinese": 0.1875, - "Indonesian,Filipino,Malay": 0.23863636363636365, - "Indonesian,Filipino,Vietnamese": 0.2215909090909091, - "Indonesian,Spanish,Chinese": 0.2784090909090909, + "Indonesian,English,Filipino": 0.23863636363636365, + "Indonesian,English,Spanish": 0.35795454545454547, + "Indonesian,English,Chinese": 0.38636363636363635, + "Indonesian,English,Malay": 0.32386363636363635, + "Indonesian,English,Vietnamese": 0.3352272727272727, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Indonesian,Filipino,Malay": 0.26704545454545453, + "Indonesian,Filipino,Vietnamese": 0.2840909090909091, + "Indonesian,Spanish,Chinese": 0.3465909090909091, "Indonesian,Spanish,Malay": 0.3068181818181818, - "Indonesian,Spanish,Vietnamese": 0.26704545454545453, - "Indonesian,Chinese,Malay": 0.2727272727272727, - "Indonesian,Chinese,Vietnamese": 0.26704545454545453, - "Indonesian,Malay,Vietnamese": 0.2556818181818182, - "English,Filipino,Spanish": 0.2159090909090909, - "English,Filipino,Chinese": 0.17613636363636365, - "English,Filipino,Malay": 0.2556818181818182, - "English,Filipino,Vietnamese": 0.2215909090909091, - "English,Spanish,Chinese": 0.26704545454545453, - "English,Spanish,Malay": 0.32954545454545453, - "English,Spanish,Vietnamese": 0.32954545454545453, - "English,Chinese,Malay": 0.26136363636363635, - "English,Chinese,Vietnamese": 0.30113636363636365, - "English,Malay,Vietnamese": 0.32954545454545453, - "Filipino,Spanish,Chinese": 0.19886363636363635, - "Filipino,Spanish,Malay": 0.26704545454545453, - "Filipino,Spanish,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Vietnamese": 0.32954545454545453, + "Indonesian,Chinese,Malay": 0.32386363636363635, + "Indonesian,Chinese,Vietnamese": 0.35795454545454547, + "Indonesian,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish": 0.2556818181818182, + "English,Filipino,Chinese": 0.26704545454545453, + "English,Filipino,Malay": 0.24431818181818182, + "English,Filipino,Vietnamese": 0.23863636363636365, + "English,Spanish,Chinese": 0.375, + "English,Spanish,Malay": 0.32386363636363635, + "English,Spanish,Vietnamese": 0.3465909090909091, + "English,Chinese,Malay": 0.32386363636363635, + "English,Chinese,Vietnamese": 0.3352272727272727, + "English,Malay,Vietnamese": 0.2897727272727273, + "Filipino,Spanish,Chinese": 0.26136363636363635, + "Filipino,Spanish,Malay": 0.25, + "Filipino,Spanish,Vietnamese": 0.2727272727272727, "Filipino,Chinese,Malay": 0.23295454545454544, - "Filipino,Chinese,Vietnamese": 0.19886363636363635, - "Filipino,Malay,Vietnamese": 0.23295454545454544, - "Spanish,Chinese,Malay": 0.2897727272727273, - "Spanish,Chinese,Vietnamese": 0.2897727272727273, - "Spanish,Malay,Vietnamese": 0.2897727272727273, - "Chinese,Malay,Vietnamese": 0.2556818181818182 + "Filipino,Chinese,Vietnamese": 0.2784090909090909, + "Filipino,Malay,Vietnamese": 0.25, + "Spanish,Chinese,Malay": 0.2784090909090909, + "Spanish,Chinese,Vietnamese": 0.3181818181818182, + "Spanish,Malay,Vietnamese": 0.2784090909090909, + "Chinese,Malay,Vietnamese": 0.2727272727272727 }, "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.16477272727272727, - "Indonesian,English,Filipino,Chinese": 0.11363636363636363, - "Indonesian,English,Filipino,Malay": 0.16477272727272727, - "Indonesian,English,Filipino,Vietnamese": 0.17613636363636365, - "Indonesian,English,Spanish,Chinese": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish": 0.1875, + "Indonesian,English,Filipino,Chinese": 0.19886363636363635, + "Indonesian,English,Filipino,Malay": 0.17613636363636365, + "Indonesian,English,Filipino,Vietnamese": 0.19318181818181818, + "Indonesian,English,Spanish,Chinese": 0.2727272727272727, "Indonesian,English,Spanish,Malay": 0.23295454545454544, - "Indonesian,English,Spanish,Vietnamese": 0.2215909090909091, - "Indonesian,English,Chinese,Malay": 0.17045454545454544, - "Indonesian,English,Chinese,Vietnamese": 0.20454545454545456, - "Indonesian,English,Malay,Vietnamese": 0.20454545454545456, - "Indonesian,Filipino,Spanish,Chinese": 0.14204545454545456, - "Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, - "Indonesian,Filipino,Spanish,Vietnamese": 0.1590909090909091, - "Indonesian,Filipino,Chinese,Malay": 0.1534090909090909, - "Indonesian,Filipino,Chinese,Vietnamese": 0.14204545454545456, - "Indonesian,Filipino,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Chinese,Malay": 0.19886363636363635, - "Indonesian,Spanish,Chinese,Vietnamese": 0.19886363636363635, - "Indonesian,Spanish,Malay,Vietnamese": 0.19318181818181818, - "Indonesian,Chinese,Malay,Vietnamese": 0.18181818181818182, - "English,Filipino,Spanish,Chinese": 0.11931818181818182, - "English,Filipino,Spanish,Malay": 0.17613636363636365, - "English,Filipino,Spanish,Vietnamese": 0.16477272727272727, - "English,Filipino,Chinese,Malay": 0.1590909090909091, - "English,Filipino,Chinese,Vietnamese": 0.14204545454545456, - "English,Filipino,Malay,Vietnamese": 0.17613636363636365, - "English,Spanish,Chinese,Malay": 0.1875, - "English,Spanish,Chinese,Vietnamese": 0.2159090909090909, - "English,Spanish,Malay,Vietnamese": 0.23863636363636365, - "English,Chinese,Malay,Vietnamese": 0.21022727272727273, - "Filipino,Spanish,Chinese,Malay": 0.1590909090909091, - "Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, - "Filipino,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Filipino,Chinese,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Vietnamese": 0.26136363636363635, + "Indonesian,English,Chinese,Malay": 0.25, + "Indonesian,English,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay": 0.18181818181818182, + "Indonesian,Filipino,Chinese,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Spanish,Chinese,Malay": 0.22727272727272727, + "Indonesian,Spanish,Chinese,Vietnamese": 0.25, + "Indonesian,Spanish,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Chinese,Malay,Vietnamese": 0.22727272727272727, + "English,Filipino,Spanish,Chinese": 0.20454545454545456, + "English,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Filipino,Spanish,Vietnamese": 0.19318181818181818, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "English,Filipino,Malay,Vietnamese": 0.17045454545454544, + "English,Spanish,Chinese,Malay": 0.23863636363636365, + "English,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "English,Spanish,Malay,Vietnamese": 0.22727272727272727, + "English,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Filipino,Spanish,Chinese,Malay": 0.17045454545454544, + "Filipino,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Filipino,Spanish,Malay,Vietnamese": 0.17613636363636365, + "Filipino,Chinese,Malay,Vietnamese": 0.1875, "Spanish,Chinese,Malay,Vietnamese": 0.1875 }, "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.10227272727272728, - "Indonesian,English,Filipino,Spanish,Malay": 0.13636363636363635, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.14204545454545456, - "Indonesian,English,Filipino,Chinese,Malay": 0.10795454545454546, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.11363636363636363, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,English,Spanish,Chinese,Malay": 0.14772727272727273, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.16477272727272727, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.17045454545454544, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.11931818181818182, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.11931818181818182, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.125, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, - "English,Filipino,Spanish,Chinese,Malay": 0.11363636363636363, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.11363636363636363, + "Indonesian,English,Filipino,Spanish,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Malay": 0.14204545454545456, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese,Malay": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Spanish,Chinese,Malay": 0.19318181818181818, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, + "English,Filipino,Spanish,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, "English,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, - "English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, - "English,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 + "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635 }, "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.09659090909090909, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.11931818181818182, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.125, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.125, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 }, "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09659090909090909 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 } }, - "AC3_2": 0.4336879231206776, - "AC3_3": 0.32073843276432845, - "AC3_4": 0.24762264427136785, - "AC3_5": 0.20314787697653322, - "AC3_6": 0.1758848021875976, - "AC3_7": 0.1570929070625243 + "AC3_2": 0.4691768965404678, + "AC3_3": 0.3608169798373228, + "AC3_4": 0.2889071100630347, + "AC3_5": 0.23890621808889714, + "AC3_6": 0.20241827134939147, + "AC3_7": 0.17476881997436225 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.6116504854368932 + }, + "prompt_2": { + "accuracy": 0.5922330097087378 + }, + "prompt_3": { + "accuracy": 0.5728155339805825 + }, + "prompt_4": { + "accuracy": 0.6213592233009708 + }, + "prompt_5": { + "accuracy": 0.6213592233009708 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.6 + }, + "prompt_2": { + "accuracy": 0.638095238095238 + }, + "prompt_3": { + "accuracy": 0.6761904761904762 + }, + "prompt_4": { + "accuracy": 0.6476190476190476 + }, + "prompt_5": { + "accuracy": 0.6095238095238096 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.6728971962616822 + }, + "prompt_2": { + "accuracy": 0.6355140186915887 + }, + "prompt_3": { + "accuracy": 0.6728971962616822 + }, + "prompt_4": { + "accuracy": 0.6635514018691588 + }, + "prompt_5": { + "accuracy": 0.6355140186915887 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.5, + "culture": 0.3, + "film": 0.4, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.5, + "history": 0.2, + "literature": 0.2, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.5, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.2, + "literature": 0.3, + "politics": 0.5, + "culture": 0.4, + "film": 0.3, + "law": 0.7, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.3, + "law": 0.6, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.3, + "politics": 0.5, + "culture": 0.5, + "film": 0.3, + "law": 0.5, + "geography": 0.6 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.18328104784835536 + }, + "prompt_2": { + "bleu_score": 0.15895050867572064 + }, + "prompt_3": { + "bleu_score": 0.17203662866668645 + }, + "prompt_4": { + "bleu_score": 0.199134691773029 + }, + "prompt_5": { + "bleu_score": 0.13039131852856445 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.4498297616663329, + "category_acc": { + "History": 0.3855421686746988, + "Geography": 0.4142857142857143, + "Lampungic": 0.32653061224489793, + "Social science": 0.654424040066778, + "Balinese": 0.28450106157112526, + "Makassarese": 0.3279569892473118, + "Banjarese": 0.3055555555555556, + "Chemistry": 0.3094890510948905, + "Biology": 0.40236686390532544, + "Science": 0.5675954592363261, + "Christian religion": 0.5621890547263682, + "Art": 0.5457570715474209, + "Islam religion": 0.5135135135135135, + "Hindu religion": 0.36666666666666664, + "Madurese": 0.3152542372881356, + "Sport": 0.5540540540540541, + "Indonesian language": 0.524906600249066, + "Physics": 0.3898989898989899, + "Minangkabau culture": 0.3869346733668342, + "Dayak language": 0.27522935779816515, + "Sociology": 0.4879032258064516, + "Economy": 0.45286885245901637, + "Sundanese": 0.34917891097666376, + "Javanese": 0.31350806451612906, + "Civic education": 0.5236051502145923 + } + }, + "prompt_2": { + "accuracy": 0.4529007276854263, + "category_acc": { + "History": 0.38755020080321284, + "Geography": 0.4326530612244898, + "Lampungic": 0.3333333333333333, + "Social science": 0.662771285475793, + "Balinese": 0.26963906581740976, + "Makassarese": 0.3118279569892473, + "Banjarese": 0.2847222222222222, + "Chemistry": 0.3124087591240876, + "Biology": 0.41893491124260357, + "Science": 0.5717234262125903, + "Christian religion": 0.5074626865671642, + "Art": 0.5474209650582362, + "Islam religion": 0.5064011379800853, + "Hindu religion": 0.3466666666666667, + "Madurese": 0.30847457627118646, + "Sport": 0.527027027027027, + "Indonesian language": 0.5345579078455791, + "Physics": 0.39595959595959596, + "Minangkabau culture": 0.3869346733668342, + "Dayak language": 0.3302752293577982, + "Sociology": 0.4778225806451613, + "Economy": 0.4262295081967213, + "Sundanese": 0.35350043215211757, + "Javanese": 0.32661290322580644, + "Civic education": 0.5336194563662375 + } + }, + "prompt_3": { + "accuracy": 0.45029708258228185, + "category_acc": { + "History": 0.39156626506024095, + "Geography": 0.42448979591836733, + "Lampungic": 0.30612244897959184, + "Social science": 0.662771285475793, + "Balinese": 0.2484076433121019, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.2847222222222222, + "Chemistry": 0.30802919708029197, + "Biology": 0.40946745562130177, + "Science": 0.5603715170278638, + "Christian religion": 0.5373134328358209, + "Art": 0.5440931780366056, + "Islam religion": 0.5220483641536273, + "Hindu religion": 0.3933333333333333, + "Madurese": 0.2983050847457627, + "Sport": 0.5202702702702703, + "Indonesian language": 0.5292652552926526, + "Physics": 0.4121212121212121, + "Minangkabau culture": 0.34673366834170855, + "Dayak language": 0.3302752293577982, + "Sociology": 0.4879032258064516, + "Economy": 0.430327868852459, + "Sundanese": 0.3560933448573898, + "Javanese": 0.3235887096774194, + "Civic education": 0.5178826895565093 + } + }, + "prompt_4": { + "accuracy": 0.4494959610120836, + "category_acc": { + "History": 0.37349397590361444, + "Geography": 0.42653061224489797, + "Lampungic": 0.3197278911564626, + "Social science": 0.662771285475793, + "Balinese": 0.2632696390658174, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3333333333333333, + "Chemistry": 0.3167883211678832, + "Biology": 0.40946745562130177, + "Science": 0.564499484004128, + "Christian religion": 0.5373134328358209, + "Art": 0.5424292845257903, + "Islam religion": 0.5291607396870555, + "Hindu religion": 0.37333333333333335, + "Madurese": 0.29152542372881357, + "Sport": 0.5067567567567568, + "Indonesian language": 0.5252179327521793, + "Physics": 0.40606060606060607, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.3486238532110092, + "Sociology": 0.4838709677419355, + "Economy": 0.44672131147540983, + "Sundanese": 0.34485738980121, + "Javanese": 0.3125, + "Civic education": 0.5236051502145923 + } + }, + "prompt_5": { + "accuracy": 0.4462914747312905, + "category_acc": { + "History": 0.38755020080321284, + "Geography": 0.4142857142857143, + "Lampungic": 0.3197278911564626, + "Social science": 0.664440734557596, + "Balinese": 0.2760084925690021, + "Makassarese": 0.3655913978494624, + "Banjarese": 0.3125, + "Chemistry": 0.291970802919708, + "Biology": 0.3905325443786982, + "Science": 0.5624355005159959, + "Christian religion": 0.5422885572139303, + "Art": 0.5524126455906821, + "Islam religion": 0.519203413940256, + "Hindu religion": 0.3466666666666667, + "Madurese": 0.3389830508474576, + "Sport": 0.5, + "Indonesian language": 0.5199252801992528, + "Physics": 0.3838383838383838, + "Minangkabau culture": 0.32663316582914576, + "Dayak language": 0.3119266055045872, + "Sociology": 0.4838709677419355, + "Economy": 0.4426229508196721, + "Sundanese": 0.35177182368193605, + "Javanese": 0.3215725806451613, + "Civic education": 0.5050071530758226 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.3044304147296206 + }, + "prompt_2": { + "bleu_score": 0.29821649291017144 }, "prompt_3": { - "overall_acc": 0.42857142857142866, - "language_acc": { - "Indonesian": 0.3522727272727273, - "English": 0.5340909090909091, - "Filipino": 0.3522727272727273, - "Spanish": 0.4090909090909091, - "Chinese": 0.5056818181818182, - "Malay": 0.4034090909090909, - "Vietnamese": 0.4431818181818182 - }, - "consistency_score_2": 0.44101731601731586, - "consistency_score_3": 0.24350649350649353, - "consistency_score_4": 0.15292207792207793, - "consistency_score_5": 0.10606060606060605, - "consistency_score_6": 0.07954545454545454, - "consistency_score_7": 0.0625, - "detailed_consistency_score": { - "2_combine": { - "Indonesian,English": 0.4659090909090909, - "Indonesian,Filipino": 0.4375, - "Indonesian,Spanish": 0.4602272727272727, - "Indonesian,Chinese": 0.4318181818181818, - "Indonesian,Malay": 0.4659090909090909, - "Indonesian,Vietnamese": 0.4147727272727273, - "English,Filipino": 0.4034090909090909, - "English,Spanish": 0.5625, - "English,Chinese": 0.3977272727272727, - "English,Malay": 0.45454545454545453, - "English,Vietnamese": 0.42613636363636365, - "Filipino,Spanish": 0.4659090909090909, - "Filipino,Chinese": 0.3693181818181818, - "Filipino,Malay": 0.4602272727272727, - "Filipino,Vietnamese": 0.3977272727272727, - "Spanish,Chinese": 0.4318181818181818, - "Spanish,Malay": 0.4943181818181818, - "Spanish,Vietnamese": 0.4318181818181818, - "Chinese,Malay": 0.375, - "Chinese,Vietnamese": 0.5, - "Malay,Vietnamese": 0.4147727272727273 - }, - "3_combine": { - "Indonesian,English,Filipino": 0.23295454545454544, - "Indonesian,English,Spanish": 0.2897727272727273, - "Indonesian,English,Chinese": 0.22727272727272727, - "Indonesian,English,Malay": 0.2727272727272727, - "Indonesian,English,Vietnamese": 0.2215909090909091, - "Indonesian,Filipino,Spanish": 0.2727272727272727, - "Indonesian,Filipino,Chinese": 0.22727272727272727, - "Indonesian,Filipino,Malay": 0.2727272727272727, - "Indonesian,Filipino,Vietnamese": 0.2215909090909091, - "Indonesian,Spanish,Chinese": 0.22727272727272727, - "Indonesian,Spanish,Malay": 0.26704545454545453, - "Indonesian,Spanish,Vietnamese": 0.20454545454545456, - "Indonesian,Chinese,Malay": 0.2215909090909091, - "Indonesian,Chinese,Vietnamese": 0.23295454545454544, - "Indonesian,Malay,Vietnamese": 0.25, - "English,Filipino,Spanish": 0.2840909090909091, - "English,Filipino,Chinese": 0.20454545454545456, - "English,Filipino,Malay": 0.23863636363636365, - "English,Filipino,Vietnamese": 0.20454545454545456, - "English,Spanish,Chinese": 0.2556818181818182, - "English,Spanish,Malay": 0.32954545454545453, - "English,Spanish,Vietnamese": 0.26136363636363635, - "English,Chinese,Malay": 0.22727272727272727, - "English,Chinese,Vietnamese": 0.23295454545454544, - "English,Malay,Vietnamese": 0.25, - "Filipino,Spanish,Chinese": 0.23295454545454544, - "Filipino,Spanish,Malay": 0.2784090909090909, - "Filipino,Spanish,Vietnamese": 0.2215909090909091, - "Filipino,Chinese,Malay": 0.21022727272727273, - "Filipino,Chinese,Vietnamese": 0.22727272727272727, - "Filipino,Malay,Vietnamese": 0.22727272727272727, - "Spanish,Chinese,Malay": 0.2215909090909091, - "Spanish,Chinese,Vietnamese": 0.2727272727272727, - "Spanish,Malay,Vietnamese": 0.26704545454545453, - "Chinese,Malay,Vietnamese": 0.23295454545454544 - }, - "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.17613636363636365, - "Indonesian,English,Filipino,Chinese": 0.13636363636363635, - "Indonesian,English,Filipino,Malay": 0.1590909090909091, - "Indonesian,English,Filipino,Vietnamese": 0.13636363636363635, - "Indonesian,English,Spanish,Chinese": 0.1534090909090909, - "Indonesian,English,Spanish,Malay": 0.19318181818181818, - "Indonesian,English,Spanish,Vietnamese": 0.14204545454545456, - "Indonesian,English,Chinese,Malay": 0.14772727272727273, - "Indonesian,English,Chinese,Vietnamese": 0.13636363636363635, - "Indonesian,English,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,Filipino,Spanish,Chinese": 0.14772727272727273, - "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, - "Indonesian,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.14204545454545456, - "Indonesian,Filipino,Chinese,Vietnamese": 0.13636363636363635, - "Indonesian,Filipino,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,Spanish,Chinese,Malay": 0.13068181818181818, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1534090909090909, - "Indonesian,Spanish,Malay,Vietnamese": 0.13636363636363635, - "Indonesian,Chinese,Malay,Vietnamese": 0.1590909090909091, - "English,Filipino,Spanish,Chinese": 0.1534090909090909, - "English,Filipino,Spanish,Malay": 0.1875, - "English,Filipino,Spanish,Vietnamese": 0.14204545454545456, - "English,Filipino,Chinese,Malay": 0.14772727272727273, - "English,Filipino,Chinese,Vietnamese": 0.125, - "English,Filipino,Malay,Vietnamese": 0.14204545454545456, - "English,Spanish,Chinese,Malay": 0.16477272727272727, - "English,Spanish,Chinese,Vietnamese": 0.17613636363636365, - "English,Spanish,Malay,Vietnamese": 0.19886363636363635, - "English,Chinese,Malay,Vietnamese": 0.1590909090909091, - "Filipino,Spanish,Chinese,Malay": 0.13636363636363635, - "Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, - "Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, - "Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, - "Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 - }, - "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.10227272727272728, - "Indonesian,English,Filipino,Spanish,Malay": 0.125, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.09659090909090909, - "Indonesian,English,Filipino,Chinese,Malay": 0.10227272727272728, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.09090909090909091, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,English,Spanish,Chinese,Malay": 0.10227272727272728, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.11363636363636363, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.09659090909090909, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728, - "English,Filipino,Spanish,Chinese,Malay": 0.10795454545454546, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.10227272727272728, - "English,Filipino,Spanish,Malay,Vietnamese": 0.11931818181818182, - "English,Filipino,Chinese,Malay,Vietnamese": 0.10795454545454546, - "English,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728 - }, - "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.07386363636363637, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.07386363636363637, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.06818181818181818, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091 - }, - "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.0625 - } - }, - "AC3_2": 0.43470530708842253, - "AC3_3": 0.3105590061649714, - "AC3_4": 0.22541277813783284, - "AC3_5": 0.17004048579815437, - "AC3_6": 0.13418530348796864, - "AC3_7": 0.10909090906869422 + "bleu_score": 0.2928519299374442 }, "prompt_4": { - "overall_acc": 0.40584415584415584, - "language_acc": { - "Indonesian": 0.3693181818181818, - "English": 0.48863636363636365, - "Filipino": 0.2784090909090909, - "Spanish": 0.42045454545454547, - "Chinese": 0.4659090909090909, - "Malay": 0.4034090909090909, - "Vietnamese": 0.4147727272727273 - }, - "consistency_score_2": 0.4345238095238094, - "consistency_score_3": 0.2336038961038961, - "consistency_score_4": 0.14155844155844155, - "consistency_score_5": 0.091991341991342, - "consistency_score_6": 0.06250000000000001, - "consistency_score_7": 0.045454545454545456, - "detailed_consistency_score": { - "2_combine": { - "Indonesian,English": 0.4943181818181818, - "Indonesian,Filipino": 0.4034090909090909, - "Indonesian,Spanish": 0.44886363636363635, - "Indonesian,Chinese": 0.42613636363636365, - "Indonesian,Malay": 0.4602272727272727, - "Indonesian,Vietnamese": 0.44886363636363635, - "English,Filipino": 0.375, - "English,Spanish": 0.5852272727272727, - "English,Chinese": 0.5, - "English,Malay": 0.4431818181818182, - "English,Vietnamese": 0.4602272727272727, - "Filipino,Spanish": 0.3693181818181818, - "Filipino,Chinese": 0.3125, - "Filipino,Malay": 0.3977272727272727, - "Filipino,Vietnamese": 0.3522727272727273, - "Spanish,Chinese": 0.4318181818181818, - "Spanish,Malay": 0.45454545454545453, - "Spanish,Vietnamese": 0.4715909090909091, - "Chinese,Malay": 0.4034090909090909, - "Chinese,Vietnamese": 0.4772727272727273, - "Malay,Vietnamese": 0.4090909090909091 - }, - "3_combine": { - "Indonesian,English,Filipino": 0.20454545454545456, - "Indonesian,English,Spanish": 0.3181818181818182, - "Indonesian,English,Chinese": 0.2897727272727273, - "Indonesian,English,Malay": 0.2784090909090909, - "Indonesian,English,Vietnamese": 0.2727272727272727, - "Indonesian,Filipino,Spanish": 0.19318181818181818, - "Indonesian,Filipino,Chinese": 0.1590909090909091, - "Indonesian,Filipino,Malay": 0.2215909090909091, - "Indonesian,Filipino,Vietnamese": 0.1875, - "Indonesian,Spanish,Chinese": 0.26136363636363635, - "Indonesian,Spanish,Malay": 0.24431818181818182, - "Indonesian,Spanish,Vietnamese": 0.26136363636363635, - "Indonesian,Chinese,Malay": 0.23295454545454544, - "Indonesian,Chinese,Vietnamese": 0.26136363636363635, - "Indonesian,Malay,Vietnamese": 0.24431818181818182, - "English,Filipino,Spanish": 0.23295454545454544, - "English,Filipino,Chinese": 0.17613636363636365, - "English,Filipino,Malay": 0.19318181818181818, - "English,Filipino,Vietnamese": 0.16477272727272727, - "English,Spanish,Chinese": 0.32954545454545453, - "English,Spanish,Malay": 0.30113636363636365, - "English,Spanish,Vietnamese": 0.3409090909090909, - "English,Chinese,Malay": 0.2556818181818182, - "English,Chinese,Vietnamese": 0.2840909090909091, - "English,Malay,Vietnamese": 0.2556818181818182, - "Filipino,Spanish,Chinese": 0.14204545454545456, - "Filipino,Spanish,Malay": 0.19886363636363635, - "Filipino,Spanish,Vietnamese": 0.17613636363636365, - "Filipino,Chinese,Malay": 0.17045454545454544, - "Filipino,Chinese,Vietnamese": 0.17045454545454544, - "Filipino,Malay,Vietnamese": 0.1875, - "Spanish,Chinese,Malay": 0.2215909090909091, - "Spanish,Chinese,Vietnamese": 0.2727272727272727, - "Spanish,Malay,Vietnamese": 0.25, - "Chinese,Malay,Vietnamese": 0.2215909090909091 - }, - "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.13636363636363635, - "Indonesian,English,Filipino,Chinese": 0.09659090909090909, - "Indonesian,English,Filipino,Malay": 0.13636363636363635, - "Indonesian,English,Filipino,Vietnamese": 0.10227272727272728, - "Indonesian,English,Spanish,Chinese": 0.2159090909090909, - "Indonesian,English,Spanish,Malay": 0.19318181818181818, - "Indonesian,English,Spanish,Vietnamese": 0.21022727272727273, - "Indonesian,English,Chinese,Malay": 0.17613636363636365, - "Indonesian,English,Chinese,Vietnamese": 0.1875, - "Indonesian,English,Malay,Vietnamese": 0.16477272727272727, - "Indonesian,Filipino,Spanish,Chinese": 0.09090909090909091, - "Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, - "Indonesian,Filipino,Spanish,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Chinese,Malay": 0.10795454545454546, - "Indonesian,Filipino,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Malay,Vietnamese": 0.125, - "Indonesian,Spanish,Chinese,Malay": 0.14204545454545456, - "Indonesian,Spanish,Chinese,Vietnamese": 0.1875, - "Indonesian,Spanish,Malay,Vietnamese": 0.1534090909090909, - "Indonesian,Chinese,Malay,Vietnamese": 0.1534090909090909, - "English,Filipino,Spanish,Chinese": 0.11931818181818182, - "English,Filipino,Spanish,Malay": 0.14772727272727273, - "English,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "English,Filipino,Chinese,Malay": 0.11363636363636363, - "English,Filipino,Chinese,Vietnamese": 0.09090909090909091, - "English,Filipino,Malay,Vietnamese": 0.11931818181818182, - "English,Spanish,Chinese,Malay": 0.1875, - "English,Spanish,Chinese,Vietnamese": 0.22727272727272727, - "English,Spanish,Malay,Vietnamese": 0.21022727272727273, - "English,Chinese,Malay,Vietnamese": 0.1534090909090909, - "Filipino,Spanish,Chinese,Malay": 0.10227272727272728, - "Filipino,Spanish,Chinese,Vietnamese": 0.09090909090909091, - "Filipino,Spanish,Malay,Vietnamese": 0.10795454545454546, - "Filipino,Chinese,Malay,Vietnamese": 0.11363636363636363, - "Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456 - }, - "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.07954545454545454, - "Indonesian,English,Filipino,Spanish,Malay": 0.10227272727272728, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Malay": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.056818181818181816, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,English,Spanish,Chinese,Malay": 0.13068181818181818, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.16477272727272727, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.13068181818181818, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.11363636363636363, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.0625, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.0625, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.06818181818181818, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728, - "English,Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.07386363636363637, - "English,Filipino,Spanish,Malay,Vietnamese": 0.09659090909090909, - "English,Filipino,Chinese,Malay,Vietnamese": 0.07386363636363637, - "English,Spanish,Chinese,Malay,Vietnamese": 0.13636363636363635, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.06818181818181818 - }, - "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.0625, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.05113636363636364, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.0625, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.05113636363636364, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.045454545454545456, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.0625 - }, - "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.045454545454545456 - } - }, - "AC3_2": 0.4196946002628169, - "AC3_3": 0.29652690539142423, - "AC3_4": 0.20990278993561914, - "AC3_5": 0.14998588365140825, - "AC3_6": 0.10831889079143003, - "AC3_7": 0.0817527795763912 + "bleu_score": 0.3136684772191943 + }, + "prompt_5": { + "bleu_score": 0.28063486126359444 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.2516919772808757 + }, + "prompt_2": { + "bleu_score": 0.23887910636911036 + }, + "prompt_3": { + "bleu_score": 0.23924571820883456 + }, + "prompt_4": { + "bleu_score": 0.2584865607862379 + }, + "prompt_5": { + "bleu_score": 0.23683830918379803 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.15584366135182787 + }, + "prompt_2": { + "bleu_score": 0.1440599297310671 + }, + "prompt_3": { + "bleu_score": 0.15144528864585652 + }, + "prompt_4": { + "bleu_score": 0.17181780373780153 + }, + "prompt_5": { + "bleu_score": 0.20064653927085144 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.28164835459459403 + }, + "prompt_2": { + "bleu_score": 0.2738665326663029 + }, + "prompt_3": { + "bleu_score": 0.2663916384980528 + }, + "prompt_4": { + "bleu_score": 0.2854299944610882 }, "prompt_5": { - "overall_acc": 0.4017857142857143, - "language_acc": { - "Indonesian": 0.38636363636363635, - "English": 0.5113636363636364, - "Filipino": 0.3409090909090909, - "Spanish": 0.4147727272727273, - "Chinese": 0.4147727272727273, - "Malay": 0.3465909090909091, - "Vietnamese": 0.3977272727272727 - }, - "consistency_score_2": 0.4134199134199134, - "consistency_score_3": 0.2163961038961039, - "consistency_score_4": 0.13084415584415587, - "consistency_score_5": 0.08522727272727272, - "consistency_score_6": 0.057629870129870135, - "consistency_score_7": 0.03977272727272727, - "detailed_consistency_score": { - "2_combine": { - "Indonesian,English": 0.42613636363636365, - "Indonesian,Filipino": 0.42045454545454547, - "Indonesian,Spanish": 0.4375, - "Indonesian,Chinese": 0.44886363636363635, - "Indonesian,Malay": 0.48295454545454547, - "Indonesian,Vietnamese": 0.42045454545454547, - "English,Filipino": 0.3693181818181818, - "English,Spanish": 0.5170454545454546, - "English,Chinese": 0.4431818181818182, - "English,Malay": 0.42045454545454547, - "English,Vietnamese": 0.39204545454545453, - "Filipino,Spanish": 0.3693181818181818, - "Filipino,Chinese": 0.29545454545454547, - "Filipino,Malay": 0.39204545454545453, - "Filipino,Vietnamese": 0.42613636363636365, - "Spanish,Chinese": 0.3693181818181818, - "Spanish,Malay": 0.4602272727272727, - "Spanish,Vietnamese": 0.4318181818181818, - "Chinese,Malay": 0.3522727272727273, - "Chinese,Vietnamese": 0.3522727272727273, - "Malay,Vietnamese": 0.45454545454545453 - }, - "3_combine": { - "Indonesian,English,Filipino": 0.2159090909090909, - "Indonesian,English,Spanish": 0.2727272727272727, - "Indonesian,English,Chinese": 0.25, - "Indonesian,English,Malay": 0.26136363636363635, - "Indonesian,English,Vietnamese": 0.2215909090909091, - "Indonesian,Filipino,Spanish": 0.19886363636363635, - "Indonesian,Filipino,Chinese": 0.17045454545454544, - "Indonesian,Filipino,Malay": 0.22727272727272727, - "Indonesian,Filipino,Vietnamese": 0.2215909090909091, - "Indonesian,Spanish,Chinese": 0.20454545454545456, - "Indonesian,Spanish,Malay": 0.26136363636363635, - "Indonesian,Spanish,Vietnamese": 0.2215909090909091, - "Indonesian,Chinese,Malay": 0.22727272727272727, - "Indonesian,Chinese,Vietnamese": 0.2215909090909091, - "Indonesian,Malay,Vietnamese": 0.26136363636363635, - "English,Filipino,Spanish": 0.2159090909090909, - "English,Filipino,Chinese": 0.16477272727272727, - "English,Filipino,Malay": 0.19886363636363635, - "English,Filipino,Vietnamese": 0.20454545454545456, - "English,Spanish,Chinese": 0.2556818181818182, - "English,Spanish,Malay": 0.25, - "English,Spanish,Vietnamese": 0.2556818181818182, - "English,Chinese,Malay": 0.2159090909090909, - "English,Chinese,Vietnamese": 0.21022727272727273, - "English,Malay,Vietnamese": 0.22727272727272727, - "Filipino,Spanish,Chinese": 0.13636363636363635, - "Filipino,Spanish,Malay": 0.20454545454545456, - "Filipino,Spanish,Vietnamese": 0.2215909090909091, - "Filipino,Chinese,Malay": 0.14204545454545456, - "Filipino,Chinese,Vietnamese": 0.17613636363636365, - "Filipino,Malay,Vietnamese": 0.23295454545454544, - "Spanish,Chinese,Malay": 0.19318181818181818, - "Spanish,Chinese,Vietnamese": 0.18181818181818182, - "Spanish,Malay,Vietnamese": 0.25, - "Chinese,Malay,Vietnamese": 0.19886363636363635 - }, - "4_combine": { - "Indonesian,English,Filipino,Spanish": 0.13636363636363635, - "Indonesian,English,Filipino,Chinese": 0.11363636363636363, - "Indonesian,English,Filipino,Malay": 0.14772727272727273, - "Indonesian,English,Filipino,Vietnamese": 0.14204545454545456, - "Indonesian,English,Spanish,Chinese": 0.16477272727272727, - "Indonesian,English,Spanish,Malay": 0.16477272727272727, - "Indonesian,English,Spanish,Vietnamese": 0.14772727272727273, - "Indonesian,English,Chinese,Malay": 0.1534090909090909, - "Indonesian,English,Chinese,Vietnamese": 0.14772727272727273, - "Indonesian,English,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,Filipino,Spanish,Chinese": 0.07954545454545454, - "Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, - "Indonesian,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "Indonesian,Filipino,Chinese,Malay": 0.10227272727272728, - "Indonesian,Filipino,Chinese,Vietnamese": 0.125, - "Indonesian,Filipino,Malay,Vietnamese": 0.14204545454545456, - "Indonesian,Spanish,Chinese,Malay": 0.13068181818181818, - "Indonesian,Spanish,Chinese,Vietnamese": 0.11931818181818182, - "Indonesian,Spanish,Malay,Vietnamese": 0.1590909090909091, - "Indonesian,Chinese,Malay,Vietnamese": 0.14772727272727273, - "English,Filipino,Spanish,Chinese": 0.09659090909090909, - "English,Filipino,Spanish,Malay": 0.13068181818181818, - "English,Filipino,Spanish,Vietnamese": 0.13068181818181818, - "English,Filipino,Chinese,Malay": 0.10795454545454546, - "English,Filipino,Chinese,Vietnamese": 0.10795454545454546, - "English,Filipino,Malay,Vietnamese": 0.13068181818181818, - "English,Spanish,Chinese,Malay": 0.14772727272727273, - "English,Spanish,Chinese,Vietnamese": 0.14204545454545456, - "English,Spanish,Malay,Vietnamese": 0.14772727272727273, - "English,Chinese,Malay,Vietnamese": 0.13636363636363635, - "Filipino,Spanish,Chinese,Malay": 0.09090909090909091, - "Filipino,Spanish,Chinese,Vietnamese": 0.09659090909090909, - "Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, - "Filipino,Chinese,Malay,Vietnamese": 0.10227272727272728, - "Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 - }, - "5_combine": { - "Indonesian,English,Filipino,Spanish,Chinese": 0.0625, - "Indonesian,English,Filipino,Spanish,Malay": 0.09659090909090909, - "Indonesian,English,Filipino,Spanish,Vietnamese": 0.08522727272727272, - "Indonesian,English,Filipino,Chinese,Malay": 0.07954545454545454, - "Indonesian,English,Filipino,Chinese,Vietnamese": 0.08522727272727272, - "Indonesian,English,Filipino,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,English,Spanish,Chinese,Malay": 0.10795454545454546, - "Indonesian,English,Spanish,Chinese,Vietnamese": 0.10227272727272728, - "Indonesian,English,Spanish,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,English,Chinese,Malay,Vietnamese": 0.10795454545454546, - "Indonesian,Filipino,Spanish,Chinese,Malay": 0.0625, - "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.06818181818181818, - "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.09659090909090909, - "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.09090909090909091, - "English,Filipino,Spanish,Chinese,Malay": 0.07386363636363637, - "English,Filipino,Spanish,Chinese,Vietnamese": 0.06818181818181818, - "English,Filipino,Spanish,Malay,Vietnamese": 0.08522727272727272, - "English,Filipino,Chinese,Malay,Vietnamese": 0.06818181818181818, - "English,Spanish,Chinese,Malay,Vietnamese": 0.09659090909090909, - "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.06818181818181818 - }, - "6_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.05113636363636364, - "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.05113636363636364, - "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.0625, - "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.056818181818181816, - "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.07954545454545454, - "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.05113636363636364, - "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.05113636363636364 - }, - "7_combine": { - "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.03977272727272727 - } - }, - "AC3_2": 0.4075197951233069, - "AC3_3": 0.2812922043362496, - "AC3_4": 0.19740279527795535, - "AC3_5": 0.140624999971125, - "AC3_6": 0.10080136292606484, - "AC3_7": 0.07238051468949029 + "bleu_score": 0.25194925478011515 } }, - "sg_eval": { + "mmlu": { "prompt_1": { - "accuracy": 0.5922330097087378 + "accuracy": 0.5717619603267211 }, "prompt_2": { - "accuracy": 0.5728155339805825 + "accuracy": 0.5577596266044341 }, "prompt_3": { - "accuracy": 0.6990291262135923 + "accuracy": 0.588098016336056 }, "prompt_4": { - "accuracy": 0.6213592233009708 + "accuracy": 0.5565927654609102 }, "prompt_5": { - "accuracy": 0.5825242718446602 + "accuracy": 0.5612602100350058 } }, - "cn_eval": { + "mmlu_full": { "prompt_1": { - "accuracy": 0.41904761904761906 + "accuracy": 0.562531283518055, + "category_acc": { + "high_school_european_history": 0.7621951219512195, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6325757575757576, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.7093596059113301, + "high_school_physics": 0.4, + "high_school_world_history": 0.7542372881355932, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.569620253164557, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.6731391585760518, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.6193548387096774, + "professional_medicine": 0.5867158671586716, + "nutrition": 0.6032786885245902, + "global_facts": 0.32323232323232326, + "machine_learning": 0.42342342342342343, + "security_studies": 0.5614754098360656, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.5171849427168577, + "prehistory": 0.5727554179566563, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.7864583333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.7411167512690355, + "elementary_mathematics": 0.6259946949602122, + "human_aging": 0.5765765765765766, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.7536764705882353, + "formal_logic": 0.376, + "high_school_statistics": 0.4232558139534884, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.6565656565656566, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7519181585677749, + "high_school_chemistry": 0.4207920792079208, + "marketing": 0.8197424892703863, + "professional_law": 0.4142204827136334, + "management": 0.7058823529411765, + "college_physics": 0.38613861386138615, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.7529411764705882, + "sociology": 0.75, + "us_foreign_policy": 0.8585858585858586, + "high_school_macroeconomics": 0.5424164524421594, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.40939597315436244, + "moral_disputes": 0.5768115942028985, + "electrical_engineering": 0.5138888888888888, + "astronomy": 0.6291390728476821, + "college_biology": 0.6083916083916084 + } }, "prompt_2": { - "accuracy": 0.49523809523809526 + "accuracy": 0.5370039327851269, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.5757575757575758, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7044334975369458, + "high_school_physics": 0.34, + "high_school_world_history": 0.7330508474576272, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.5864978902953587, + "econometrics": 0.415929203539823, + "college_computer_science": 0.46464646464646464, + "high_school_biology": 0.6699029126213593, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3701067615658363, + "philosophy": 0.5806451612903226, + "professional_medicine": 0.5424354243542435, + "nutrition": 0.5344262295081967, + "global_facts": 0.3838383838383838, + "machine_learning": 0.3963963963963964, + "security_studies": 0.5860655737704918, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5479876160990712, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.6649746192893401, + "elementary_mathematics": 0.6180371352785146, + "human_aging": 0.536036036036036, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.7242647058823529, + "formal_logic": 0.368, + "high_school_statistics": 0.413953488372093, + "international_law": 0.625, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5042735042735043, + "miscellaneous": 0.7416879795396419, + "high_school_chemistry": 0.4405940594059406, + "marketing": 0.7896995708154506, + "professional_law": 0.4005218525766471, + "management": 0.696078431372549, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7058823529411765, + "sociology": 0.65, + "us_foreign_policy": 0.6868686868686869, + "high_school_macroeconomics": 0.5115681233933161, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.3870246085011186, + "moral_disputes": 0.48985507246376814, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.5629139072847682, + "college_biology": 0.5734265734265734 + } }, "prompt_3": { - "accuracy": 0.49523809523809526 + "accuracy": 0.5659635323560959, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.6136363636363636, + "medical_genetics": 0.696969696969697, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.7627118644067796, + "virology": 0.4303030303030303, + "high_school_microeconomics": 0.5822784810126582, + "econometrics": 0.40707964601769914, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.6699029126213593, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.40569395017793597, + "philosophy": 0.6096774193548387, + "professional_medicine": 0.6088560885608856, + "nutrition": 0.6098360655737705, + "global_facts": 0.30303030303030304, + "machine_learning": 0.45045045045045046, + "security_studies": 0.5737704918032787, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5384615384615384, + "prehistory": 0.5758513931888545, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.676923076923077, + "college_medicine": 0.5988372093023255, + "high_school_government_and_politics": 0.75, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.6604938271604939, + "high_school_geography": 0.7461928934010152, + "elementary_mathematics": 0.596816976127321, + "human_aging": 0.6081081081081081, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7702205882352942, + "formal_logic": 0.432, + "high_school_statistics": 0.4511627906976744, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.5170940170940171, + "miscellaneous": 0.7634271099744245, + "high_school_chemistry": 0.47029702970297027, + "marketing": 0.7854077253218884, + "professional_law": 0.4227005870841487, + "management": 0.7156862745098039, + "college_physics": 0.3564356435643564, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7588235294117647, + "sociology": 0.71, + "us_foreign_policy": 0.8282828282828283, + "high_school_macroeconomics": 0.5398457583547558, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.40380313199105144, + "moral_disputes": 0.5884057971014492, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.6357615894039735, + "college_biology": 0.6433566433566433 + } }, "prompt_4": { - "accuracy": 0.4 + "accuracy": 0.5445119771183411, + "category_acc": { + "high_school_european_history": 0.7378048780487805, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.5681818181818182, + "medical_genetics": 0.6464646464646465, + "high_school_us_history": 0.6600985221674877, + "high_school_physics": 0.28, + "high_school_world_history": 0.6949152542372882, + "virology": 0.44242424242424244, + "high_school_microeconomics": 0.5443037974683544, + "econometrics": 0.36283185840707965, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.6343042071197411, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.398576512455516, + "philosophy": 0.5967741935483871, + "professional_medicine": 0.5682656826568265, + "nutrition": 0.5639344262295082, + "global_facts": 0.40404040404040403, + "machine_learning": 0.42342342342342343, + "security_studies": 0.5122950819672131, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5286415711947627, + "prehistory": 0.5758513931888545, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5872093023255814, + "high_school_government_and_politics": 0.734375, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.7258883248730964, + "elementary_mathematics": 0.6153846153846154, + "human_aging": 0.509009009009009, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.7169117647058824, + "formal_logic": 0.448, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.5341880341880342, + "miscellaneous": 0.7506393861892583, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.7982832618025751, + "professional_law": 0.4142204827136334, + "management": 0.6764705882352942, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.6261682242990654, + "world_religions": 0.7588235294117647, + "sociology": 0.7, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.5167095115681234, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.38814317673378074, + "moral_disputes": 0.6028985507246377, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.6026490066225165, + "college_biology": 0.5174825174825175 + } }, "prompt_5": { - "accuracy": 0.42857142857142855 + "accuracy": 0.54315337861995, + "category_acc": { + "high_school_european_history": 0.6829268292682927, + "business_ethics": 0.6565656565656566, + "clinical_knowledge": 0.5681818181818182, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.6600985221674877, + "high_school_physics": 0.32, + "high_school_world_history": 0.690677966101695, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.540084388185654, + "econometrics": 0.415929203539823, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.6343042071197411, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5967741935483871, + "professional_medicine": 0.5202952029520295, + "nutrition": 0.580327868852459, + "global_facts": 0.3838383838383838, + "machine_learning": 0.43243243243243246, + "security_studies": 0.5409836065573771, + "public_relations": 0.6238532110091743, + "professional_psychology": 0.502454991816694, + "prehistory": 0.5572755417956656, + "anatomy": 0.5298507462686567, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.5348837209302325, + "high_school_government_and_politics": 0.78125, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5925925925925926, + "high_school_geography": 0.7055837563451777, + "elementary_mathematics": 0.5623342175066313, + "human_aging": 0.581081081081081, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.7518382352941176, + "formal_logic": 0.384, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.6767676767676768, + "conceptual_physics": 0.49572649572649574, + "miscellaneous": 0.7480818414322251, + "high_school_chemistry": 0.4158415841584158, + "marketing": 0.8068669527896996, + "professional_law": 0.42661448140900193, + "management": 0.7254901960784313, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7529411764705882, + "sociology": 0.715, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.4910025706940874, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.354586129753915, + "moral_disputes": 0.5797101449275363, + "electrical_engineering": 0.4722222222222222, + "astronomy": 0.6556291390728477, + "college_biology": 0.5874125874125874 + } } }, - "us_eval": { + "c_eval": { "prompt_1": { - "accuracy": 0.6542056074766355 + "accuracy": 0.6389301634472511 }, "prompt_2": { - "accuracy": 0.6448598130841121 + "accuracy": 0.6404160475482912 }, "prompt_3": { - "accuracy": 0.719626168224299 + "accuracy": 0.6441307578008916 }, "prompt_4": { - "accuracy": 0.6822429906542056 + "accuracy": 0.5861812778603269 }, "prompt_5": { - "accuracy": 0.6542056074766355 + "accuracy": 0.524517087667162 } }, - "ph_eval": { + "c_eval_full": { "prompt_1": { - "accuracy": 0.46, + "accuracy": 0.6550435865504358, "category_acc": { - "brand": 0.4, - "demographics": 0.4, - "biology": 0.5, - "history": 0.4666666666666667, - "literature": 0.5, - "politics": 0.7, - "culture": 0.5, - "film": 0.3, - "law": 0.3, - "geography": 0.5 + "computer_network": 0.5416666666666666, + "operating_system": 0.625, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.6428571428571429, + "college_physics": 0.25, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.6666666666666666, + "high_school_chemistry": 0.75, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.4583333333333333, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.875, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.4666666666666667, + "business_administration": 0.631578947368421, + "marxism": 0.875, + "mao_zedong_thought": 0.8620689655172413, + "education_science": 0.7352941176470589, + "teacher_qualification": 0.8163265306122449, + "high_school_politics": 0.9166666666666666, + "high_school_geography": 0.8333333333333334, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8571428571428571, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.5185185185185185, + "law": 0.6896551724137931, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.8235294117647058, + "legal_professional": 0.5, + "high_school_chinese": 0.625, + "high_school_history": 0.88, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.5192307692307693, + "sports_science": 0.7083333333333334, + "plant_protection": 0.6666666666666666, + "basic_medicine": 0.875, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.5370370370370371, + "fire_engineer": 0.5555555555555556, + "environmental_impact_assessment_engineer": 0.6666666666666666, + "tax_accountant": 0.5740740740740741, + "physician": 0.6666666666666666 } }, "prompt_2": { - "accuracy": 0.44, + "accuracy": 0.6475716064757161, "category_acc": { - "brand": 0.5, - "demographics": 0.4, - "biology": 0.3, - "history": 0.4666666666666667, - "literature": 0.3, - "politics": 0.7, - "culture": 0.6, - "film": 0.3, - "law": 0.3, - "geography": 0.5 + "computer_network": 0.5416666666666666, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.6538461538461539, + "college_programming": 0.6428571428571429, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.7931034482758621, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.5833333333333334, + "high_school_chemistry": 0.6666666666666666, + "high_school_biology": 0.75, + "middle_school_mathematics": 0.5833333333333334, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.9166666666666666, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.6785714285714286, + "college_economics": 0.55, + "business_administration": 0.5789473684210527, + "marxism": 0.875, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.7058823529411765, + "teacher_qualification": 0.7551020408163265, + "high_school_politics": 0.9166666666666666, + "high_school_geography": 0.8333333333333334, + "middle_school_politics": 1.0, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.8571428571428571, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.4444444444444444, + "law": 0.7241379310344828, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.8235294117647058, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.5833333333333334, + "high_school_history": 0.84, + "middle_school_history": 0.8888888888888888, + "civil_servant": 0.5384615384615384, + "sports_science": 0.7083333333333334, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.875, + "clinical_medicine": 0.5925925925925926, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.5925925925925926, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.6111111111111112, + "tax_accountant": 0.5740740740740741, + "physician": 0.6666666666666666 } }, "prompt_3": { - "accuracy": 0.49, + "accuracy": 0.6687422166874222, "category_acc": { - "brand": 0.6, - "demographics": 0.2, - "biology": 0.4, - "history": 0.4, - "literature": 0.3, - "politics": 0.7, - "culture": 0.9, - "film": 0.3, - "law": 0.5, - "geography": 0.5 + "computer_network": 0.4583333333333333, + "operating_system": 0.5, + "computer_architecture": 0.6538461538461539, + "college_programming": 0.7142857142857143, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.6666666666666666, + "high_school_chemistry": 0.7916666666666666, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.5833333333333334, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.9166666666666666, + "middle_school_chemistry": 0.88, + "veterinary_medicine": 0.6428571428571429, + "college_economics": 0.5666666666666667, + "business_administration": 0.6842105263157895, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.7058823529411765, + "teacher_qualification": 0.8367346938775511, + "high_school_politics": 0.9166666666666666, + "high_school_geography": 0.875, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8214285714285714, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.5185185185185185, + "law": 0.6551724137931034, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7941176470588235, + "legal_professional": 0.6071428571428571, + "high_school_chinese": 0.7083333333333334, + "high_school_history": 0.84, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.5384615384615384, + "sports_science": 0.6666666666666666, + "plant_protection": 0.7407407407407407, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.6274509803921569, + "accountant": 0.6666666666666666, + "fire_engineer": 0.6944444444444444, + "environmental_impact_assessment_engineer": 0.6666666666666666, + "tax_accountant": 0.6111111111111112, + "physician": 0.6851851851851852 } }, "prompt_4": { - "accuracy": 0.49, + "accuracy": 0.5772104607721046, "category_acc": { - "brand": 0.4, - "demographics": 0.4, - "biology": 0.5, - "history": 0.4, - "literature": 0.6, - "politics": 0.8, - "culture": 0.8, - "film": 0.4, - "law": 0.2, - "geography": 0.4 + "computer_network": 0.4166666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.5, + "college_programming": 0.4523809523809524, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.625, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.5833333333333334, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.8846153846153846, + "middle_school_physics": 0.6666666666666666, + "middle_school_chemistry": 0.76, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.5666666666666667, + "business_administration": 0.5789473684210527, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.6764705882352942, + "teacher_qualification": 0.7959183673469388, + "high_school_politics": 0.9166666666666666, + "high_school_geography": 0.75, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.75, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.25925925925925924, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.8235294117647058, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.375, + "high_school_history": 0.8, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.4423076923076923, + "sports_science": 0.4166666666666667, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.7916666666666666, + "clinical_medicine": 0.7407407407407407, + "urban_and_rural_planner": 0.5686274509803921, + "accountant": 0.5555555555555556, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.5555555555555556, + "tax_accountant": 0.42592592592592593, + "physician": 0.5740740740740741 } }, "prompt_5": { - "accuracy": 0.52, + "accuracy": 0.5342465753424658, "category_acc": { - "brand": 0.7, - "demographics": 0.4, - "biology": 0.5, - "history": 0.4, - "literature": 0.4, - "politics": 0.9, - "culture": 0.6, - "film": 0.4, - "law": 0.4, - "geography": 0.5 + "computer_network": 0.4583333333333333, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.375, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.5416666666666666, + "high_school_chemistry": 0.5416666666666666, + "high_school_biology": 0.6666666666666666, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.5, + "college_economics": 0.4666666666666667, + "business_administration": 0.47368421052631576, + "marxism": 0.75, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.6666666666666666, + "middle_school_politics": 0.7307692307692307, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.6428571428571429, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.3333333333333333, + "law": 0.5517241379310345, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.7352941176470589, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.375, + "high_school_history": 0.64, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.4423076923076923, + "sports_science": 0.375, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.6666666666666666, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.3888888888888889, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.6666666666666666, + "tax_accountant": 0.35185185185185186, + "physician": 0.6851851851851852 } } }, - "sing2eng": { + "cmmlu": { "prompt_1": { - "bleu_score": 0.18200139204376176 + "accuracy": 0.6845878136200717 }, "prompt_2": { - "bleu_score": 0.19640905343490234 + "accuracy": 0.6810035842293907 }, "prompt_3": { - "bleu_score": 0.1698549989089062 + "accuracy": 0.6774193548387096 }, "prompt_4": { - "bleu_score": 0.1613253890657764 + "accuracy": 0.6702508960573477 }, "prompt_5": { - "bleu_score": 0.15902080154902035 + "accuracy": 0.5949820788530465 } }, - "indommlu": { + "cmmlu_full": { "prompt_1": { - "accuracy": 0.4597102610321116, + "accuracy": 0.6518735969608013, "category_acc": { - "History": 0.4718875502008032, - "Geography": 0.40816326530612246, - "Lampungic": 0.3197278911564626, - "Social science": 0.6277128547579299, - "Balinese": 0.2951167728237792, - "Makassarese": 0.3279569892473118, - "Banjarese": 0.4097222222222222, - "Chemistry": 0.29635036496350364, - "Biology": 0.44970414201183434, - "Science": 0.5562435500515995, - "Christian religion": 0.5572139303482587, - "Art": 0.5341098169717138, - "Islam religion": 0.5761024182076814, - "Hindu religion": 0.5333333333333333, - "Madurese": 0.28135593220338984, - "Sport": 0.4864864864864865, - "Indonesian language": 0.5202366127023661, - "Physics": 0.37777777777777777, - "Minangkabau culture": 0.3869346733668342, - "Dayak language": 0.3211009174311927, - "Sociology": 0.46975806451612906, - "Economy": 0.42827868852459017, - "Sundanese": 0.36992221261884184, - "Javanese": 0.35685483870967744, - "Civic education": 0.5436337625178826 + "agronomy": 0.514792899408284, + "anatomy": 0.6418918918918919, + "ancient_chinese": 0.4024390243902439, + "arts": 0.85, + "astronomy": 0.4121212121212121, + "business_ethics": 0.631578947368421, + "chinese_civil_service_exam": 0.5875, + "chinese_driving_rule": 0.8854961832061069, + "chinese_food_culture": 0.6323529411764706, + "chinese_foreign_policy": 0.7009345794392523, + "chinese_history": 0.7832817337461301, + "chinese_literature": 0.4950980392156863, + "chinese_teacher_qualification": 0.8212290502793296, + "clinical_knowledge": 0.6118143459915611, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.794392523364486, + "college_engineering_hydrology": 0.6226415094339622, + "college_law": 0.6296296296296297, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.5188679245283019, + "college_medicine": 0.673992673992674, + "computer_science": 0.6519607843137255, + "computer_security": 0.7660818713450293, + "conceptual_physics": 0.8163265306122449, + "construction_project_management": 0.5539568345323741, + "economics": 0.6352201257861635, + "education": 0.6871165644171779, + "electrical_engineering": 0.6337209302325582, + "elementary_chinese": 0.6666666666666666, + "elementary_commonsense": 0.6767676767676768, + "elementary_information_and_technology": 0.8235294117647058, + "elementary_mathematics": 0.48695652173913045, + "ethnology": 0.6666666666666666, + "food_science": 0.5734265734265734, + "genetics": 0.5625, + "global_facts": 0.6442953020134228, + "high_school_biology": 0.757396449704142, + "high_school_chemistry": 0.6893939393939394, + "high_school_geography": 0.788135593220339, + "high_school_mathematics": 0.35365853658536583, + "high_school_physics": 0.6909090909090909, + "high_school_politics": 0.7132867132867133, + "human_sexuality": 0.6190476190476191, + "international_law": 0.5405405405405406, + "journalism": 0.6046511627906976, + "jurisprudence": 0.6788321167883211, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.5203252032520326, + "machine_learning": 0.5409836065573771, + "management": 0.7714285714285715, + "marketing": 0.7055555555555556, + "marxist_theory": 0.8201058201058201, + "modern_chinese": 0.5344827586206896, + "nutrition": 0.6275862068965518, + "philosophy": 0.6476190476190476, + "professional_accounting": 0.7314285714285714, + "professional_law": 0.5355450236966824, + "professional_medicine": 0.5292553191489362, + "professional_psychology": 0.7801724137931034, + "public_relations": 0.6379310344827587, + "security_study": 0.7333333333333333, + "sociology": 0.6637168141592921, + "sports_science": 0.6606060606060606, + "traditional_chinese_medicine": 0.6702702702702703, + "virology": 0.650887573964497, + "world_history": 0.7763975155279503, + "world_religions": 0.70625 } }, "prompt_2": { - "accuracy": 0.4812070231657654, + "accuracy": 0.6434121913313763, "category_acc": { - "History": 0.46184738955823296, - "Geography": 0.4530612244897959, - "Lampungic": 0.35374149659863946, - "Social science": 0.6944908180300501, - "Balinese": 0.34394904458598724, - "Makassarese": 0.3279569892473118, - "Banjarese": 0.3819444444444444, - "Chemistry": 0.2832116788321168, - "Biology": 0.4532544378698225, - "Science": 0.6057791537667698, - "Christian religion": 0.6169154228855721, - "Art": 0.5757071547420965, - "Islam religion": 0.5689900426742532, - "Hindu religion": 0.5066666666666667, - "Madurese": 0.30847457627118646, - "Sport": 0.5135135135135135, - "Indonesian language": 0.5320672478206725, - "Physics": 0.41414141414141414, - "Minangkabau culture": 0.35175879396984927, - "Dayak language": 0.3119266055045872, - "Sociology": 0.48185483870967744, - "Economy": 0.4426229508196721, - "Sundanese": 0.41659464131374246, - "Javanese": 0.37701612903225806, - "Civic education": 0.5779685264663805 + "agronomy": 0.5384615384615384, + "anatomy": 0.5945945945945946, + "ancient_chinese": 0.3719512195121951, + "arts": 0.85625, + "astronomy": 0.4303030303030303, + "business_ethics": 0.6220095693779905, + "chinese_civil_service_exam": 0.60625, + "chinese_driving_rule": 0.8854961832061069, + "chinese_food_culture": 0.6176470588235294, + "chinese_foreign_policy": 0.7102803738317757, + "chinese_history": 0.7708978328173375, + "chinese_literature": 0.4950980392156863, + "chinese_teacher_qualification": 0.8100558659217877, + "clinical_knowledge": 0.5949367088607594, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.7757009345794392, + "college_engineering_hydrology": 0.5188679245283019, + "college_law": 0.5925925925925926, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.4811320754716981, + "college_medicine": 0.6410256410256411, + "computer_science": 0.6813725490196079, + "computer_security": 0.7602339181286549, + "conceptual_physics": 0.8027210884353742, + "construction_project_management": 0.5539568345323741, + "economics": 0.6477987421383647, + "education": 0.6748466257668712, + "electrical_engineering": 0.6453488372093024, + "elementary_chinese": 0.6626984126984127, + "elementary_commonsense": 0.6818181818181818, + "elementary_information_and_technology": 0.8319327731092437, + "elementary_mathematics": 0.4956521739130435, + "ethnology": 0.6666666666666666, + "food_science": 0.6153846153846154, + "genetics": 0.5340909090909091, + "global_facts": 0.6375838926174496, + "high_school_biology": 0.7041420118343196, + "high_school_chemistry": 0.6666666666666666, + "high_school_geography": 0.7627118644067796, + "high_school_mathematics": 0.38414634146341464, + "high_school_physics": 0.6181818181818182, + "high_school_politics": 0.6923076923076923, + "human_sexuality": 0.5952380952380952, + "international_law": 0.5567567567567567, + "journalism": 0.6046511627906976, + "jurisprudence": 0.6788321167883211, + "legal_and_moral_basis": 0.9392523364485982, + "logical": 0.5447154471544715, + "machine_learning": 0.4918032786885246, + "management": 0.7761904761904762, + "marketing": 0.6888888888888889, + "marxist_theory": 0.8148148148148148, + "modern_chinese": 0.5603448275862069, + "nutrition": 0.6482758620689655, + "philosophy": 0.638095238095238, + "professional_accounting": 0.7028571428571428, + "professional_law": 0.5639810426540285, + "professional_medicine": 0.5132978723404256, + "professional_psychology": 0.771551724137931, + "public_relations": 0.6494252873563219, + "security_study": 0.6888888888888889, + "sociology": 0.6371681415929203, + "sports_science": 0.6484848484848484, + "traditional_chinese_medicine": 0.6432432432432432, + "virology": 0.6272189349112426, + "world_history": 0.7080745341614907, + "world_religions": 0.69375 } }, "prompt_3": { - "accuracy": 0.47593297282862673, + "accuracy": 0.6450526679329995, "category_acc": { - "History": 0.4497991967871486, - "Geography": 0.44693877551020406, - "Lampungic": 0.3469387755102041, - "Social science": 0.7011686143572621, - "Balinese": 0.3099787685774947, - "Makassarese": 0.3118279569892473, - "Banjarese": 0.3541666666666667, - "Chemistry": 0.2934306569343066, - "Biology": 0.4437869822485207, - "Science": 0.5933952528379773, - "Christian religion": 0.6019900497512438, - "Art": 0.5773710482529119, - "Islam religion": 0.5789473684210527, - "Hindu religion": 0.5066666666666667, - "Madurese": 0.28135593220338984, - "Sport": 0.5067567567567568, - "Indonesian language": 0.5292652552926526, - "Physics": 0.3939393939393939, - "Minangkabau culture": 0.3969849246231156, - "Dayak language": 0.3119266055045872, - "Sociology": 0.4798387096774194, - "Economy": 0.45081967213114754, - "Sundanese": 0.40017286084701814, - "Javanese": 0.36794354838709675, - "Civic education": 0.580829756795422 + "agronomy": 0.5207100591715976, + "anatomy": 0.6148648648648649, + "ancient_chinese": 0.40853658536585363, + "arts": 0.85625, + "astronomy": 0.38181818181818183, + "business_ethics": 0.6124401913875598, + "chinese_civil_service_exam": 0.6, + "chinese_driving_rule": 0.8854961832061069, + "chinese_food_culture": 0.6029411764705882, + "chinese_foreign_policy": 0.7102803738317757, + "chinese_history": 0.7832817337461301, + "chinese_literature": 0.49019607843137253, + "chinese_teacher_qualification": 0.8044692737430168, + "clinical_knowledge": 0.620253164556962, + "college_actuarial_science": 0.3867924528301887, + "college_education": 0.7663551401869159, + "college_engineering_hydrology": 0.5188679245283019, + "college_law": 0.6018518518518519, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.5377358490566038, + "college_medicine": 0.663003663003663, + "computer_science": 0.6666666666666666, + "computer_security": 0.7602339181286549, + "conceptual_physics": 0.8163265306122449, + "construction_project_management": 0.5755395683453237, + "economics": 0.6792452830188679, + "education": 0.6503067484662577, + "electrical_engineering": 0.6511627906976745, + "elementary_chinese": 0.6507936507936508, + "elementary_commonsense": 0.6717171717171717, + "elementary_information_and_technology": 0.8151260504201681, + "elementary_mathematics": 0.49130434782608695, + "ethnology": 0.6370370370370371, + "food_science": 0.5804195804195804, + "genetics": 0.5170454545454546, + "global_facts": 0.6644295302013423, + "high_school_biology": 0.7218934911242604, + "high_school_chemistry": 0.6742424242424242, + "high_school_geography": 0.7627118644067796, + "high_school_mathematics": 0.36585365853658536, + "high_school_physics": 0.5727272727272728, + "high_school_politics": 0.7412587412587412, + "human_sexuality": 0.6111111111111112, + "international_law": 0.5135135135135135, + "journalism": 0.5988372093023255, + "jurisprudence": 0.6666666666666666, + "legal_and_moral_basis": 0.9439252336448598, + "logical": 0.5528455284552846, + "machine_learning": 0.4672131147540984, + "management": 0.7619047619047619, + "marketing": 0.7166666666666667, + "marxist_theory": 0.7936507936507936, + "modern_chinese": 0.5172413793103449, + "nutrition": 0.6413793103448275, + "philosophy": 0.6666666666666666, + "professional_accounting": 0.7257142857142858, + "professional_law": 0.5639810426540285, + "professional_medicine": 0.5292553191489362, + "professional_psychology": 0.7758620689655172, + "public_relations": 0.6666666666666666, + "security_study": 0.7333333333333333, + "sociology": 0.6238938053097345, + "sports_science": 0.6545454545454545, + "traditional_chinese_medicine": 0.654054054054054, + "virology": 0.6449704142011834, + "world_history": 0.7453416149068323, + "world_religions": 0.6875 } }, "prompt_4": { - "accuracy": 0.4553040923960211, - "category_acc": { - "History": 0.4678714859437751, - "Geography": 0.42857142857142855, - "Lampungic": 0.3333333333333333, - "Social science": 0.6444073455759599, - "Balinese": 0.29936305732484075, - "Makassarese": 0.3709677419354839, - "Banjarese": 0.3888888888888889, - "Chemistry": 0.29781021897810217, - "Biology": 0.4307692307692308, - "Science": 0.5335397316821465, - "Christian religion": 0.5870646766169154, - "Art": 0.5158069883527454, - "Islam religion": 0.55049786628734, - "Hindu religion": 0.5066666666666667, - "Madurese": 0.29152542372881357, - "Sport": 0.46621621621621623, - "Indonesian language": 0.5080946450809465, - "Physics": 0.38181818181818183, - "Minangkabau culture": 0.3417085427135678, - "Dayak language": 0.3761467889908257, - "Sociology": 0.4596774193548387, - "Economy": 0.4180327868852459, - "Sundanese": 0.36992221261884184, - "Javanese": 0.3860887096774194, - "Civic education": 0.5321888412017167 - } - }, - "prompt_5": { - "accuracy": 0.4440883904132452, + "accuracy": 0.60145052667933, "category_acc": { - "History": 0.4457831325301205, - "Geography": 0.4142857142857143, - "Lampungic": 0.29931972789115646, - "Social science": 0.5959933222036727, - "Balinese": 0.28450106157112526, - "Makassarese": 0.27956989247311825, - "Banjarese": 0.3472222222222222, - "Chemistry": 0.2686131386861314, - "Biology": 0.44260355029585796, - "Science": 0.5376676986584107, - "Christian religion": 0.5621890547263682, - "Art": 0.5141430948419301, - "Islam religion": 0.5547652916073968, - "Hindu religion": 0.43333333333333335, - "Madurese": 0.2711864406779661, - "Sport": 0.5067567567567568, - "Indonesian language": 0.5108966376089664, - "Physics": 0.3595959595959596, - "Minangkabau culture": 0.35175879396984927, - "Dayak language": 0.30275229357798167, - "Sociology": 0.4213709677419355, - "Economy": 0.4262295081967213, - "Sundanese": 0.36992221261884184, - "Javanese": 0.3467741935483871, - "Civic education": 0.5264663805436338 + "agronomy": 0.5266272189349113, + "anatomy": 0.6013513513513513, + "ancient_chinese": 0.3353658536585366, + "arts": 0.85, + "astronomy": 0.4121212121212121, + "business_ethics": 0.5933014354066986, + "chinese_civil_service_exam": 0.5125, + "chinese_driving_rule": 0.8015267175572519, + "chinese_food_culture": 0.6764705882352942, + "chinese_foreign_policy": 0.6074766355140186, + "chinese_history": 0.6687306501547987, + "chinese_literature": 0.5147058823529411, + "chinese_teacher_qualification": 0.7988826815642458, + "clinical_knowledge": 0.5358649789029536, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.7383177570093458, + "college_engineering_hydrology": 0.5377358490566038, + "college_law": 0.5555555555555556, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.49056603773584906, + "college_medicine": 0.6556776556776557, + "computer_science": 0.6372549019607843, + "computer_security": 0.7017543859649122, + "conceptual_physics": 0.673469387755102, + "construction_project_management": 0.5251798561151079, + "economics": 0.5911949685534591, + "education": 0.6380368098159509, + "electrical_engineering": 0.5988372093023255, + "elementary_chinese": 0.6706349206349206, + "elementary_commonsense": 0.6515151515151515, + "elementary_information_and_technology": 0.8109243697478992, + "elementary_mathematics": 0.4043478260869565, + "ethnology": 0.674074074074074, + "food_science": 0.5594405594405595, + "genetics": 0.45454545454545453, + "global_facts": 0.610738255033557, + "high_school_biology": 0.5562130177514792, + "high_school_chemistry": 0.44696969696969696, + "high_school_geography": 0.6949152542372882, + "high_school_mathematics": 0.3780487804878049, + "high_school_physics": 0.4636363636363636, + "high_school_politics": 0.6433566433566433, + "human_sexuality": 0.5317460317460317, + "international_law": 0.5243243243243243, + "journalism": 0.563953488372093, + "jurisprudence": 0.5985401459854015, + "legal_and_moral_basis": 0.9205607476635514, + "logical": 0.5121951219512195, + "machine_learning": 0.5081967213114754, + "management": 0.6952380952380952, + "marketing": 0.65, + "marxist_theory": 0.7619047619047619, + "modern_chinese": 0.5086206896551724, + "nutrition": 0.6344827586206897, + "philosophy": 0.6, + "professional_accounting": 0.68, + "professional_law": 0.46919431279620855, + "professional_medicine": 0.5212765957446809, + "professional_psychology": 0.7155172413793104, + "public_relations": 0.6379310344827587, + "security_study": 0.674074074074074, + "sociology": 0.6061946902654868, + "sports_science": 0.593939393939394, + "traditional_chinese_medicine": 0.6162162162162163, + "virology": 0.6094674556213018, + "world_history": 0.6335403726708074, + "world_religions": 0.675 } - } - }, - "flores_ind2eng": { - "prompt_1": { - "bleu_score": 0.2138162386197109 - }, - "prompt_2": { - "bleu_score": 0.2849916145103055 - }, - "prompt_3": { - "bleu_score": 0.2922698955332789 - }, - "prompt_4": { - "bleu_score": 0.24238762048553122 }, "prompt_5": { - "bleu_score": 0.26602830333551486 + "accuracy": 0.5436021412536695, + "category_acc": { + "agronomy": 0.46153846153846156, + "anatomy": 0.581081081081081, + "ancient_chinese": 0.32926829268292684, + "arts": 0.775, + "astronomy": 0.4303030303030303, + "business_ethics": 0.5119617224880383, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.7404580152671756, + "chinese_food_culture": 0.6617647058823529, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.5975232198142415, + "chinese_literature": 0.46568627450980393, + "chinese_teacher_qualification": 0.7541899441340782, + "clinical_knowledge": 0.4641350210970464, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.6355140186915887, + "college_engineering_hydrology": 0.5, + "college_law": 0.5462962962962963, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.4528301886792453, + "college_medicine": 0.4945054945054945, + "computer_science": 0.5784313725490197, + "computer_security": 0.5964912280701754, + "conceptual_physics": 0.564625850340136, + "construction_project_management": 0.5251798561151079, + "economics": 0.5534591194968553, + "education": 0.5705521472392638, + "electrical_engineering": 0.5116279069767442, + "elementary_chinese": 0.6031746031746031, + "elementary_commonsense": 0.5959595959595959, + "elementary_information_and_technology": 0.726890756302521, + "elementary_mathematics": 0.4608695652173913, + "ethnology": 0.5481481481481482, + "food_science": 0.5804195804195804, + "genetics": 0.4602272727272727, + "global_facts": 0.5973154362416108, + "high_school_biology": 0.5384615384615384, + "high_school_chemistry": 0.45454545454545453, + "high_school_geography": 0.5932203389830508, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.45454545454545453, + "high_school_politics": 0.5734265734265734, + "human_sexuality": 0.5158730158730159, + "international_law": 0.4810810810810811, + "journalism": 0.5697674418604651, + "jurisprudence": 0.5401459854014599, + "legal_and_moral_basis": 0.794392523364486, + "logical": 0.45528455284552843, + "machine_learning": 0.47540983606557374, + "management": 0.5952380952380952, + "marketing": 0.6166666666666667, + "marxist_theory": 0.6878306878306878, + "modern_chinese": 0.4827586206896552, + "nutrition": 0.5517241379310345, + "philosophy": 0.5714285714285714, + "professional_accounting": 0.5885714285714285, + "professional_law": 0.3933649289099526, + "professional_medicine": 0.42021276595744683, + "professional_psychology": 0.6724137931034483, + "public_relations": 0.5172413793103449, + "security_study": 0.6592592592592592, + "sociology": 0.5575221238938053, + "sports_science": 0.5333333333333333, + "traditional_chinese_medicine": 0.4972972972972973, + "virology": 0.5384615384615384, + "world_history": 0.6024844720496895, + "world_religions": 0.64375 + } } }, - "flores_vie2eng": { + "zbench": { "prompt_1": { - "bleu_score": 0.1750575651458719 + "accuracy": 0.42424242424242425 }, "prompt_2": { - "bleu_score": 0.2303216696967068 + "accuracy": 0.5454545454545454 }, "prompt_3": { - "bleu_score": 0.23524587815853135 + "accuracy": 0.48484848484848486 }, "prompt_4": { - "bleu_score": 0.21260628676034443 + "accuracy": 0.36363636363636365 }, "prompt_5": { - "bleu_score": 0.22019452648179255 + "accuracy": 0.5151515151515151 } }, - "flores_zho2eng": { + "ind_emotion": { "prompt_1": { - "bleu_score": 0.12975999420747056 + "accuracy": 0.5931818181818181 }, "prompt_2": { - "bleu_score": 0.1657842132280737 + "accuracy": 0.47954545454545455 }, "prompt_3": { - "bleu_score": 0.1753685119515332 + "accuracy": 0.5113636363636364 }, "prompt_4": { - "bleu_score": 0.14728170783178976 + "accuracy": 0.5272727272727272 }, "prompt_5": { - "bleu_score": 0.1601516294342861 + "accuracy": 0.5318181818181819 } }, - "flores_zsm2eng": { + "ocnli": { "prompt_1": { - "bleu_score": 0.20285796706727535 + "accuracy": 0.6101694915254238 }, "prompt_2": { - "bleu_score": 0.27655504228053157 + "accuracy": 0.5976271186440678 }, "prompt_3": { - "bleu_score": 0.2878575740474366 + "accuracy": 0.5922033898305085 }, "prompt_4": { - "bleu_score": 0.24361548497622051 + "accuracy": 0.5728813559322034 }, "prompt_5": { - "bleu_score": 0.2527509574310542 + "accuracy": 0.5820338983050848 } }, - "mmlu": { + "c3": { "prompt_1": { - "accuracy": 0.5460910151691949 + "accuracy": 0.8915482423335827 }, "prompt_2": { - "accuracy": 0.5635939323220537 + "accuracy": 0.8900523560209425 }, "prompt_3": { - "accuracy": 0.5682613768961493 + "accuracy": 0.8848167539267016 }, "prompt_4": { - "accuracy": 0.5694282380396732 + "accuracy": 0.8915482423335827 }, "prompt_5": { - "accuracy": 0.5344224037339557 + "accuracy": 0.8433059087509349 } }, - "mmlu_full": { + "dream": { "prompt_1": { - "accuracy": 0.5214873078298177, - "category_acc": { - "high_school_european_history": 0.6524390243902439, - "business_ethics": 0.494949494949495, - "clinical_knowledge": 0.5795454545454546, - "medical_genetics": 0.6666666666666666, - "high_school_us_history": 0.6896551724137931, - "high_school_physics": 0.36, - "high_school_world_history": 0.7711864406779662, - "virology": 0.4727272727272727, - "high_school_microeconomics": 0.5443037974683544, - "econometrics": 0.415929203539823, - "college_computer_science": 0.35353535353535354, - "high_school_biology": 0.627831715210356, - "abstract_algebra": 0.25252525252525254, - "professional_accounting": 0.4128113879003559, - "philosophy": 0.5548387096774193, - "professional_medicine": 0.6346863468634686, - "nutrition": 0.5770491803278689, - "global_facts": 0.30303030303030304, - "machine_learning": 0.35135135135135137, - "security_studies": 0.6475409836065574, - "public_relations": 0.5137614678899083, - "professional_psychology": 0.5662847790507365, - "prehistory": 0.5975232198142415, - "anatomy": 0.5447761194029851, - "human_sexuality": 0.6, - "college_medicine": 0.47093023255813954, - "high_school_government_and_politics": 0.7447916666666666, - "college_chemistry": 0.37373737373737376, - "logical_fallacies": 0.6234567901234568, - "high_school_geography": 0.6142131979695431, - "elementary_mathematics": 0.506631299734748, - "human_aging": 0.5720720720720721, - "college_mathematics": 0.20202020202020202, - "high_school_psychology": 0.7077205882352942, - "formal_logic": 0.376, - "high_school_statistics": 0.39069767441860465, - "international_law": 0.675, - "high_school_mathematics": 0.24535315985130113, - "high_school_computer_science": 0.6262626262626263, - "conceptual_physics": 0.42735042735042733, - "miscellaneous": 0.6572890025575447, - "high_school_chemistry": 0.39603960396039606, - "marketing": 0.7553648068669528, - "professional_law": 0.3816046966731898, - "management": 0.7058823529411765, - "college_physics": 0.33663366336633666, - "jurisprudence": 0.5981308411214953, - "world_religions": 0.6647058823529411, - "sociology": 0.7, - "us_foreign_policy": 0.7474747474747475, - "high_school_macroeconomics": 0.4987146529562982, - "computer_security": 0.6060606060606061, - "moral_scenarios": 0.2662192393736018, - "moral_disputes": 0.553623188405797, - "electrical_engineering": 0.4097222222222222, - "astronomy": 0.6357615894039735, - "college_biology": 0.6223776223776224 - } - }, - "prompt_2": { - "accuracy": 0.545942080800858, - "category_acc": { - "high_school_european_history": 0.676829268292683, - "business_ethics": 0.5454545454545454, - "clinical_knowledge": 0.6098484848484849, - "medical_genetics": 0.6161616161616161, - "high_school_us_history": 0.6945812807881774, - "high_school_physics": 0.38, - "high_school_world_history": 0.788135593220339, - "virology": 0.4727272727272727, - "high_school_microeconomics": 0.5738396624472574, - "econometrics": 0.35398230088495575, - "college_computer_science": 0.494949494949495, - "high_school_biology": 0.6472491909385113, - "abstract_algebra": 0.26262626262626265, - "professional_accounting": 0.4092526690391459, - "philosophy": 0.5741935483870968, - "professional_medicine": 0.6236162361623616, - "nutrition": 0.6295081967213115, - "global_facts": 0.3333333333333333, - "machine_learning": 0.36936936936936937, - "security_studies": 0.6721311475409836, - "public_relations": 0.5137614678899083, - "professional_psychology": 0.5973813420621932, - "prehistory": 0.6160990712074303, - "anatomy": 0.5522388059701493, - "human_sexuality": 0.6461538461538462, - "college_medicine": 0.5174418604651163, - "high_school_government_and_politics": 0.7395833333333334, - "college_chemistry": 0.3434343434343434, - "logical_fallacies": 0.654320987654321, - "high_school_geography": 0.6446700507614214, - "elementary_mathematics": 0.5172413793103449, - "human_aging": 0.5765765765765766, - "college_mathematics": 0.35353535353535354, - "high_school_psychology": 0.7261029411764706, - "formal_logic": 0.352, - "high_school_statistics": 0.46511627906976744, - "international_law": 0.7, - "high_school_mathematics": 0.2936802973977695, - "high_school_computer_science": 0.6161616161616161, - "conceptual_physics": 0.43162393162393164, - "miscellaneous": 0.6854219948849105, - "high_school_chemistry": 0.4207920792079208, - "marketing": 0.703862660944206, - "professional_law": 0.42857142857142855, - "management": 0.7254901960784313, - "college_physics": 0.3564356435643564, - "jurisprudence": 0.6635514018691588, - "world_religions": 0.7235294117647059, - "sociology": 0.74, - "us_foreign_policy": 0.7373737373737373, - "high_school_macroeconomics": 0.5167095115681234, - "computer_security": 0.6767676767676768, - "moral_scenarios": 0.2785234899328859, - "moral_disputes": 0.5739130434782609, - "electrical_engineering": 0.4722222222222222, - "astronomy": 0.6291390728476821, - "college_biology": 0.6993006993006993 - } - }, - "prompt_3": { - "accuracy": 0.5590275294958884, - "category_acc": { - "high_school_european_history": 0.7073170731707317, - "business_ethics": 0.6262626262626263, - "clinical_knowledge": 0.5909090909090909, - "medical_genetics": 0.6868686868686869, - "high_school_us_history": 0.7487684729064039, - "high_school_physics": 0.37333333333333335, - "high_school_world_history": 0.8135593220338984, - "virology": 0.45454545454545453, - "high_school_microeconomics": 0.6160337552742616, - "econometrics": 0.34513274336283184, - "college_computer_science": 0.494949494949495, - "high_school_biology": 0.686084142394822, - "abstract_algebra": 0.24242424242424243, - "professional_accounting": 0.4128113879003559, - "philosophy": 0.5903225806451613, - "professional_medicine": 0.6162361623616236, - "nutrition": 0.6262295081967213, - "global_facts": 0.3333333333333333, - "machine_learning": 0.42342342342342343, - "security_studies": 0.6516393442622951, - "public_relations": 0.47706422018348627, - "professional_psychology": 0.5891980360065466, - "prehistory": 0.628482972136223, - "anatomy": 0.5522388059701493, - "human_sexuality": 0.6153846153846154, - "college_medicine": 0.563953488372093, - "high_school_government_and_politics": 0.7708333333333334, - "college_chemistry": 0.4444444444444444, - "logical_fallacies": 0.6728395061728395, - "high_school_geography": 0.6649746192893401, - "elementary_mathematics": 0.4880636604774536, - "human_aging": 0.5990990990990991, - "college_mathematics": 0.35353535353535354, - "high_school_psychology": 0.7610294117647058, - "formal_logic": 0.368, - "high_school_statistics": 0.4604651162790698, - "international_law": 0.675, - "high_school_mathematics": 0.31226765799256506, - "high_school_computer_science": 0.6262626262626263, - "conceptual_physics": 0.48717948717948717, - "miscellaneous": 0.7506393861892583, - "high_school_chemistry": 0.44554455445544555, - "marketing": 0.8111587982832618, - "professional_law": 0.4070450097847358, - "management": 0.7058823529411765, - "college_physics": 0.3465346534653465, - "jurisprudence": 0.6822429906542056, - "world_religions": 0.7647058823529411, - "sociology": 0.77, - "us_foreign_policy": 0.797979797979798, - "high_school_macroeconomics": 0.5526992287917738, - "computer_security": 0.5656565656565656, - "moral_scenarios": 0.2807606263982103, - "moral_disputes": 0.5884057971014492, - "electrical_engineering": 0.5, - "astronomy": 0.6291390728476821, - "college_biology": 0.6993006993006993 - } + "accuracy": 0.8887800097991181 }, - "prompt_4": { - "accuracy": 0.5232749374329639, - "category_acc": { - "high_school_european_history": 0.6707317073170732, - "business_ethics": 0.5555555555555556, - "clinical_knowledge": 0.5416666666666666, - "medical_genetics": 0.6767676767676768, - "high_school_us_history": 0.729064039408867, - "high_school_physics": 0.31333333333333335, - "high_school_world_history": 0.7923728813559322, - "virology": 0.4666666666666667, - "high_school_microeconomics": 0.540084388185654, - "econometrics": 0.4424778761061947, - "college_computer_science": 0.3838383838383838, - "high_school_biology": 0.6440129449838188, - "abstract_algebra": 0.29292929292929293, - "professional_accounting": 0.42704626334519574, - "philosophy": 0.5709677419354838, - "professional_medicine": 0.6273062730627307, - "nutrition": 0.5672131147540984, - "global_facts": 0.40404040404040403, - "machine_learning": 0.36936936936936937, - "security_studies": 0.6352459016393442, - "public_relations": 0.5137614678899083, - "professional_psychology": 0.5679214402618658, - "prehistory": 0.5944272445820433, - "anatomy": 0.5522388059701493, - "human_sexuality": 0.6384615384615384, - "college_medicine": 0.5116279069767442, - "high_school_government_and_politics": 0.671875, - "college_chemistry": 0.41414141414141414, - "logical_fallacies": 0.6049382716049383, - "high_school_geography": 0.5786802030456852, - "elementary_mathematics": 0.46949602122015915, - "human_aging": 0.5900900900900901, - "college_mathematics": 0.2828282828282828, - "high_school_psychology": 0.6893382352941176, - "formal_logic": 0.32, - "high_school_statistics": 0.413953488372093, - "international_law": 0.675, - "high_school_mathematics": 0.27137546468401486, - "high_school_computer_science": 0.5555555555555556, - "conceptual_physics": 0.41025641025641024, - "miscellaneous": 0.6828644501278772, - "high_school_chemistry": 0.4158415841584158, - "marketing": 0.6952789699570815, - "professional_law": 0.3953033268101761, - "management": 0.6274509803921569, - "college_physics": 0.26732673267326734, - "jurisprudence": 0.6355140186915887, - "world_religions": 0.6647058823529411, - "sociology": 0.745, - "us_foreign_policy": 0.6666666666666666, - "high_school_macroeconomics": 0.4987146529562982, - "computer_security": 0.6262626262626263, - "moral_scenarios": 0.2684563758389262, - "moral_disputes": 0.5420289855072464, - "electrical_engineering": 0.4930555555555556, - "astronomy": 0.6026490066225165, - "college_biology": 0.5524475524475524 - } + "prompt_2": { + "accuracy": 0.8912297893189612 + }, + "prompt_3": { + "accuracy": 0.8995590396864283 + }, + "prompt_4": { + "accuracy": 0.8731014208721215 }, "prompt_5": { - "accuracy": 0.5046835895602431, - "category_acc": { - "high_school_european_history": 0.6707317073170732, - "business_ethics": 0.48484848484848486, - "clinical_knowledge": 0.5454545454545454, - "medical_genetics": 0.5454545454545454, - "high_school_us_history": 0.6847290640394089, - "high_school_physics": 0.30666666666666664, - "high_school_world_history": 0.75, - "virology": 0.4121212121212121, - "high_school_microeconomics": 0.540084388185654, - "econometrics": 0.35398230088495575, - "college_computer_science": 0.41414141414141414, - "high_school_biology": 0.6148867313915858, - "abstract_algebra": 0.21212121212121213, - "professional_accounting": 0.398576512455516, - "philosophy": 0.5290322580645161, - "professional_medicine": 0.6014760147601476, - "nutrition": 0.6, - "global_facts": 0.35353535353535354, - "machine_learning": 0.36036036036036034, - "security_studies": 0.6762295081967213, - "public_relations": 0.5045871559633027, - "professional_psychology": 0.5450081833060556, - "prehistory": 0.5386996904024768, - "anatomy": 0.5074626865671642, - "human_sexuality": 0.5769230769230769, - "college_medicine": 0.47674418604651164, - "high_school_government_and_politics": 0.6822916666666666, - "college_chemistry": 0.3939393939393939, - "logical_fallacies": 0.6111111111111112, - "high_school_geography": 0.5685279187817259, - "elementary_mathematics": 0.4986737400530504, - "human_aging": 0.5540540540540541, - "college_mathematics": 0.2828282828282828, - "high_school_psychology": 0.6599264705882353, - "formal_logic": 0.36, - "high_school_statistics": 0.413953488372093, - "international_law": 0.6, - "high_school_mathematics": 0.21561338289962825, - "high_school_computer_science": 0.5656565656565656, - "conceptual_physics": 0.3888888888888889, - "miscellaneous": 0.649616368286445, - "high_school_chemistry": 0.38613861386138615, - "marketing": 0.6909871244635193, - "professional_law": 0.37573385518590996, - "management": 0.6078431372549019, - "college_physics": 0.3564356435643564, - "jurisprudence": 0.6074766355140186, - "world_religions": 0.6235294117647059, - "sociology": 0.705, - "us_foreign_policy": 0.7070707070707071, - "high_school_macroeconomics": 0.5038560411311054, - "computer_security": 0.5959595959595959, - "moral_scenarios": 0.2695749440715884, - "moral_disputes": 0.5391304347826087, - "electrical_engineering": 0.4097222222222222, - "astronomy": 0.5629139072847682, - "college_biology": 0.5874125874125874 - } + "accuracy": 0.8887800097991181 } }, - "c_eval": { + "samsum": { "prompt_1": { - "accuracy": 0.4182763744427935 + "rouge1": 0.32025871790184207, + "rouge2": 0.11252191613209159, + "rougeL": 0.23687649729124394, + "avg_rouge": 0.2232190437750592 }, "prompt_2": { - "accuracy": 0.42421991084695393 + "rouge1": 0.3539650693173222, + "rouge2": 0.12289059669247782, + "rougeL": 0.2621425399352911, + "avg_rouge": 0.24633273531503033 }, "prompt_3": { - "accuracy": 0.4279346210995542 + "rouge1": 0.3596383366340126, + "rouge2": 0.12306825565807013, + "rougeL": 0.2667316013053415, + "avg_rouge": 0.2498127311991414 }, "prompt_4": { - "accuracy": 0.37444279346210996 + "rouge1": 0.32746103807533683, + "rouge2": 0.11032578135156446, + "rougeL": 0.24118472863418863, + "avg_rouge": 0.22632384935369665 }, "prompt_5": { - "accuracy": 0.3922734026745914 + "rouge1": 0.35920934397741366, + "rouge2": 0.12008368532113817, + "rougeL": 0.26394933485526906, + "avg_rouge": 0.24774745471794027 } }, - "c_eval_full": { + "dialogsum": { "prompt_1": { - "accuracy": 0.4414694894146949, - "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.625, - "computer_architecture": 0.46153846153846156, - "college_programming": 0.5714285714285714, - "college_physics": 0.2916666666666667, - "college_chemistry": 0.27586206896551724, - "advanced_mathematics": 0.2916666666666667, - "probability_and_statistics": 0.34782608695652173, - "discrete_mathematics": 0.42857142857142855, - "electrical_engineer": 0.2857142857142857, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.25, - "high_school_chemistry": 0.4166666666666667, - "high_school_biology": 0.4166666666666667, - "middle_school_mathematics": 0.4166666666666667, - "middle_school_biology": 0.6538461538461539, - "middle_school_physics": 0.5, - "middle_school_chemistry": 0.4, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.36666666666666664, - "business_administration": 0.4473684210526316, - "marxism": 0.5, - "mao_zedong_thought": 0.6896551724137931, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.5714285714285714, - "high_school_politics": 0.5416666666666666, - "high_school_geography": 0.5, - "middle_school_politics": 0.5, - "middle_school_geography": 0.23529411764705882, - "modern_chinese_history": 0.39285714285714285, - "ideological_and_moral_cultivation": 0.625, - "logic": 0.5555555555555556, - "law": 0.3448275862068966, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.5526315789473685, - "professional_tour_guide": 0.47058823529411764, - "legal_professional": 0.42857142857142855, - "high_school_chinese": 0.25, - "high_school_history": 0.6, - "middle_school_history": 0.7037037037037037, - "civil_servant": 0.40384615384615385, - "sports_science": 0.3333333333333333, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.25, - "clinical_medicine": 0.4444444444444444, - "urban_and_rural_planner": 0.47058823529411764, - "accountant": 0.37037037037037035, - "fire_engineer": 0.3888888888888889, - "environmental_impact_assessment_engineer": 0.4444444444444444, - "tax_accountant": 0.3333333333333333, - "physician": 0.5 - } + "rouge1": 0.2156449509326601, + "rouge2": 0.051271372877158834, + "rougeL": 0.15762523547298707, + "avg_rouge": 0.14151385309426867 }, "prompt_2": { - "accuracy": 0.4439601494396015, - "category_acc": { - "computer_network": 0.375, - "operating_system": 0.5416666666666666, - "computer_architecture": 0.38461538461538464, - "college_programming": 0.4523809523809524, - "college_physics": 0.4166666666666667, - "college_chemistry": 0.27586206896551724, - "advanced_mathematics": 0.375, - "probability_and_statistics": 0.391304347826087, - "discrete_mathematics": 0.3333333333333333, - "electrical_engineer": 0.35714285714285715, - "metrology_engineer": 0.4827586206896552, - "high_school_mathematics": 0.30434782608695654, - "high_school_physics": 0.4166666666666667, - "high_school_chemistry": 0.375, - "high_school_biology": 0.5416666666666666, - "middle_school_mathematics": 0.25, - "middle_school_biology": 0.7307692307692307, - "middle_school_physics": 0.5833333333333334, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.5714285714285714, - "college_economics": 0.4, - "business_administration": 0.34210526315789475, - "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.6206896551724138, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.5714285714285714, - "high_school_politics": 0.4583333333333333, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.5384615384615384, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.42857142857142855, - "ideological_and_moral_cultivation": 0.625, - "logic": 0.4444444444444444, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.42857142857142855, - "art_studies": 0.5, - "professional_tour_guide": 0.4117647058823529, - "legal_professional": 0.32142857142857145, - "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.68, - "middle_school_history": 0.5555555555555556, - "civil_servant": 0.4423076923076923, - "sports_science": 0.375, - "plant_protection": 0.5925925925925926, - "basic_medicine": 0.3333333333333333, - "clinical_medicine": 0.3333333333333333, - "urban_and_rural_planner": 0.5098039215686274, - "accountant": 0.4444444444444444, - "fire_engineer": 0.25, - "environmental_impact_assessment_engineer": 0.4444444444444444, - "tax_accountant": 0.3888888888888889, - "physician": 0.5 - } + "rouge1": 0.21196166804340755, + "rouge2": 0.048452163740065, + "rougeL": 0.1531021308500753, + "avg_rouge": 0.1378386542111826 }, "prompt_3": { - "accuracy": 0.43462017434620176, - "category_acc": { - "computer_network": 0.375, - "operating_system": 0.625, - "computer_architecture": 0.46153846153846156, - "college_programming": 0.47619047619047616, - "college_physics": 0.4166666666666667, - "college_chemistry": 0.3793103448275862, - "advanced_mathematics": 0.25, - "probability_and_statistics": 0.2608695652173913, - "discrete_mathematics": 0.23809523809523808, - "electrical_engineer": 0.42857142857142855, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.13043478260869565, - "high_school_physics": 0.375, - "high_school_chemistry": 0.2916666666666667, - "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.2916666666666667, - "middle_school_biology": 0.6538461538461539, - "middle_school_physics": 0.4583333333333333, - "middle_school_chemistry": 0.56, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.43333333333333335, - "business_administration": 0.2631578947368421, - "marxism": 0.5, - "mao_zedong_thought": 0.41379310344827586, - "education_science": 0.4411764705882353, - "teacher_qualification": 0.6122448979591837, - "high_school_politics": 0.625, - "high_school_geography": 0.375, - "middle_school_politics": 0.38461538461538464, - "middle_school_geography": 0.5294117647058824, - "modern_chinese_history": 0.42857142857142855, - "ideological_and_moral_cultivation": 0.5416666666666666, - "logic": 0.6296296296296297, - "law": 0.3448275862068966, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.5, - "professional_tour_guide": 0.2647058823529412, - "legal_professional": 0.42857142857142855, - "high_school_chinese": 0.16666666666666666, - "high_school_history": 0.64, - "middle_school_history": 0.6296296296296297, - "civil_servant": 0.4423076923076923, - "sports_science": 0.4166666666666667, - "plant_protection": 0.4074074074074074, - "basic_medicine": 0.20833333333333334, - "clinical_medicine": 0.5185185185185185, - "urban_and_rural_planner": 0.5882352941176471, - "accountant": 0.46296296296296297, - "fire_engineer": 0.4166666666666667, - "environmental_impact_assessment_engineer": 0.3333333333333333, - "tax_accountant": 0.4074074074074074, - "physician": 0.5370370370370371 - } + "rouge1": 0.21407338332262357, + "rouge2": 0.050122199903020484, + "rougeL": 0.1556449616356449, + "avg_rouge": 0.13994684828709633 + }, + "prompt_4": { + "rouge1": 0.21417352959751168, + "rouge2": 0.05186922938802024, + "rougeL": 0.15696263445646286, + "avg_rouge": 0.14100179781399827 + }, + "prompt_5": { + "rouge1": 0.2197022831312668, + "rouge2": 0.05280379885981328, + "rougeL": 0.1617012052571708, + "avg_rouge": 0.14473576241608363 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.9311926605504587 + }, + "prompt_2": { + "accuracy": 0.9071100917431193 + }, + "prompt_3": { + "accuracy": 0.9174311926605505 + }, + "prompt_4": { + "accuracy": 0.9311926605504587 + }, + "prompt_5": { + "accuracy": 0.8623853211009175 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.822627037392138 + }, + "prompt_2": { + "accuracy": 0.8149568552253116 + }, + "prompt_3": { + "accuracy": 0.8207094918504314 + }, + "prompt_4": { + "accuracy": 0.8159156279961649 + }, + "prompt_5": { + "accuracy": 0.8139980824544583 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.7805 + }, + "prompt_2": { + "accuracy": 0.7745 + }, + "prompt_3": { + "accuracy": 0.7805 }, "prompt_4": { - "accuracy": 0.398505603985056, - "category_acc": { - "computer_network": 0.25, - "operating_system": 0.5833333333333334, - "computer_architecture": 0.6153846153846154, - "college_programming": 0.5, - "college_physics": 0.4166666666666667, - "college_chemistry": 0.3793103448275862, - "advanced_mathematics": 0.16666666666666666, - "probability_and_statistics": 0.08695652173913043, - "discrete_mathematics": 0.2857142857142857, - "electrical_engineer": 0.2857142857142857, - "metrology_engineer": 0.3793103448275862, - "high_school_mathematics": 0.30434782608695654, - "high_school_physics": 0.25, - "high_school_chemistry": 0.4583333333333333, - "high_school_biology": 0.375, - "middle_school_mathematics": 0.16666666666666666, - "middle_school_biology": 0.5384615384615384, - "middle_school_physics": 0.4166666666666667, - "middle_school_chemistry": 0.44, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.4, - "business_administration": 0.2894736842105263, - "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.3793103448275862, - "education_science": 0.5588235294117647, - "teacher_qualification": 0.673469387755102, - "high_school_politics": 0.5833333333333334, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.5, - "middle_school_geography": 0.5294117647058824, - "modern_chinese_history": 0.2857142857142857, - "ideological_and_moral_cultivation": 0.4166666666666667, - "logic": 0.5555555555555556, - "law": 0.3103448275862069, - "chinese_language_and_literature": 0.2857142857142857, - "art_studies": 0.5263157894736842, - "professional_tour_guide": 0.38235294117647056, - "legal_professional": 0.2857142857142857, - "high_school_chinese": 0.125, - "high_school_history": 0.52, - "middle_school_history": 0.4074074074074074, - "civil_servant": 0.34615384615384615, - "sports_science": 0.375, - "plant_protection": 0.5925925925925926, - "basic_medicine": 0.125, - "clinical_medicine": 0.4444444444444444, - "urban_and_rural_planner": 0.47058823529411764, - "accountant": 0.4074074074074074, - "fire_engineer": 0.3611111111111111, - "environmental_impact_assessment_engineer": 0.4444444444444444, - "tax_accountant": 0.2222222222222222, - "physician": 0.42592592592592593 - } + "accuracy": 0.7885 }, "prompt_5": { - "accuracy": 0.410958904109589, - "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.375, - "computer_architecture": 0.46153846153846156, - "college_programming": 0.5, - "college_physics": 0.4583333333333333, - "college_chemistry": 0.3103448275862069, - "advanced_mathematics": 0.4166666666666667, - "probability_and_statistics": 0.34782608695652173, - "discrete_mathematics": 0.3333333333333333, - "electrical_engineer": 0.35714285714285715, - "metrology_engineer": 0.3793103448275862, - "high_school_mathematics": 0.13043478260869565, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.4166666666666667, - "high_school_biology": 0.375, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.5384615384615384, - "middle_school_physics": 0.625, - "middle_school_chemistry": 0.48, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.35, - "business_administration": 0.34210526315789475, - "marxism": 0.5833333333333334, - "mao_zedong_thought": 0.5517241379310345, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.5714285714285714, - "high_school_politics": 0.625, - "high_school_geography": 0.5833333333333334, - "middle_school_politics": 0.4230769230769231, - "middle_school_geography": 0.35294117647058826, - "modern_chinese_history": 0.39285714285714285, - "ideological_and_moral_cultivation": 0.5416666666666666, - "logic": 0.4444444444444444, - "law": 0.3793103448275862, - "chinese_language_and_literature": 0.2857142857142857, - "art_studies": 0.5263157894736842, - "professional_tour_guide": 0.2647058823529412, - "legal_professional": 0.2857142857142857, - "high_school_chinese": 0.25, - "high_school_history": 0.52, - "middle_school_history": 0.48148148148148145, - "civil_servant": 0.36538461538461536, - "sports_science": 0.3333333333333333, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.20833333333333334, - "clinical_medicine": 0.4444444444444444, - "urban_and_rural_planner": 0.3137254901960784, - "accountant": 0.42592592592592593, - "fire_engineer": 0.3333333333333333, - "environmental_impact_assessment_engineer": 0.4722222222222222, - "tax_accountant": 0.2777777777777778, - "physician": 0.4444444444444444 - } + "accuracy": 0.7715 } }, - "cmmlu": { + "mnli": { "prompt_1": { - "accuracy": 0.4336917562724014 + "accuracy": 0.7 }, "prompt_2": { - "accuracy": 0.4659498207885305 + "accuracy": 0.7 }, "prompt_3": { - "accuracy": 0.40860215053763443 + "accuracy": 0.8 }, "prompt_4": { - "accuracy": 0.34767025089605735 + "accuracy": 0.7 }, "prompt_5": { - "accuracy": 0.4157706093189964 + "accuracy": 0.7 } }, - "cmmlu_full": { + "qnli": { "prompt_1": { - "accuracy": 0.4185805560352271, - "category_acc": { - "agronomy": 0.3431952662721893, - "anatomy": 0.34459459459459457, - "ancient_chinese": 0.2865853658536585, - "arts": 0.5125, - "astronomy": 0.24848484848484848, - "business_ethics": 0.507177033492823, - "chinese_civil_service_exam": 0.375, - "chinese_driving_rule": 0.5801526717557252, - "chinese_food_culture": 0.3897058823529412, - "chinese_foreign_policy": 0.514018691588785, - "chinese_history": 0.47987616099071206, - "chinese_literature": 0.3284313725490196, - "chinese_teacher_qualification": 0.49162011173184356, - "clinical_knowledge": 0.3333333333333333, - "college_actuarial_science": 0.22641509433962265, - "college_education": 0.42990654205607476, - "college_engineering_hydrology": 0.32075471698113206, - "college_law": 0.3148148148148148, - "college_mathematics": 0.23809523809523808, - "college_medical_statistics": 0.3867924528301887, - "college_medicine": 0.3333333333333333, - "computer_science": 0.4803921568627451, - "computer_security": 0.49707602339181284, - "conceptual_physics": 0.3877551020408163, - "construction_project_management": 0.33093525179856115, - "economics": 0.5283018867924528, - "education": 0.4723926380368098, - "electrical_engineering": 0.36046511627906974, - "elementary_chinese": 0.3373015873015873, - "elementary_commonsense": 0.3888888888888889, - "elementary_information_and_technology": 0.680672268907563, - "elementary_mathematics": 0.2826086956521739, - "ethnology": 0.362962962962963, - "food_science": 0.4195804195804196, - "genetics": 0.3977272727272727, - "global_facts": 0.5302013422818792, - "high_school_biology": 0.30177514792899407, - "high_school_chemistry": 0.3181818181818182, - "high_school_geography": 0.3644067796610169, - "high_school_mathematics": 0.25609756097560976, - "high_school_physics": 0.3, - "high_school_politics": 0.4195804195804196, - "human_sexuality": 0.49206349206349204, - "international_law": 0.42162162162162165, - "journalism": 0.4883720930232558, - "jurisprudence": 0.41849148418491483, - "legal_and_moral_basis": 0.7289719626168224, - "logical": 0.3902439024390244, - "machine_learning": 0.4918032786885246, - "management": 0.5142857142857142, - "marketing": 0.5, - "marxist_theory": 0.5132275132275133, - "modern_chinese": 0.3275862068965517, - "nutrition": 0.4413793103448276, - "philosophy": 0.41904761904761906, - "professional_accounting": 0.44, - "professional_law": 0.3175355450236967, - "professional_medicine": 0.3271276595744681, - "professional_psychology": 0.41810344827586204, - "public_relations": 0.4367816091954023, - "security_study": 0.4444444444444444, - "sociology": 0.49557522123893805, - "sports_science": 0.4303030303030303, - "traditional_chinese_medicine": 0.35135135135135137, - "virology": 0.4911242603550296, - "world_history": 0.4968944099378882, - "world_religions": 0.6 - } + "accuracy": 0.9 }, "prompt_2": { - "accuracy": 0.44025211535140735, - "category_acc": { - "agronomy": 0.3727810650887574, - "anatomy": 0.33783783783783783, - "ancient_chinese": 0.2682926829268293, - "arts": 0.5375, - "astronomy": 0.2727272727272727, - "business_ethics": 0.45933014354066987, - "chinese_civil_service_exam": 0.325, - "chinese_driving_rule": 0.5343511450381679, - "chinese_food_culture": 0.4117647058823529, - "chinese_foreign_policy": 0.4766355140186916, - "chinese_history": 0.5077399380804953, - "chinese_literature": 0.3382352941176471, - "chinese_teacher_qualification": 0.5195530726256983, - "clinical_knowledge": 0.39662447257383965, - "college_actuarial_science": 0.3113207547169811, - "college_education": 0.514018691588785, - "college_engineering_hydrology": 0.46226415094339623, - "college_law": 0.37037037037037035, - "college_mathematics": 0.37142857142857144, - "college_medical_statistics": 0.49056603773584906, - "college_medicine": 0.38095238095238093, - "computer_science": 0.5343137254901961, - "computer_security": 0.5497076023391813, - "conceptual_physics": 0.41496598639455784, - "construction_project_management": 0.35251798561151076, - "economics": 0.5723270440251572, - "education": 0.49693251533742333, - "electrical_engineering": 0.4941860465116279, - "elementary_chinese": 0.3333333333333333, - "elementary_commonsense": 0.40404040404040403, - "elementary_information_and_technology": 0.7100840336134454, - "elementary_mathematics": 0.29130434782608694, - "ethnology": 0.37037037037037035, - "food_science": 0.4195804195804196, - "genetics": 0.375, - "global_facts": 0.5369127516778524, - "high_school_biology": 0.33727810650887574, - "high_school_chemistry": 0.3787878787878788, - "high_school_geography": 0.3474576271186441, - "high_school_mathematics": 0.22560975609756098, - "high_school_physics": 0.3, - "high_school_politics": 0.4755244755244755, - "human_sexuality": 0.5317460317460317, - "international_law": 0.3837837837837838, - "journalism": 0.47093023255813954, - "jurisprudence": 0.43795620437956206, - "legal_and_moral_basis": 0.7663551401869159, - "logical": 0.43089430894308944, - "machine_learning": 0.5491803278688525, - "management": 0.5190476190476191, - "marketing": 0.5444444444444444, - "marxist_theory": 0.48148148148148145, - "modern_chinese": 0.3448275862068966, - "nutrition": 0.4206896551724138, - "philosophy": 0.5333333333333333, - "professional_accounting": 0.4685714285714286, - "professional_law": 0.35545023696682465, - "professional_medicine": 0.31648936170212766, - "professional_psychology": 0.47844827586206895, - "public_relations": 0.5459770114942529, - "security_study": 0.5111111111111111, - "sociology": 0.48672566371681414, - "sports_science": 0.46060606060606063, - "traditional_chinese_medicine": 0.32972972972972975, - "virology": 0.5088757396449705, - "world_history": 0.453416149068323, - "world_religions": 0.54375 - } + "accuracy": 0.8 + }, + "prompt_3": { + "accuracy": 0.9 + }, + "prompt_4": { + "accuracy": 0.8 + }, + "prompt_5": { + "accuracy": 0.9 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.5 + }, + "prompt_2": { + "accuracy": 0.5 + }, + "prompt_3": { + "accuracy": 0.5 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.8 + } + }, + "rte": { + "prompt_1": { + "accuracy": 0.9 + }, + "prompt_2": { + "accuracy": 1.0 }, "prompt_3": { - "accuracy": 0.42013469176308066, - "category_acc": { - "agronomy": 0.38461538461538464, - "anatomy": 0.32432432432432434, - "ancient_chinese": 0.25, - "arts": 0.525, - "astronomy": 0.2909090909090909, - "business_ethics": 0.48325358851674644, - "chinese_civil_service_exam": 0.36875, - "chinese_driving_rule": 0.5190839694656488, - "chinese_food_culture": 0.3897058823529412, - "chinese_foreign_policy": 0.4953271028037383, - "chinese_history": 0.5325077399380805, - "chinese_literature": 0.30392156862745096, - "chinese_teacher_qualification": 0.4860335195530726, - "clinical_knowledge": 0.3291139240506329, - "college_actuarial_science": 0.33962264150943394, - "college_education": 0.514018691588785, - "college_engineering_hydrology": 0.3584905660377358, - "college_law": 0.3333333333333333, - "college_mathematics": 0.2857142857142857, - "college_medical_statistics": 0.4811320754716981, - "college_medicine": 0.3663003663003663, - "computer_science": 0.5098039215686274, - "computer_security": 0.52046783625731, - "conceptual_physics": 0.3877551020408163, - "construction_project_management": 0.26618705035971224, - "economics": 0.5345911949685535, - "education": 0.44171779141104295, - "electrical_engineering": 0.43023255813953487, - "elementary_chinese": 0.31746031746031744, - "elementary_commonsense": 0.3686868686868687, - "elementary_information_and_technology": 0.6764705882352942, - "elementary_mathematics": 0.29130434782608694, - "ethnology": 0.37037037037037035, - "food_science": 0.4125874125874126, - "genetics": 0.3409090909090909, - "global_facts": 0.5234899328859061, - "high_school_biology": 0.378698224852071, - "high_school_chemistry": 0.3333333333333333, - "high_school_geography": 0.3813559322033898, - "high_school_mathematics": 0.31097560975609756, - "high_school_physics": 0.3090909090909091, - "high_school_politics": 0.46153846153846156, - "human_sexuality": 0.47619047619047616, - "international_law": 0.3837837837837838, - "journalism": 0.45930232558139533, - "jurisprudence": 0.46715328467153283, - "legal_and_moral_basis": 0.6822429906542056, - "logical": 0.3333333333333333, - "machine_learning": 0.45901639344262296, - "management": 0.5047619047619047, - "marketing": 0.4722222222222222, - "marxist_theory": 0.5185185185185185, - "modern_chinese": 0.3793103448275862, - "nutrition": 0.3724137931034483, - "philosophy": 0.4380952380952381, - "professional_accounting": 0.4114285714285714, - "professional_law": 0.2890995260663507, - "professional_medicine": 0.31117021276595747, - "professional_psychology": 0.4267241379310345, - "public_relations": 0.4540229885057471, - "security_study": 0.45185185185185184, - "sociology": 0.4557522123893805, - "sports_science": 0.4303030303030303, - "traditional_chinese_medicine": 0.33513513513513515, - "virology": 0.4970414201183432, - "world_history": 0.4968944099378882, - "world_religions": 0.525 - } + "accuracy": 1.0 }, "prompt_4": { - "accuracy": 0.38628906924538076, - "category_acc": { - "agronomy": 0.31952662721893493, - "anatomy": 0.27702702702702703, - "ancient_chinese": 0.2926829268292683, - "arts": 0.41875, - "astronomy": 0.2787878787878788, - "business_ethics": 0.41626794258373206, - "chinese_civil_service_exam": 0.38125, - "chinese_driving_rule": 0.48854961832061067, - "chinese_food_culture": 0.4411764705882353, - "chinese_foreign_policy": 0.45794392523364486, - "chinese_history": 0.4613003095975232, - "chinese_literature": 0.35294117647058826, - "chinese_teacher_qualification": 0.39106145251396646, - "clinical_knowledge": 0.3080168776371308, - "college_actuarial_science": 0.3018867924528302, - "college_education": 0.4672897196261682, - "college_engineering_hydrology": 0.3113207547169811, - "college_law": 0.3611111111111111, - "college_mathematics": 0.26666666666666666, - "college_medical_statistics": 0.4339622641509434, - "college_medicine": 0.3076923076923077, - "computer_science": 0.4264705882352941, - "computer_security": 0.5087719298245614, - "conceptual_physics": 0.3469387755102041, - "construction_project_management": 0.28776978417266186, - "economics": 0.44654088050314467, - "education": 0.4049079754601227, - "electrical_engineering": 0.3372093023255814, - "elementary_chinese": 0.3134920634920635, - "elementary_commonsense": 0.3787878787878788, - "elementary_information_and_technology": 0.6428571428571429, - "elementary_mathematics": 0.3, - "ethnology": 0.34814814814814815, - "food_science": 0.34965034965034963, - "genetics": 0.29545454545454547, - "global_facts": 0.5100671140939598, - "high_school_biology": 0.33136094674556216, - "high_school_chemistry": 0.3181818181818182, - "high_school_geography": 0.3644067796610169, - "high_school_mathematics": 0.2865853658536585, - "high_school_physics": 0.21818181818181817, - "high_school_politics": 0.34965034965034963, - "human_sexuality": 0.4444444444444444, - "international_law": 0.3675675675675676, - "journalism": 0.3953488372093023, - "jurisprudence": 0.39659367396593675, - "legal_and_moral_basis": 0.6308411214953271, - "logical": 0.4065040650406504, - "machine_learning": 0.3360655737704918, - "management": 0.49523809523809526, - "marketing": 0.45555555555555555, - "marxist_theory": 0.455026455026455, - "modern_chinese": 0.25862068965517243, - "nutrition": 0.32413793103448274, - "philosophy": 0.3904761904761905, - "professional_accounting": 0.41714285714285715, - "professional_law": 0.2938388625592417, - "professional_medicine": 0.30319148936170215, - "professional_psychology": 0.4224137931034483, - "public_relations": 0.42528735632183906, - "security_study": 0.4740740740740741, - "sociology": 0.415929203539823, - "sports_science": 0.40606060606060607, - "traditional_chinese_medicine": 0.32972972972972975, - "virology": 0.46153846153846156, - "world_history": 0.40993788819875776, - "world_religions": 0.475 - } + "accuracy": 0.8 + }, + "prompt_5": { + "accuracy": 0.9 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.9 + }, + "prompt_2": { + "accuracy": 0.9 + }, + "prompt_3": { + "accuracy": 0.9 + }, + "prompt_4": { + "accuracy": 0.9 + }, + "prompt_5": { + "accuracy": 0.9 + } + } + }, + "five_shot": { + "cross_xquad": { + "prompt_1": -1 + }, + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + } + } + }, + "LLaMA_3_Merlion_8B": { + "model_size": "8B", + "model_link": "https://seaeval.github.io/", + "zero_shot": { + "cross_xquad": { + "prompt_1": { + "overall_acc": 0.8876050420168068, + "language_acc": { + "Spanish": 0.8890756302521008, + "English": 0.9277310924369748, + "Chinese": 0.8663865546218488, + "Vietnamese": 0.8672268907563025 + }, + "consistency_score_2": 0.8428571428571429, + "consistency_score_3": 0.7733193277310924, + "consistency_score_4": 0.7235294117647059, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.880672268907563, + "Spanish,Chinese": 0.8151260504201681, + "Spanish,Vietnamese": 0.8369747899159664, + "English,Chinese": 0.8571428571428571, + "English,Vietnamese": 0.8512605042016806, + "Chinese,Vietnamese": 0.8159663865546218 + }, + "3_combine": { + "Spanish,English,Chinese": 0.7840336134453781, + "Spanish,English,Vietnamese": 0.7907563025210084, + "Spanish,Chinese,Vietnamese": 0.7453781512605042, + "English,Chinese,Vietnamese": 0.773109243697479 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7235294117647059 + } + }, + "AC3_2": 0.8646525260084111, + "AC3_3": 0.8265302705456998, + "AC3_4": 0.7972126129915623 + }, + "prompt_2": { + "overall_acc": 0.8831932773109243, + "language_acc": { + "Spanish": 0.8705882352941177, + "English": 0.9218487394957983, + "Chinese": 0.8705882352941177, + "Vietnamese": 0.8697478991596639 + }, + "consistency_score_2": 0.838375350140056, + "consistency_score_3": 0.7674369747899159, + "consistency_score_4": 0.7151260504201681, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.865546218487395, + "Spanish,Chinese": 0.8126050420168067, + "Spanish,Vietnamese": 0.8168067226890756, + "English,Chinese": 0.8613445378151261, + "English,Vietnamese": 0.8621848739495799, + "Chinese,Vietnamese": 0.8117647058823529 + }, + "3_combine": { + "Spanish,English,Chinese": 0.7773109243697479, + "Spanish,English,Vietnamese": 0.7823529411764706, + "Spanish,Chinese,Vietnamese": 0.7336134453781512, + "English,Chinese,Vietnamese": 0.7764705882352941 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7151260504201681 + } + }, + "AC3_2": 0.8602009368168079, + "AC3_3": 0.8212562153038939, + "AC3_4": 0.790323321708278 + }, + "prompt_3": { + "overall_acc": 0.8493697478991598, + "language_acc": { + "Spanish": 0.8176470588235294, + "English": 0.9008403361344538, + "Chinese": 0.8403361344537815, + "Vietnamese": 0.838655462184874 + }, + "consistency_score_2": 0.7768907563025209, + "consistency_score_3": 0.6796218487394958, + "consistency_score_4": 0.6067226890756302, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.7848739495798319, + "Spanish,Chinese": 0.7411764705882353, + "Spanish,Vietnamese": 0.7394957983193278, + "English,Chinese": 0.8084033613445378, + "English,Vietnamese": 0.8151260504201681, + "Chinese,Vietnamese": 0.7722689075630252 + }, + "3_combine": { + "Spanish,English,Chinese": 0.680672268907563, + "Spanish,English,Vietnamese": 0.6840336134453782, + "Spanish,Chinese,Vietnamese": 0.6453781512605042, + "English,Chinese,Vietnamese": 0.7084033613445379 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.6067226890756302 + } + }, + "AC3_2": 0.8115151343593726, + "AC3_3": 0.7550731339032906, + "AC3_4": 0.7078285476163063 + }, + "prompt_4": { + "overall_acc": 0.8831932773109245, + "language_acc": { + "Spanish": 0.892436974789916, + "English": 0.9277310924369748, + "Chinese": 0.8638655462184874, + "Vietnamese": 0.8487394957983193 + }, + "consistency_score_2": 0.8341736694677871, + "consistency_score_3": 0.7602941176470588, + "consistency_score_4": 0.7050420168067227, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.8798319327731092, + "Spanish,Chinese": 0.8201680672268907, + "Spanish,Vietnamese": 0.8142857142857143, + "English,Chinese": 0.8495798319327731, + "English,Vietnamese": 0.8428571428571429, + "Chinese,Vietnamese": 0.7983193277310925 + }, + "3_combine": { + "Spanish,English,Chinese": 0.780672268907563, + "Spanish,English,Vietnamese": 0.7756302521008404, + "Spanish,Chinese,Vietnamese": 0.7294117647058823, + "English,Chinese,Vietnamese": 0.7554621848739496 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7050420168067227 + } + }, + "AC3_2": 0.8579838785447016, + "AC3_3": 0.8171485287980964, + "AC3_4": 0.7841260948330266 }, "prompt_5": { - "accuracy": 0.40856501467794853, - "category_acc": { - "agronomy": 0.3905325443786982, - "anatomy": 0.32432432432432434, - "ancient_chinese": 0.3170731707317073, - "arts": 0.48125, - "astronomy": 0.22424242424242424, - "business_ethics": 0.40669856459330145, - "chinese_civil_service_exam": 0.41875, - "chinese_driving_rule": 0.4732824427480916, - "chinese_food_culture": 0.40441176470588236, - "chinese_foreign_policy": 0.4485981308411215, - "chinese_history": 0.4458204334365325, - "chinese_literature": 0.30392156862745096, - "chinese_teacher_qualification": 0.4748603351955307, - "clinical_knowledge": 0.3670886075949367, - "college_actuarial_science": 0.2830188679245283, - "college_education": 0.4953271028037383, - "college_engineering_hydrology": 0.42452830188679247, - "college_law": 0.2777777777777778, - "college_mathematics": 0.3238095238095238, - "college_medical_statistics": 0.4811320754716981, - "college_medicine": 0.3626373626373626, - "computer_science": 0.49019607843137253, - "computer_security": 0.5087719298245614, - "conceptual_physics": 0.36054421768707484, - "construction_project_management": 0.34532374100719426, - "economics": 0.4276729559748428, - "education": 0.4723926380368098, - "electrical_engineering": 0.3953488372093023, - "elementary_chinese": 0.32142857142857145, - "elementary_commonsense": 0.41919191919191917, - "elementary_information_and_technology": 0.6890756302521008, - "elementary_mathematics": 0.29130434782608694, - "ethnology": 0.37777777777777777, - "food_science": 0.34265734265734266, - "genetics": 0.3465909090909091, - "global_facts": 0.5369127516778524, - "high_school_biology": 0.2958579881656805, - "high_school_chemistry": 0.29545454545454547, - "high_school_geography": 0.3728813559322034, - "high_school_mathematics": 0.24390243902439024, - "high_school_physics": 0.38181818181818183, - "high_school_politics": 0.32867132867132864, - "human_sexuality": 0.42857142857142855, - "international_law": 0.3621621621621622, - "journalism": 0.4186046511627907, - "jurisprudence": 0.4306569343065693, - "legal_and_moral_basis": 0.6869158878504673, - "logical": 0.3902439024390244, - "machine_learning": 0.4426229508196721, - "management": 0.5095238095238095, - "marketing": 0.49444444444444446, - "marxist_theory": 0.48148148148148145, - "modern_chinese": 0.3275862068965517, - "nutrition": 0.3103448275862069, - "philosophy": 0.4666666666666667, - "professional_accounting": 0.44, - "professional_law": 0.3127962085308057, - "professional_medicine": 0.30319148936170215, - "professional_psychology": 0.4267241379310345, - "public_relations": 0.4827586206896552, - "security_study": 0.5407407407407407, - "sociology": 0.4424778761061947, - "sports_science": 0.44242424242424244, - "traditional_chinese_medicine": 0.32972972972972975, - "virology": 0.4437869822485207, - "world_history": 0.4658385093167702, - "world_religions": 0.50625 - } + "overall_acc": 0.8684873949579832, + "language_acc": { + "Spanish": 0.8680672268907563, + "English": 0.9168067226890756, + "Chinese": 0.846218487394958, + "Vietnamese": 0.8428571428571429 + }, + "consistency_score_2": 0.8084033613445377, + "consistency_score_3": 0.7252100840336135, + "consistency_score_4": 0.6638655462184874, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.8453781512605042, + "Spanish,Chinese": 0.788235294117647, + "Spanish,Vietnamese": 0.7907563025210084, + "English,Chinese": 0.826890756302521, + "English,Vietnamese": 0.8226890756302521, + "Chinese,Vietnamese": 0.7764705882352941 + }, + "3_combine": { + "Spanish,English,Chinese": 0.7420168067226891, + "Spanish,English,Vietnamese": 0.7403361344537815, + "Spanish,Chinese,Vietnamese": 0.6932773109243697, + "English,Chinese,Vietnamese": 0.7252100840336134 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.6638655462184874 + } + }, + "AC3_2": 0.8373689540463608, + "AC3_3": 0.7904082486704036, + "AC3_4": 0.7525144414280582 } }, - "zbench": { + "cross_mmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cross_logiqa": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sg_eval": { "prompt_1": { - "accuracy": 0.36363636363636365 + "accuracy": 0.5533980582524272 }, "prompt_2": { - "accuracy": 0.30303030303030304 + "accuracy": 0.6213592233009708 }, "prompt_3": { - "accuracy": 0.3939393939393939 + "accuracy": 0.6310679611650486 }, "prompt_4": { - "accuracy": 0.24242424242424243 + "accuracy": 0.5436893203883495 }, "prompt_5": { - "accuracy": 0.30303030303030304 + "accuracy": 0.5631067961165048 } }, + "cn_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "us_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ph_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sing2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "indommlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_ind2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_vie2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_zho2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "zbench": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, "ind_emotion": { - "prompt_1": { - "accuracy": 0.5772727272727273 - }, - "prompt_2": { - "accuracy": 0.5159090909090909 - }, - "prompt_3": { - "accuracy": 0.55 - }, - "prompt_4": { - "accuracy": 0.5113636363636364 - }, - "prompt_5": { - "accuracy": 0.5454545454545454 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "ocnli": { - "prompt_1": { - "accuracy": 0.4210169491525424 - }, - "prompt_2": { - "accuracy": 0.4311864406779661 - }, - "prompt_3": { - "accuracy": 0.45016949152542374 - }, - "prompt_4": { - "accuracy": 0.4322033898305085 - }, - "prompt_5": { - "accuracy": 0.4183050847457627 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "c3": { - "prompt_1": { - "accuracy": 0.7584143605086013 - }, - "prompt_2": { - "accuracy": 0.7535527299925205 - }, - "prompt_3": { - "accuracy": 0.7610321615557217 - }, - "prompt_4": { - "accuracy": 0.7797307404637247 - }, - "prompt_5": { - "accuracy": 0.7632759910246821 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "dream": { - "prompt_1": { - "accuracy": 0.8314551690347869 - }, - "prompt_2": { - "accuracy": 0.8716315531602156 - }, - "prompt_3": { - "accuracy": 0.8510534051935326 - }, - "prompt_4": { - "accuracy": 0.8407643312101911 - }, - "prompt_5": { - "accuracy": 0.8500734933855953 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "samsum": { - "prompt_1": { - "rouge1": 0.3579647669449489, - "rouge2": 0.13804737511608525, - "rougeL": 0.27027281393055447, - "avg_rouge": 0.2554283186638629 - }, - "prompt_2": { - "rouge1": 0.396495796367978, - "rouge2": 0.15377765802560428, - "rougeL": 0.3017766081553505, - "avg_rouge": 0.284016687516311 - }, - "prompt_3": { - "rouge1": 0.3576752519162336, - "rouge2": 0.13523642681555692, - "rougeL": 0.27150594485503227, - "avg_rouge": 0.25480587452894093 - }, - "prompt_4": { - "rouge1": 0.35948829874420807, - "rouge2": 0.13348649956595987, - "rougeL": 0.2712758118110613, - "avg_rouge": 0.25475020337374304 - }, - "prompt_5": { - "rouge1": 0.39047270151523533, - "rouge2": 0.13972034860096477, - "rougeL": 0.2993837181267835, - "avg_rouge": 0.27652558941432787 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "dialogsum": { - "prompt_1": { - "rouge1": 0.21714868673316118, - "rouge2": 0.062431926524471465, - "rougeL": 0.1603452337113643, - "avg_rouge": 0.14664194898966565 - }, - "prompt_2": { - "rouge1": 0.22068206987303055, - "rouge2": 0.06229011822144256, - "rougeL": 0.1626793794979957, - "avg_rouge": 0.14855052253082293 - }, - "prompt_3": { - "rouge1": 0.21816281802915602, - "rouge2": 0.06092060958820226, - "rougeL": 0.16046714922658, - "avg_rouge": 0.14651685894797942 - }, - "prompt_4": { - "rouge1": 0.21314498750877894, - "rouge2": 0.059533552171052787, - "rougeL": 0.1572601471655679, - "avg_rouge": 0.14331289561513322 - }, - "prompt_5": { - "rouge1": 0.2193786470836814, - "rouge2": 0.06072623355312886, - "rougeL": 0.16137392887799565, - "avg_rouge": 0.14715960317160195 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "sst2": { - "prompt_1": { - "accuracy": 0.8348623853211009 - }, - "prompt_2": { - "accuracy": 0.8555045871559633 - }, - "prompt_3": { - "accuracy": 0.8772935779816514 - }, - "prompt_4": { - "accuracy": 0.819954128440367 - }, - "prompt_5": { - "accuracy": 0.8658256880733946 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "cola": { - "prompt_1": { - "accuracy": 0.7689357622243528 - }, - "prompt_2": { - "accuracy": 0.775647171620326 - }, - "prompt_3": { - "accuracy": 0.7948226270373921 - }, - "prompt_4": { - "accuracy": 0.7794822627037392 - }, - "prompt_5": { - "accuracy": 0.7612655800575263 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "qqp": { - "prompt_1": { - "accuracy": 0.73 - }, - "prompt_2": { - "accuracy": 0.8005 - }, - "prompt_3": { - "accuracy": 0.7775 - }, - "prompt_4": { - "accuracy": 0.752 - }, - "prompt_5": { - "accuracy": 0.784 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "mnli": { - "prompt_1": { - "accuracy": 0.7 - }, - "prompt_2": { - "accuracy": 0.7 - }, - "prompt_3": { - "accuracy": 0.4 - }, - "prompt_4": { - "accuracy": 0.7 - }, - "prompt_5": { - "accuracy": 0.5 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "qnli": { - "prompt_1": { - "accuracy": 0.8 - }, - "prompt_2": { - "accuracy": 0.9 - }, - "prompt_3": { - "accuracy": 0.8 - }, - "prompt_4": { - "accuracy": 0.9 - }, - "prompt_5": { - "accuracy": 0.8 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "wnli": { - "prompt_1": { - "accuracy": 0.6 - }, - "prompt_2": { - "accuracy": 0.7 - }, - "prompt_3": { - "accuracy": 0.6 - }, - "prompt_4": { - "accuracy": 0.3 - }, - "prompt_5": { - "accuracy": 0.6 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "rte": { - "prompt_1": { - "accuracy": 0.9 - }, - "prompt_2": { - "accuracy": 0.9 - }, - "prompt_3": { - "accuracy": 0.8 - }, - "prompt_4": { - "accuracy": 0.8 - }, - "prompt_5": { - "accuracy": 0.9 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "mrpc": { - "prompt_1": { - "accuracy": 0.9 - }, - "prompt_2": { - "accuracy": 0.9 - }, - "prompt_3": { - "accuracy": 0.8 - }, - "prompt_4": { - "accuracy": 0.9 - }, - "prompt_5": { - "accuracy": 0.9 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 } }, "five_shot": {