diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -93867,6 +93867,7062 @@ } } }, + "sea_lion_7b_instruct": { + "model_size": "7B", + "model_link": "https://huggingface.co/aisingapore/sea-lion-7b-instruct", + "zero_shot": { + "cross_xquad": { + "prompt_1": { + "overall_acc": 0.42857142857142855, + "language_acc": { + "Vietnamese": 0.41680672268907565, + "Spanish": 0.4084033613445378, + "English": 0.47058823529411764, + "Chinese": 0.4184873949579832 + }, + "consistency_score_2": 0.5668067226890755, + "consistency_score_3": 0.4004201680672269, + "consistency_score_4": 0.3067226890756303, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.5697478991596638, + "Vietnamese,English": 0.6092436974789915, + "Vietnamese,Chinese": 0.5714285714285714, + "Spanish,English": 0.5647058823529412, + "Spanish,Chinese": 0.5327731092436975, + "English,Chinese": 0.5529411764705883 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.419327731092437, + "Vietnamese,Spanish,Chinese": 0.3907563025210084, + "Vietnamese,English,Chinese": 0.4092436974789916, + "Spanish,English,Chinese": 0.38235294117647056 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.3067226890756303 + } + }, + "AC3_2": 0.48809021281956466, + "AC3_3": 0.4140178118391316, + "AC3_4": 0.35755102035953634 + }, + "prompt_2": { + "overall_acc": 0.40903361344537814, + "language_acc": { + "Vietnamese": 0.380672268907563, + "Spanish": 0.3773109243697479, + "English": 0.4714285714285714, + "Chinese": 0.40672268907563025 + }, + "consistency_score_2": 0.5582633053221288, + "consistency_score_3": 0.38928571428571423, + "consistency_score_4": 0.2991596638655462, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.5436974789915966, + "Vietnamese,English": 0.5756302521008403, + "Vietnamese,Chinese": 0.5638655462184874, + "Spanish,English": 0.565546218487395, + "Spanish,Chinese": 0.5336134453781513, + "English,Chinese": 0.5672268907563025 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.3915966386554622, + "Vietnamese,Spanish,Chinese": 0.37815126050420167, + "Vietnamese,English,Chinese": 0.4016806722689076, + "Spanish,English,Chinese": 0.38571428571428573 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.2991596638655462 + } + }, + "AC3_2": 0.4721372570838266, + "AC3_3": 0.39891541348386517, + "AC3_4": 0.3455733405303898 + }, + "prompt_3": { + "overall_acc": 0.4350840336134454, + "language_acc": { + "Vietnamese": 0.4394957983193277, + "Spanish": 0.40336134453781514, + "English": 0.48739495798319327, + "Chinese": 0.41008403361344536 + }, + "consistency_score_2": 0.5186274509803922, + "consistency_score_3": 0.34096638655462186, + "consistency_score_4": 0.24789915966386555, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.5159663865546219, + "Vietnamese,English": 0.5554621848739496, + "Vietnamese,Chinese": 0.5008403361344538, + "Spanish,English": 0.5394957983193277, + "Spanish,Chinese": 0.47058823529411764, + "English,Chinese": 0.5294117647058824 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.36554621848739494, + "Vietnamese,Spanish,Chinese": 0.31092436974789917, + "Vietnamese,English,Chinese": 0.3546218487394958, + "Spanish,English,Chinese": 0.33277310924369746 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.24789915966386555 + } + }, + "AC3_2": 0.47319661540543567, + "AC3_3": 0.3823180219077586, + "AC3_4": 0.31584076258916977 + }, + "prompt_4": { + "overall_acc": 0.403781512605042, + "language_acc": { + "Vietnamese": 0.3840336134453782, + "Spanish": 0.38235294117647056, + "English": 0.4394957983193277, + "Chinese": 0.4092436974789916 + }, + "consistency_score_2": 0.5638655462184873, + "consistency_score_3": 0.3947478991596638, + "consistency_score_4": 0.2991596638655462, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.5470588235294118, + "Vietnamese,English": 0.5764705882352941, + "Vietnamese,Chinese": 0.5579831932773109, + "Spanish,English": 0.5798319327731093, + "Spanish,Chinese": 0.5504201680672269, + "English,Chinese": 0.5714285714285714 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.39663865546218485, + "Vietnamese,Spanish,Chinese": 0.37478991596638656, + "Vietnamese,English,Chinese": 0.40336134453781514, + "Spanish,English,Chinese": 0.4042016806722689 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.2991596638655462 + } + }, + "AC3_2": 0.47058166726875134, + "AC3_3": 0.39921360804517814, + "AC3_4": 0.3436849216185193 + }, + "prompt_5": { + "overall_acc": 0.42668067226890755, + "language_acc": { + "Vietnamese": 0.4134453781512605, + "Spanish": 0.3739495798319328, + "English": 0.4756302521008403, + "Chinese": 0.4436974789915966 + }, + "consistency_score_2": 0.5221288515406163, + "consistency_score_3": 0.34789915966386553, + "consistency_score_4": 0.2605042016806723, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.5033613445378151, + "Vietnamese,English": 0.5495798319327732, + "Vietnamese,Chinese": 0.5168067226890757, + "Spanish,English": 0.5277310924369748, + "Spanish,Chinese": 0.49747899159663866, + "English,Chinese": 0.5378151260504201 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.3495798319327731, + "Vietnamese,Spanish,Chinese": 0.3277310924369748, + "Vietnamese,English,Chinese": 0.3630252100840336, + "Spanish,English,Chinese": 0.35126050420168065 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.2605042016806723 + } + }, + "AC3_2": 0.4696038219944908, + "AC3_3": 0.38328353305456436, + "AC3_4": 0.3234998676468679 + } + }, + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.2733333333333333, + "language_acc": { + "Chinese": 0.24, + "Indonesian": 0.2733333333333333, + "Spanish": 0.23333333333333334, + "Vietnamese": 0.3, + "Malay": 0.28, + "English": 0.32, + "Filipino": 0.26666666666666666 + }, + "consistency_score_2": 0.511111111111111, + "consistency_score_3": 0.32742857142857146, + "consistency_score_4": 0.2335238095238095, + "consistency_score_5": 0.17714285714285713, + "consistency_score_6": 0.13999999999999999, + "consistency_score_7": 0.11333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5, + "Chinese,Spanish": 0.4666666666666667, + "Chinese,Vietnamese": 0.5133333333333333, + "Chinese,Malay": 0.46, + "Chinese,English": 0.5266666666666666, + "Chinese,Filipino": 0.4266666666666667, + "Indonesian,Spanish": 0.54, + "Indonesian,Vietnamese": 0.62, + "Indonesian,Malay": 0.62, + "Indonesian,English": 0.52, + "Indonesian,Filipino": 0.49333333333333335, + "Spanish,Vietnamese": 0.5333333333333333, + "Spanish,Malay": 0.52, + "Spanish,English": 0.4866666666666667, + "Spanish,Filipino": 0.4533333333333333, + "Vietnamese,Malay": 0.52, + "Vietnamese,English": 0.5666666666666667, + "Vietnamese,Filipino": 0.5133333333333333, + "Malay,English": 0.47333333333333333, + "Malay,Filipino": 0.5066666666666667, + "English,Filipino": 0.47333333333333333 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese": 0.37333333333333335, + "Chinese,Indonesian,Malay": 0.3466666666666667, + "Chinese,Indonesian,English": 0.32666666666666666, + "Chinese,Indonesian,Filipino": 0.26666666666666666, + "Chinese,Spanish,Vietnamese": 0.32, + "Chinese,Spanish,Malay": 0.30666666666666664, + "Chinese,Spanish,English": 0.30666666666666664, + "Chinese,Spanish,Filipino": 0.23333333333333334, + "Chinese,Vietnamese,Malay": 0.31333333333333335, + "Chinese,Vietnamese,English": 0.36666666666666664, + "Chinese,Vietnamese,Filipino": 0.28, + "Chinese,Malay,English": 0.2866666666666667, + "Chinese,Malay,Filipino": 0.2733333333333333, + "Chinese,English,Filipino": 0.2866666666666667, + "Indonesian,Spanish,Vietnamese": 0.3933333333333333, + "Indonesian,Spanish,Malay": 0.3933333333333333, + "Indonesian,Spanish,English": 0.32666666666666666, + "Indonesian,Spanish,Filipino": 0.3, + "Indonesian,Vietnamese,Malay": 0.43333333333333335, + "Indonesian,Vietnamese,English": 0.4, + "Indonesian,Vietnamese,Filipino": 0.36666666666666664, + "Indonesian,Malay,English": 0.34, + "Indonesian,Malay,Filipino": 0.3933333333333333, + "Indonesian,English,Filipino": 0.31333333333333335, + "Spanish,Vietnamese,Malay": 0.35333333333333333, + "Spanish,Vietnamese,English": 0.32666666666666666, + "Spanish,Vietnamese,Filipino": 0.31333333333333335, + "Spanish,Malay,English": 0.32, + "Spanish,Malay,Filipino": 0.31333333333333335, + "Spanish,English,Filipino": 0.2733333333333333, + "Vietnamese,Malay,English": 0.34, + "Vietnamese,Malay,Filipino": 0.34, + "Vietnamese,English,Filipino": 0.3333333333333333, + "Malay,English,Filipino": 0.2866666666666667 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.26, + "Chinese,Indonesian,Spanish,Malay": 0.25333333333333335, + "Chinese,Indonesian,Spanish,English": 0.22, + "Chinese,Indonesian,Spanish,Filipino": 0.18, + "Chinese,Indonesian,Vietnamese,Malay": 0.28, + "Chinese,Indonesian,Vietnamese,English": 0.28, + "Chinese,Indonesian,Vietnamese,Filipino": 0.22666666666666666, + "Chinese,Indonesian,Malay,English": 0.23333333333333334, + "Chinese,Indonesian,Malay,Filipino": 0.22, + "Chinese,Indonesian,English,Filipino": 0.19333333333333333, + "Chinese,Spanish,Vietnamese,Malay": 0.24, + "Chinese,Spanish,Vietnamese,English": 0.22666666666666666, + "Chinese,Spanish,Vietnamese,Filipino": 0.18666666666666668, + "Chinese,Spanish,Malay,English": 0.21333333333333335, + "Chinese,Spanish,Malay,Filipino": 0.18666666666666668, + "Chinese,Spanish,English,Filipino": 0.17333333333333334, + "Chinese,Vietnamese,Malay,English": 0.22666666666666666, + "Chinese,Vietnamese,Malay,Filipino": 0.2, + "Chinese,Vietnamese,English,Filipino": 0.21333333333333335, + "Chinese,Malay,English,Filipino": 0.19333333333333333, + "Indonesian,Spanish,Vietnamese,Malay": 0.30666666666666664, + "Indonesian,Spanish,Vietnamese,English": 0.26666666666666666, + "Indonesian,Spanish,Vietnamese,Filipino": 0.23333333333333334, + "Indonesian,Spanish,Malay,English": 0.26666666666666666, + "Indonesian,Spanish,Malay,Filipino": 0.26, + "Indonesian,Spanish,English,Filipino": 0.21333333333333335, + "Indonesian,Vietnamese,Malay,English": 0.2866666666666667, + "Indonesian,Vietnamese,Malay,Filipino": 0.3, + "Indonesian,Vietnamese,English,Filipino": 0.25333333333333335, + "Indonesian,Malay,English,Filipino": 0.24, + "Spanish,Vietnamese,Malay,English": 0.24666666666666667, + "Spanish,Vietnamese,Malay,Filipino": 0.24, + "Spanish,Vietnamese,English,Filipino": 0.22, + "Spanish,Malay,English,Filipino": 0.21333333333333335, + "Vietnamese,Malay,English,Filipino": 0.22 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.22, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.19333333333333333, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.16, + "Chinese,Indonesian,Spanish,Malay,English": 0.18666666666666668, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.16666666666666666, + "Chinese,Indonesian,Spanish,English,Filipino": 0.14, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.20666666666666667, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.18666666666666668, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.16666666666666666, + "Chinese,Indonesian,Malay,English,Filipino": 0.15333333333333332, + "Chinese,Spanish,Vietnamese,Malay,English": 0.18, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.15333333333333332, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.14, + "Chinese,Spanish,Malay,English,Filipino": 0.14666666666666667, + "Chinese,Vietnamese,Malay,English,Filipino": 0.14666666666666667, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.22666666666666666, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.20666666666666667, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.17333333333333334, + "Indonesian,Spanish,Malay,English,Filipino": 0.19333333333333333, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.2, + "Spanish,Vietnamese,Malay,English,Filipino": 0.17333333333333334 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.16666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.14666666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.12, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.13333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.13333333333333333, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.12, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.16 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.11333333333333333 + } + }, + "AC3_2": 0.3561850802189943, + "AC3_3": 0.297945466025231, + "AC3_4": 0.2518652135291635, + "AC3_5": 0.2149682874787068, + "AC3_6": 0.1851612902777835, + "AC3_7": 0.16022988501603247 + }, + "prompt_2": { + "overall_acc": 0.2561904761904762, + "language_acc": { + "Chinese": 0.22, + "Indonesian": 0.2733333333333333, + "Spanish": 0.26666666666666666, + "Vietnamese": 0.25333333333333335, + "Malay": 0.24666666666666667, + "English": 0.32, + "Filipino": 0.21333333333333335 + }, + "consistency_score_2": 0.5746031746031746, + "consistency_score_3": 0.4005714285714286, + "consistency_score_4": 0.30533333333333335, + "consistency_score_5": 0.24380952380952373, + "consistency_score_6": 0.1980952380952381, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5733333333333334, + "Chinese,Spanish": 0.4533333333333333, + "Chinese,Vietnamese": 0.5866666666666667, + "Chinese,Malay": 0.5533333333333333, + "Chinese,English": 0.58, + "Chinese,Filipino": 0.56, + "Indonesian,Spanish": 0.5866666666666667, + "Indonesian,Vietnamese": 0.6133333333333333, + "Indonesian,Malay": 0.74, + "Indonesian,English": 0.5866666666666667, + "Indonesian,Filipino": 0.6266666666666667, + "Spanish,Vietnamese": 0.4866666666666667, + "Spanish,Malay": 0.5266666666666666, + "Spanish,English": 0.5266666666666666, + "Spanish,Filipino": 0.58, + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,English": 0.6, + "Vietnamese,Filipino": 0.6333333333333333, + "Malay,English": 0.5066666666666667, + "Malay,Filipino": 0.6, + "English,Filipino": 0.6 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Vietnamese": 0.4266666666666667, + "Chinese,Indonesian,Malay": 0.4533333333333333, + "Chinese,Indonesian,English": 0.4066666666666667, + "Chinese,Indonesian,Filipino": 0.42, + "Chinese,Spanish,Vietnamese": 0.3, + "Chinese,Spanish,Malay": 0.31333333333333335, + "Chinese,Spanish,English": 0.32, + "Chinese,Spanish,Filipino": 0.34, + "Chinese,Vietnamese,Malay": 0.38666666666666666, + "Chinese,Vietnamese,English": 0.42, + "Chinese,Vietnamese,Filipino": 0.42, + "Chinese,Malay,English": 0.37333333333333335, + "Chinese,Malay,Filipino": 0.4066666666666667, + "Chinese,English,Filipino": 0.41333333333333333, + "Indonesian,Spanish,Vietnamese": 0.38, + "Indonesian,Spanish,Malay": 0.44666666666666666, + "Indonesian,Spanish,English": 0.3933333333333333, + "Indonesian,Spanish,Filipino": 0.44, + "Indonesian,Vietnamese,Malay": 0.47333333333333333, + "Indonesian,Vietnamese,English": 0.43333333333333335, + "Indonesian,Vietnamese,Filipino": 0.47333333333333333, + "Indonesian,Malay,English": 0.44, + "Indonesian,Malay,Filipino": 0.5066666666666667, + "Indonesian,English,Filipino": 0.44666666666666666, + "Spanish,Vietnamese,Malay": 0.3333333333333333, + "Spanish,Vietnamese,English": 0.3466666666666667, + "Spanish,Vietnamese,Filipino": 0.38666666666666666, + "Spanish,Malay,English": 0.3333333333333333, + "Spanish,Malay,Filipino": 0.38666666666666666, + "Spanish,English,Filipino": 0.38, + "Vietnamese,Malay,English": 0.37333333333333335, + "Vietnamese,Malay,Filipino": 0.44, + "Vietnamese,English,Filipino": 0.4666666666666667, + "Malay,English,Filipino": 0.3933333333333333 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.26, + "Chinese,Indonesian,Spanish,Malay": 0.2866666666666667, + "Chinese,Indonesian,Spanish,English": 0.26666666666666666, + "Chinese,Indonesian,Spanish,Filipino": 0.3, + "Chinese,Indonesian,Vietnamese,Malay": 0.34, + "Chinese,Indonesian,Vietnamese,English": 0.34, + "Chinese,Indonesian,Vietnamese,Filipino": 0.35333333333333333, + "Chinese,Indonesian,Malay,English": 0.32666666666666666, + "Chinese,Indonesian,Malay,Filipino": 0.35333333333333333, + "Chinese,Indonesian,English,Filipino": 0.34, + "Chinese,Spanish,Vietnamese,Malay": 0.22666666666666666, + "Chinese,Spanish,Vietnamese,English": 0.24, + "Chinese,Spanish,Vietnamese,Filipino": 0.26, + "Chinese,Spanish,Malay,English": 0.23333333333333334, + "Chinese,Spanish,Malay,Filipino": 0.2733333333333333, + "Chinese,Spanish,English,Filipino": 0.2733333333333333, + "Chinese,Vietnamese,Malay,English": 0.2866666666666667, + "Chinese,Vietnamese,Malay,Filipino": 0.32, + "Chinese,Vietnamese,English,Filipino": 0.32666666666666666, + "Chinese,Malay,English,Filipino": 0.30666666666666664, + "Indonesian,Spanish,Vietnamese,Malay": 0.3, + "Indonesian,Spanish,Vietnamese,English": 0.29333333333333333, + "Indonesian,Spanish,Vietnamese,Filipino": 0.32, + "Indonesian,Spanish,Malay,English": 0.30666666666666664, + "Indonesian,Spanish,Malay,Filipino": 0.34, + "Indonesian,Spanish,English,Filipino": 0.32666666666666666, + "Indonesian,Vietnamese,Malay,English": 0.35333333333333333, + "Indonesian,Vietnamese,Malay,Filipino": 0.3933333333333333, + "Indonesian,Vietnamese,English,Filipino": 0.36666666666666664, + "Indonesian,Malay,English,Filipino": 0.35333333333333333, + "Spanish,Vietnamese,Malay,English": 0.25333333333333335, + "Spanish,Vietnamese,Malay,Filipino": 0.28, + "Spanish,Vietnamese,English,Filipino": 0.3, + "Spanish,Malay,English,Filipino": 0.2733333333333333, + "Vietnamese,Malay,English,Filipino": 0.31333333333333335 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.21333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.22, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.24, + "Chinese,Indonesian,Spanish,Malay,English": 0.22, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.24666666666666667, + "Chinese,Indonesian,Spanish,English,Filipino": 0.24666666666666667, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.2733333333333333, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.29333333333333333, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.29333333333333333, + "Chinese,Indonesian,Malay,English,Filipino": 0.28, + "Chinese,Spanish,Vietnamese,Malay,English": 0.18666666666666668, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.20666666666666667, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.21333333333333335, + "Chinese,Spanish,Malay,English,Filipino": 0.21333333333333335, + "Chinese,Vietnamese,Malay,English,Filipino": 0.24666666666666667, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.24, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.25333333333333335, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.26, + "Indonesian,Spanish,Malay,English,Filipino": 0.25333333333333335, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.3, + "Spanish,Vietnamese,Malay,English,Filipino": 0.22 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.18, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.19333333333333333, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.2, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.2, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.24, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.16666666666666666, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.20666666666666667 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.16 + } + }, + "AC3_2": 0.3543788779892604, + "AC3_3": 0.31251077224277113, + "AC3_4": 0.278611487964856, + "AC3_5": 0.24984671196817124, + "AC3_6": 0.2234281720582351, + "AC3_7": 0.19697940498699576 + }, + "prompt_3": { + "overall_acc": 0.2580952380952381, + "language_acc": { + "Chinese": 0.21333333333333335, + "Indonesian": 0.2733333333333333, + "Spanish": 0.26, + "Vietnamese": 0.28, + "Malay": 0.26666666666666666, + "English": 0.28, + "Filipino": 0.23333333333333334 + }, + "consistency_score_2": 0.5387301587301586, + "consistency_score_3": 0.3567619047619048, + "consistency_score_4": 0.2584761904761905, + "consistency_score_5": 0.1952380952380952, + "consistency_score_6": 0.14952380952380953, + "consistency_score_7": 0.11333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5666666666666667, + "Chinese,Spanish": 0.46, + "Chinese,Vietnamese": 0.5733333333333334, + "Chinese,Malay": 0.4666666666666667, + "Chinese,English": 0.54, + "Chinese,Filipino": 0.5133333333333333, + "Indonesian,Spanish": 0.54, + "Indonesian,Vietnamese": 0.5866666666666667, + "Indonesian,Malay": 0.6533333333333333, + "Indonesian,English": 0.5266666666666666, + "Indonesian,Filipino": 0.6, + "Spanish,Vietnamese": 0.4666666666666667, + "Spanish,Malay": 0.52, + "Spanish,English": 0.5266666666666666, + "Spanish,Filipino": 0.5266666666666666, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,English": 0.5666666666666667, + "Vietnamese,Filipino": 0.5666666666666667, + "Malay,English": 0.5066666666666667, + "Malay,Filipino": 0.5666666666666667, + "English,Filipino": 0.5333333333333333 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.32, + "Chinese,Indonesian,Vietnamese": 0.4266666666666667, + "Chinese,Indonesian,Malay": 0.3933333333333333, + "Chinese,Indonesian,English": 0.36666666666666664, + "Chinese,Indonesian,Filipino": 0.38666666666666666, + "Chinese,Spanish,Vietnamese": 0.2866666666666667, + "Chinese,Spanish,Malay": 0.26666666666666666, + "Chinese,Spanish,English": 0.3, + "Chinese,Spanish,Filipino": 0.29333333333333333, + "Chinese,Vietnamese,Malay": 0.34, + "Chinese,Vietnamese,English": 0.38, + "Chinese,Vietnamese,Filipino": 0.37333333333333335, + "Chinese,Malay,English": 0.30666666666666664, + "Chinese,Malay,Filipino": 0.34, + "Chinese,English,Filipino": 0.34, + "Indonesian,Spanish,Vietnamese": 0.35333333333333333, + "Indonesian,Spanish,Malay": 0.41333333333333333, + "Indonesian,Spanish,English": 0.3466666666666667, + "Indonesian,Spanish,Filipino": 0.36666666666666664, + "Indonesian,Vietnamese,Malay": 0.4, + "Indonesian,Vietnamese,English": 0.4, + "Indonesian,Vietnamese,Filipino": 0.42, + "Indonesian,Malay,English": 0.38, + "Indonesian,Malay,Filipino": 0.44, + "Indonesian,English,Filipino": 0.38, + "Spanish,Vietnamese,Malay": 0.32666666666666666, + "Spanish,Vietnamese,English": 0.3333333333333333, + "Spanish,Vietnamese,Filipino": 0.3333333333333333, + "Spanish,Malay,English": 0.32666666666666666, + "Spanish,Malay,Filipino": 0.35333333333333333, + "Spanish,English,Filipino": 0.3333333333333333, + "Vietnamese,Malay,English": 0.34, + "Vietnamese,Malay,Filipino": 0.38, + "Vietnamese,English,Filipino": 0.38666666666666666, + "Malay,English,Filipino": 0.35333333333333333 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.25333333333333335, + "Chinese,Indonesian,Spanish,Malay": 0.24, + "Chinese,Indonesian,Spanish,English": 0.24666666666666667, + "Chinese,Indonesian,Spanish,Filipino": 0.22, + "Chinese,Indonesian,Vietnamese,Malay": 0.3, + "Chinese,Indonesian,Vietnamese,English": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese,Filipino": 0.32, + "Chinese,Indonesian,Malay,English": 0.26, + "Chinese,Indonesian,Malay,Filipino": 0.29333333333333333, + "Chinese,Indonesian,English,Filipino": 0.2733333333333333, + "Chinese,Spanish,Vietnamese,Malay": 0.20666666666666667, + "Chinese,Spanish,Vietnamese,English": 0.22666666666666666, + "Chinese,Spanish,Vietnamese,Filipino": 0.21333333333333335, + "Chinese,Spanish,Malay,English": 0.2, + "Chinese,Spanish,Malay,Filipino": 0.19333333333333333, + "Chinese,Spanish,English,Filipino": 0.21333333333333335, + "Chinese,Vietnamese,Malay,English": 0.24666666666666667, + "Chinese,Vietnamese,Malay,Filipino": 0.26, + "Chinese,Vietnamese,English,Filipino": 0.2733333333333333, + "Chinese,Malay,English,Filipino": 0.24, + "Indonesian,Spanish,Vietnamese,Malay": 0.28, + "Indonesian,Spanish,Vietnamese,English": 0.2733333333333333, + "Indonesian,Spanish,Vietnamese,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Malay,English": 0.26666666666666666, + "Indonesian,Spanish,Malay,Filipino": 0.29333333333333333, + "Indonesian,Spanish,English,Filipino": 0.25333333333333335, + "Indonesian,Vietnamese,Malay,English": 0.2866666666666667, + "Indonesian,Vietnamese,Malay,Filipino": 0.31333333333333335, + "Indonesian,Vietnamese,English,Filipino": 0.31333333333333335, + "Indonesian,Malay,English,Filipino": 0.28, + "Spanish,Vietnamese,Malay,English": 0.24, + "Spanish,Vietnamese,Malay,Filipino": 0.25333333333333335, + "Spanish,Vietnamese,English,Filipino": 0.24666666666666667, + "Spanish,Malay,English,Filipino": 0.22666666666666666, + "Vietnamese,Malay,English,Filipino": 0.25333333333333335 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.19333333333333333, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.21333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.19333333333333333, + "Chinese,Indonesian,Spanish,Malay,English": 0.18, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.16666666666666666, + "Chinese,Indonesian,Spanish,English,Filipino": 0.17333333333333334, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.22666666666666666, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.24, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.24666666666666667, + "Chinese,Indonesian,Malay,English,Filipino": 0.2, + "Chinese,Spanish,Vietnamese,Malay,English": 0.16666666666666666, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.16, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.17333333333333334, + "Chinese,Spanish,Malay,English,Filipino": 0.14666666666666667, + "Chinese,Vietnamese,Malay,English,Filipino": 0.18666666666666668, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.21333333333333335, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.22, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.21333333333333335, + "Indonesian,Spanish,Malay,English,Filipino": 0.19333333333333333, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.22, + "Spanish,Vietnamese,Malay,English,Filipino": 0.17333333333333334 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.16, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.14666666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.16, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.12666666666666668, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.17333333333333334, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.12, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.16 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.11333333333333333 + } + }, + "AC3_2": 0.348994118719245, + "AC3_3": 0.2995120079763067, + "AC3_4": 0.2582855737665473, + "AC3_5": 0.222308923520389, + "AC3_6": 0.18935024472435277, + "AC3_7": 0.1575042734618685 + }, + "prompt_4": { + "overall_acc": 0.26857142857142857, + "language_acc": { + "Chinese": 0.2733333333333333, + "Indonesian": 0.29333333333333333, + "Spanish": 0.2733333333333333, + "Vietnamese": 0.2733333333333333, + "Malay": 0.25333333333333335, + "English": 0.3, + "Filipino": 0.21333333333333335 + }, + "consistency_score_2": 0.5736507936507935, + "consistency_score_3": 0.4060952380952382, + "consistency_score_4": 0.316, + "consistency_score_5": 0.26031746031746034, + "consistency_score_6": 0.22380952380952385, + "consistency_score_7": 0.2, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.52, + "Chinese,Spanish": 0.4866666666666667, + "Chinese,Vietnamese": 0.6066666666666667, + "Chinese,Malay": 0.5266666666666666, + "Chinese,English": 0.5666666666666667, + "Chinese,Filipino": 0.5333333333333333, + "Indonesian,Spanish": 0.5666666666666667, + "Indonesian,Vietnamese": 0.6133333333333333, + "Indonesian,Malay": 0.7333333333333333, + "Indonesian,English": 0.5666666666666667, + "Indonesian,Filipino": 0.58, + "Spanish,Vietnamese": 0.5133333333333333, + "Spanish,Malay": 0.5933333333333334, + "Spanish,English": 0.5733333333333334, + "Spanish,Filipino": 0.5866666666666667, + "Vietnamese,Malay": 0.6133333333333333, + "Vietnamese,English": 0.5266666666666666, + "Vietnamese,Filipino": 0.6133333333333333, + "Malay,English": 0.5533333333333333, + "Malay,Filipino": 0.62, + "English,Filipino": 0.5533333333333333 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.34, + "Chinese,Indonesian,Vietnamese": 0.41333333333333333, + "Chinese,Indonesian,Malay": 0.41333333333333333, + "Chinese,Indonesian,English": 0.37333333333333335, + "Chinese,Indonesian,Filipino": 0.36666666666666664, + "Chinese,Spanish,Vietnamese": 0.3466666666666667, + "Chinese,Spanish,Malay": 0.3466666666666667, + "Chinese,Spanish,English": 0.36666666666666664, + "Chinese,Spanish,Filipino": 0.35333333333333333, + "Chinese,Vietnamese,Malay": 0.41333333333333333, + "Chinese,Vietnamese,English": 0.3933333333333333, + "Chinese,Vietnamese,Filipino": 0.41333333333333333, + "Chinese,Malay,English": 0.36666666666666664, + "Chinese,Malay,Filipino": 0.37333333333333335, + "Chinese,English,Filipino": 0.37333333333333335, + "Indonesian,Spanish,Vietnamese": 0.38666666666666666, + "Indonesian,Spanish,Malay": 0.4866666666666667, + "Indonesian,Spanish,English": 0.4, + "Indonesian,Spanish,Filipino": 0.41333333333333333, + "Indonesian,Vietnamese,Malay": 0.5066666666666667, + "Indonesian,Vietnamese,English": 0.41333333333333333, + "Indonesian,Vietnamese,Filipino": 0.46, + "Indonesian,Malay,English": 0.4666666666666667, + "Indonesian,Malay,Filipino": 0.49333333333333335, + "Indonesian,English,Filipino": 0.3933333333333333, + "Spanish,Vietnamese,Malay": 0.4066666666666667, + "Spanish,Vietnamese,English": 0.36666666666666664, + "Spanish,Vietnamese,Filipino": 0.41333333333333333, + "Spanish,Malay,English": 0.41333333333333333, + "Spanish,Malay,Filipino": 0.44666666666666666, + "Spanish,English,Filipino": 0.4, + "Vietnamese,Malay,English": 0.41333333333333333, + "Vietnamese,Malay,Filipino": 0.47333333333333333, + "Vietnamese,English,Filipino": 0.4, + "Malay,English,Filipino": 0.4066666666666667 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.28, + "Chinese,Indonesian,Spanish,Malay": 0.29333333333333333, + "Chinese,Indonesian,Spanish,English": 0.2733333333333333, + "Chinese,Indonesian,Spanish,Filipino": 0.2733333333333333, + "Chinese,Indonesian,Vietnamese,Malay": 0.34, + "Chinese,Indonesian,Vietnamese,English": 0.32, + "Chinese,Indonesian,Vietnamese,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Malay,English": 0.31333333333333335, + "Chinese,Indonesian,Malay,Filipino": 0.32666666666666666, + "Chinese,Indonesian,English,Filipino": 0.29333333333333333, + "Chinese,Spanish,Vietnamese,Malay": 0.2866666666666667, + "Chinese,Spanish,Vietnamese,English": 0.28, + "Chinese,Spanish,Vietnamese,Filipino": 0.3, + "Chinese,Spanish,Malay,English": 0.2733333333333333, + "Chinese,Spanish,Malay,Filipino": 0.2866666666666667, + "Chinese,Spanish,English,Filipino": 0.2733333333333333, + "Chinese,Vietnamese,Malay,English": 0.31333333333333335, + "Chinese,Vietnamese,Malay,Filipino": 0.32666666666666666, + "Chinese,Vietnamese,English,Filipino": 0.31333333333333335, + "Chinese,Malay,English,Filipino": 0.2866666666666667, + "Indonesian,Spanish,Vietnamese,Malay": 0.3466666666666667, + "Indonesian,Spanish,Vietnamese,English": 0.28, + "Indonesian,Spanish,Vietnamese,Filipino": 0.3333333333333333, + "Indonesian,Spanish,Malay,English": 0.3466666666666667, + "Indonesian,Spanish,Malay,Filipino": 0.37333333333333335, + "Indonesian,Spanish,English,Filipino": 0.30666666666666664, + "Indonesian,Vietnamese,Malay,English": 0.36666666666666664, + "Indonesian,Vietnamese,Malay,Filipino": 0.41333333333333333, + "Indonesian,Vietnamese,English,Filipino": 0.32666666666666666, + "Indonesian,Malay,English,Filipino": 0.36, + "Spanish,Vietnamese,Malay,English": 0.30666666666666664, + "Spanish,Vietnamese,Malay,Filipino": 0.3466666666666667, + "Spanish,Vietnamese,English,Filipino": 0.30666666666666664, + "Spanish,Malay,English,Filipino": 0.32666666666666666, + "Vietnamese,Malay,English,Filipino": 0.3333333333333333 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.24666666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.22666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.25333333333333335, + "Chinese,Indonesian,Spanish,Malay,English": 0.24, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.25333333333333335, + "Chinese,Indonesian,Spanish,English,Filipino": 0.22666666666666666, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.28, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.3, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.2733333333333333, + "Chinese,Indonesian,Malay,English,Filipino": 0.26666666666666666, + "Chinese,Spanish,Vietnamese,Malay,English": 0.24, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.25333333333333335, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.24666666666666667, + "Chinese,Spanish,Malay,English,Filipino": 0.22666666666666666, + "Chinese,Vietnamese,Malay,English,Filipino": 0.26666666666666666, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.26, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.30666666666666664, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.24666666666666667, + "Indonesian,Spanish,Malay,English,Filipino": 0.2866666666666667, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.30666666666666664, + "Spanish,Vietnamese,Malay,English,Filipino": 0.26 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.20666666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.23333333333333334, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.21333333333333335, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.21333333333333335, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.25333333333333335, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.21333333333333335, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.23333333333333334 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.2 + } + }, + "AC3_2": 0.36585644279754914, + "AC3_3": 0.3233169314677668, + "AC3_4": 0.2903616812797524, + "AC3_5": 0.2643800376793896, + "AC3_6": 0.2441558441062574, + "AC3_7": 0.22926829263399762 + }, + "prompt_5": { + "overall_acc": 0.26, + "language_acc": { + "Chinese": 0.2, + "Indonesian": 0.26666666666666666, + "Spanish": 0.24, + "Vietnamese": 0.3, + "Malay": 0.26, + "English": 0.3, + "Filipino": 0.25333333333333335 + }, + "consistency_score_2": 0.5720634920634919, + "consistency_score_3": 0.40190476190476193, + "consistency_score_4": 0.3114285714285715, + "consistency_score_5": 0.2558730158730159, + "consistency_score_6": 0.21904761904761902, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5466666666666666, + "Chinese,Spanish": 0.5133333333333333, + "Chinese,Vietnamese": 0.5666666666666667, + "Chinese,Malay": 0.5, + "Chinese,English": 0.5733333333333334, + "Chinese,Filipino": 0.5466666666666666, + "Indonesian,Spanish": 0.5133333333333333, + "Indonesian,Vietnamese": 0.6666666666666666, + "Indonesian,Malay": 0.7, + "Indonesian,English": 0.64, + "Indonesian,Filipino": 0.6, + "Spanish,Vietnamese": 0.5066666666666667, + "Spanish,Malay": 0.5133333333333333, + "Spanish,English": 0.54, + "Spanish,Filipino": 0.56, + "Vietnamese,Malay": 0.52, + "Vietnamese,English": 0.6266666666666667, + "Vietnamese,Filipino": 0.5866666666666667, + "Malay,English": 0.5733333333333334, + "Malay,Filipino": 0.6, + "English,Filipino": 0.62 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Vietnamese": 0.44666666666666666, + "Chinese,Indonesian,Malay": 0.4, + "Chinese,Indonesian,English": 0.41333333333333333, + "Chinese,Indonesian,Filipino": 0.3933333333333333, + "Chinese,Spanish,Vietnamese": 0.34, + "Chinese,Spanish,Malay": 0.32, + "Chinese,Spanish,English": 0.3466666666666667, + "Chinese,Spanish,Filipino": 0.34, + "Chinese,Vietnamese,Malay": 0.34, + "Chinese,Vietnamese,English": 0.43333333333333335, + "Chinese,Vietnamese,Filipino": 0.38666666666666666, + "Chinese,Malay,English": 0.36666666666666664, + "Chinese,Malay,Filipino": 0.37333333333333335, + "Chinese,English,Filipino": 0.4066666666666667, + "Indonesian,Spanish,Vietnamese": 0.38, + "Indonesian,Spanish,Malay": 0.42, + "Indonesian,Spanish,English": 0.3933333333333333, + "Indonesian,Spanish,Filipino": 0.38666666666666666, + "Indonesian,Vietnamese,Malay": 0.47333333333333333, + "Indonesian,Vietnamese,English": 0.5066666666666667, + "Indonesian,Vietnamese,Filipino": 0.48, + "Indonesian,Malay,English": 0.47333333333333333, + "Indonesian,Malay,Filipino": 0.48, + "Indonesian,English,Filipino": 0.46, + "Spanish,Vietnamese,Malay": 0.34, + "Spanish,Vietnamese,English": 0.38, + "Spanish,Vietnamese,Filipino": 0.38, + "Spanish,Malay,English": 0.38666666666666666, + "Spanish,Malay,Filipino": 0.38666666666666666, + "Spanish,English,Filipino": 0.4, + "Vietnamese,Malay,English": 0.4066666666666667, + "Vietnamese,Malay,Filipino": 0.41333333333333333, + "Vietnamese,English,Filipino": 0.46, + "Malay,English,Filipino": 0.4266666666666667 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.3, + "Chinese,Indonesian,Spanish,Malay": 0.28, + "Chinese,Indonesian,Spanish,English": 0.2866666666666667, + "Chinese,Indonesian,Spanish,Filipino": 0.26666666666666666, + "Chinese,Indonesian,Vietnamese,Malay": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese,English": 0.37333333333333335, + "Chinese,Indonesian,Vietnamese,Filipino": 0.3466666666666667, + "Chinese,Indonesian,Malay,English": 0.29333333333333333, + "Chinese,Indonesian,Malay,Filipino": 0.30666666666666664, + "Chinese,Indonesian,English,Filipino": 0.32, + "Chinese,Spanish,Vietnamese,Malay": 0.26666666666666666, + "Chinese,Spanish,Vietnamese,English": 0.2866666666666667, + "Chinese,Spanish,Vietnamese,Filipino": 0.26, + "Chinese,Spanish,Malay,English": 0.2733333333333333, + "Chinese,Spanish,Malay,Filipino": 0.26, + "Chinese,Spanish,English,Filipino": 0.28, + "Chinese,Vietnamese,Malay,English": 0.2866666666666667, + "Chinese,Vietnamese,Malay,Filipino": 0.28, + "Chinese,Vietnamese,English,Filipino": 0.34, + "Chinese,Malay,English,Filipino": 0.3, + "Indonesian,Spanish,Vietnamese,Malay": 0.31333333333333335, + "Indonesian,Spanish,Vietnamese,English": 0.32, + "Indonesian,Spanish,Vietnamese,Filipino": 0.31333333333333335, + "Indonesian,Spanish,Malay,English": 0.32666666666666666, + "Indonesian,Spanish,Malay,Filipino": 0.34, + "Indonesian,Spanish,English,Filipino": 0.32, + "Indonesian,Vietnamese,Malay,English": 0.37333333333333335, + "Indonesian,Vietnamese,Malay,Filipino": 0.38, + "Indonesian,Vietnamese,English,Filipino": 0.4066666666666667, + "Indonesian,Malay,English,Filipino": 0.35333333333333333, + "Spanish,Vietnamese,Malay,English": 0.2866666666666667, + "Spanish,Vietnamese,Malay,Filipino": 0.2866666666666667, + "Spanish,Vietnamese,English,Filipino": 0.31333333333333335, + "Spanish,Malay,English,Filipino": 0.31333333333333335, + "Vietnamese,Malay,English,Filipino": 0.3333333333333333 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.25333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.26666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.24666666666666667, + "Chinese,Indonesian,Spanish,Malay,English": 0.24, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.23333333333333334, + "Chinese,Indonesian,Spanish,English,Filipino": 0.24, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.26, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.26, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.30666666666666664, + "Chinese,Indonesian,Malay,English,Filipino": 0.24, + "Chinese,Spanish,Vietnamese,Malay,English": 0.23333333333333334, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.22, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.24, + "Chinese,Spanish,Malay,English,Filipino": 0.23333333333333334, + "Chinese,Vietnamese,Malay,English,Filipino": 0.24666666666666667, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.26666666666666666, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.28, + "Indonesian,Spanish,Malay,English,Filipino": 0.2733333333333333, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.31333333333333335, + "Spanish,Vietnamese,Malay,English,Filipino": 0.24666666666666667 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.22, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.21333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.22666666666666666, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.20666666666666667, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.22666666666666666, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.2, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.24 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.19333333333333333 + } + }, + "AC3_2": 0.3575123998044195, + "AC3_3": 0.3157410071465427, + "AC3_4": 0.28339999995040505, + "AC3_5": 0.25791999995000325, + "AC3_6": 0.23777335979131967, + "AC3_7": 0.22176470583343424 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.26866883116883117, + "language_acc": { + "Vietnamese": 0.26704545454545453, + "Indonesian": 0.2556818181818182, + "Spanish": 0.2840909090909091, + "Malay": 0.26704545454545453, + "Filipino": 0.2784090909090909, + "English": 0.26136363636363635, + "Chinese": 0.26704545454545453 + }, + "consistency_score_2": 0.49648268398268397, + "consistency_score_3": 0.3099025974025974, + "consistency_score_4": 0.21915584415584416, + "consistency_score_5": 0.1677489177489178, + "consistency_score_6": 0.13555194805194806, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Spanish": 0.4034090909090909, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.4659090909090909, + "Vietnamese,Chinese": 0.5056818181818182, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.5, + "Indonesian,English": 0.48863636363636365, + "Indonesian,Chinese": 0.4772727272727273, + "Spanish,Malay": 0.4943181818181818, + "Spanish,Filipino": 0.45454545454545453, + "Spanish,English": 0.5284090909090909, + "Spanish,Chinese": 0.4659090909090909, + "Malay,Filipino": 0.5738636363636364, + "Malay,English": 0.5170454545454546, + "Malay,Chinese": 0.5227272727272727, + "Filipino,English": 0.4375, + "Filipino,Chinese": 0.5340909090909091, + "English,Chinese": 0.5227272727272727 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.3522727272727273, + "Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Vietnamese,Indonesian,English": 0.2897727272727273, + "Vietnamese,Indonesian,Chinese": 0.3068181818181818, + "Vietnamese,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Spanish,Filipino": 0.2556818181818182, + "Vietnamese,Spanish,English": 0.2727272727272727, + "Vietnamese,Spanish,Chinese": 0.26136363636363635, + "Vietnamese,Malay,Filipino": 0.36363636363636365, + "Vietnamese,Malay,English": 0.3181818181818182, + "Vietnamese,Malay,Chinese": 0.3409090909090909, + "Vietnamese,Filipino,English": 0.29545454545454547, + "Vietnamese,Filipino,Chinese": 0.3409090909090909, + "Vietnamese,English,Chinese": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.3068181818181818, + "Indonesian,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Spanish,English": 0.29545454545454547, + "Indonesian,Spanish,Chinese": 0.26704545454545453, + "Indonesian,Malay,Filipino": 0.375, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Malay,Chinese": 0.3352272727272727, + "Indonesian,Filipino,English": 0.2727272727272727, + "Indonesian,Filipino,Chinese": 0.30113636363636365, + "Indonesian,English,Chinese": 0.2897727272727273, + "Spanish,Malay,Filipino": 0.3352272727272727, + "Spanish,Malay,English": 0.3409090909090909, + "Spanish,Malay,Chinese": 0.29545454545454547, + "Spanish,Filipino,English": 0.2840909090909091, + "Spanish,Filipino,Chinese": 0.2897727272727273, + "Spanish,English,Chinese": 0.3181818181818182, + "Malay,Filipino,English": 0.3068181818181818, + "Malay,Filipino,Chinese": 0.3693181818181818, + "Malay,English,Chinese": 0.3409090909090909, + "Filipino,English,Chinese": 0.32386363636363635 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish,English": 0.1875, + "Vietnamese,Indonesian,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,English": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Chinese": 0.23295454545454544, + "Vietnamese,Indonesian,English,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Malay,Filipino": 0.20454545454545456, + "Vietnamese,Spanish,Malay,English": 0.2159090909090909, + "Vietnamese,Spanish,Malay,Chinese": 0.19318181818181818, + "Vietnamese,Spanish,Filipino,English": 0.1875, + "Vietnamese,Spanish,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Spanish,English,Chinese": 0.20454545454545456, + "Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Vietnamese,Malay,Filipino,Chinese": 0.26704545454545453, + "Vietnamese,Malay,English,Chinese": 0.23863636363636365, + "Vietnamese,Filipino,English,Chinese": 0.23863636363636365, + "Indonesian,Spanish,Malay,Filipino": 0.24431818181818182, + "Indonesian,Spanish,Malay,English": 0.2215909090909091, + "Indonesian,Spanish,Malay,Chinese": 0.21022727272727273, + "Indonesian,Spanish,Filipino,English": 0.1875, + "Indonesian,Spanish,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Spanish,English,Chinese": 0.19318181818181818, + "Indonesian,Malay,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino,Chinese": 0.26136363636363635, + "Indonesian,Malay,English,Chinese": 0.22727272727272727, + "Indonesian,Filipino,English,Chinese": 0.19886363636363635, + "Spanish,Malay,Filipino,English": 0.23863636363636365, + "Spanish,Malay,Filipino,Chinese": 0.23295454545454544, + "Spanish,Malay,English,Chinese": 0.23295454545454544, + "Spanish,Filipino,English,Chinese": 0.21022727272727273, + "Malay,Filipino,English,Chinese": 0.23863636363636365 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.16477272727272727, + "Vietnamese,Spanish,Malay,Filipino,English": 0.1534090909090909, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Spanish,Malay,English,Chinese": 0.16477272727272727, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.1590909090909091, + "Vietnamese,Malay,Filipino,English,Chinese": 0.17613636363636365, + "Indonesian,Spanish,Malay,Filipino,English": 0.17613636363636365, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.1875, + "Indonesian,Spanish,Malay,English,Chinese": 0.1590909090909091, + "Indonesian,Spanish,Filipino,English,Chinese": 0.14772727272727273, + "Indonesian,Malay,Filipino,English,Chinese": 0.17613636363636365, + "Spanish,Malay,Filipino,English,Chinese": 0.18181818181818182 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.125, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.13068181818181818, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.13636363636363635 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.11363636363636363 + } + }, + "AC3_2": 0.3486614604882408, + "AC3_3": 0.2878163853045181, + "AC3_4": 0.241399615310197, + "AC3_5": 0.20654020494297173, + "AC3_6": 0.18019154535781126, + "AC3_7": 0.15971820107769732 + }, + "prompt_2": { + "overall_acc": 0.2719155844155844, + "language_acc": { + "Vietnamese": 0.26136363636363635, + "Indonesian": 0.25, + "Spanish": 0.32386363636363635, + "Malay": 0.2784090909090909, + "Filipino": 0.29545454545454547, + "English": 0.23863636363636365, + "Chinese": 0.2556818181818182 + }, + "consistency_score_2": 0.5197510822510822, + "consistency_score_3": 0.3422077922077921, + "consistency_score_4": 0.2530844155844155, + "consistency_score_5": 0.19886363636363635, + "consistency_score_6": 0.16071428571428573, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.6022727272727273, + "Vietnamese,Spanish": 0.4943181818181818, + "Vietnamese,Malay": 0.5795454545454546, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.48863636363636365, + "Vietnamese,Chinese": 0.5113636363636364, + "Indonesian,Spanish": 0.5056818181818182, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.5227272727272727, + "Indonesian,English": 0.5511363636363636, + "Indonesian,Chinese": 0.5284090909090909, + "Spanish,Malay": 0.5511363636363636, + "Spanish,Filipino": 0.4659090909090909, + "Spanish,English": 0.5625, + "Spanish,Chinese": 0.48295454545454547, + "Malay,Filipino": 0.5170454545454546, + "Malay,English": 0.5340909090909091, + "Malay,Chinese": 0.5397727272727273, + "Filipino,English": 0.44886363636363635, + "Filipino,Chinese": 0.4659090909090909, + "English,Chinese": 0.5113636363636364 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,Filipino": 0.35795454545454547, + "Vietnamese,Indonesian,English": 0.38636363636363635, + "Vietnamese,Indonesian,Chinese": 0.38636363636363635, + "Vietnamese,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Spanish,Filipino": 0.29545454545454547, + "Vietnamese,Spanish,English": 0.3352272727272727, + "Vietnamese,Spanish,Chinese": 0.3068181818181818, + "Vietnamese,Malay,Filipino": 0.3465909090909091, + "Vietnamese,Malay,English": 0.35795454545454547, + "Vietnamese,Malay,Chinese": 0.3806818181818182, + "Vietnamese,Filipino,English": 0.2727272727272727, + "Vietnamese,Filipino,Chinese": 0.30113636363636365, + "Vietnamese,English,Chinese": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.35795454545454547, + "Indonesian,Spanish,Filipino": 0.32954545454545453, + "Indonesian,Spanish,English": 0.3806818181818182, + "Indonesian,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Malay,Filipino": 0.3522727272727273, + "Indonesian,Malay,English": 0.3977272727272727, + "Indonesian,Malay,Chinese": 0.3693181818181818, + "Indonesian,Filipino,English": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.3181818181818182, + "Indonesian,English,Chinese": 0.35795454545454547, + "Spanish,Malay,Filipino": 0.3352272727272727, + "Spanish,Malay,English": 0.39204545454545453, + "Spanish,Malay,Chinese": 0.35795454545454547, + "Spanish,Filipino,English": 0.3125, + "Spanish,Filipino,Chinese": 0.2840909090909091, + "Spanish,English,Chinese": 0.32386363636363635, + "Malay,Filipino,English": 0.3125, + "Malay,Filipino,Chinese": 0.3181818181818182, + "Malay,English,Chinese": 0.3693181818181818, + "Filipino,English,Chinese": 0.2897727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,English": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Chinese": 0.25, + "Vietnamese,Indonesian,Malay,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,English": 0.3125, + "Vietnamese,Indonesian,Malay,Chinese": 0.3125, + "Vietnamese,Indonesian,Filipino,English": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,English,Chinese": 0.2784090909090909, + "Vietnamese,Spanish,Malay,Filipino": 0.23863636363636365, + "Vietnamese,Spanish,Malay,English": 0.2727272727272727, + "Vietnamese,Spanish,Malay,Chinese": 0.2556818181818182, + "Vietnamese,Spanish,Filipino,English": 0.21022727272727273, + "Vietnamese,Spanish,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Spanish,English,Chinese": 0.2215909090909091, + "Vietnamese,Malay,Filipino,English": 0.23295454545454544, + "Vietnamese,Malay,Filipino,Chinese": 0.25, + "Vietnamese,Malay,English,Chinese": 0.26704545454545453, + "Vietnamese,Filipino,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Malay,Filipino": 0.25, + "Indonesian,Spanish,Malay,English": 0.30113636363636365, + "Indonesian,Spanish,Malay,Chinese": 0.26704545454545453, + "Indonesian,Spanish,Filipino,English": 0.2556818181818182, + "Indonesian,Spanish,Filipino,Chinese": 0.2215909090909091, + "Indonesian,Spanish,English,Chinese": 0.25, + "Indonesian,Malay,Filipino,English": 0.26704545454545453, + "Indonesian,Malay,Filipino,Chinese": 0.24431818181818182, + "Indonesian,Malay,English,Chinese": 0.2897727272727273, + "Indonesian,Filipino,English,Chinese": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.2556818181818182, + "Spanish,Malay,Filipino,Chinese": 0.23295454545454544, + "Spanish,Malay,English,Chinese": 0.2784090909090909, + "Spanish,Filipino,English,Chinese": 0.2215909090909091, + "Malay,Filipino,English,Chinese": 0.23295454545454544 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.1875, + "Vietnamese,Spanish,Malay,Filipino,English": 0.1875, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Spanish,Malay,English,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.1534090909090909, + "Vietnamese,Malay,Filipino,English,Chinese": 0.18181818181818182, + "Indonesian,Spanish,Malay,Filipino,English": 0.2159090909090909, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.1875, + "Indonesian,Spanish,Malay,English,Chinese": 0.2215909090909091, + "Indonesian,Spanish,Filipino,English,Chinese": 0.1875, + "Indonesian,Malay,Filipino,English,Chinese": 0.20454545454545456, + "Spanish,Malay,Filipino,English,Chinese": 0.19318181818181818 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.14204545454545456, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.16477272727272727 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.13068181818181818 + } + }, + "AC3_2": 0.3570402170856846, + "AC3_3": 0.30303888546376734, + "AC3_4": 0.26216227336001857, + "AC3_5": 0.2297217867850597, + "AC3_6": 0.202023586123231, + "AC3_7": 0.17652584306465763 + }, + "prompt_3": { + "overall_acc": 0.26136363636363635, + "language_acc": { + "Vietnamese": 0.26136363636363635, + "Indonesian": 0.2556818181818182, + "Spanish": 0.2784090909090909, + "Malay": 0.2556818181818182, + "Filipino": 0.2556818181818182, + "English": 0.2840909090909091, + "Chinese": 0.23863636363636365 + }, + "consistency_score_2": 0.5192099567099567, + "consistency_score_3": 0.3410714285714286, + "consistency_score_4": 0.249512987012987, + "consistency_score_5": 0.19426406926406928, + "consistency_score_6": 0.15746753246753248, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5852272727272727, + "Vietnamese,Spanish": 0.5113636363636364, + "Vietnamese,Malay": 0.5795454545454546, + "Vietnamese,Filipino": 0.4772727272727273, + "Vietnamese,English": 0.5284090909090909, + "Vietnamese,Chinese": 0.48863636363636365, + "Indonesian,Spanish": 0.5056818181818182, + "Indonesian,Malay": 0.5965909090909091, + "Indonesian,Filipino": 0.48863636363636365, + "Indonesian,English": 0.5625, + "Indonesian,Chinese": 0.48863636363636365, + "Spanish,Malay": 0.5340909090909091, + "Spanish,Filipino": 0.48295454545454547, + "Spanish,English": 0.5454545454545454, + "Spanish,Chinese": 0.4772727272727273, + "Malay,Filipino": 0.4715909090909091, + "Malay,English": 0.5625, + "Malay,Chinese": 0.5738636363636364, + "Filipino,English": 0.44886363636363635, + "Filipino,Chinese": 0.4147727272727273, + "English,Chinese": 0.5795454545454546 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.35795454545454547, + "Vietnamese,Indonesian,Malay": 0.42613636363636365, + "Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Vietnamese,Indonesian,English": 0.38636363636363635, + "Vietnamese,Indonesian,Chinese": 0.3352272727272727, + "Vietnamese,Spanish,Malay": 0.3693181818181818, + "Vietnamese,Spanish,Filipino": 0.3181818181818182, + "Vietnamese,Spanish,English": 0.3465909090909091, + "Vietnamese,Spanish,Chinese": 0.2897727272727273, + "Vietnamese,Malay,Filipino": 0.32954545454545453, + "Vietnamese,Malay,English": 0.38636363636363635, + "Vietnamese,Malay,Chinese": 0.3693181818181818, + "Vietnamese,Filipino,English": 0.3068181818181818, + "Vietnamese,Filipino,Chinese": 0.2556818181818182, + "Vietnamese,English,Chinese": 0.36363636363636365, + "Indonesian,Spanish,Malay": 0.3693181818181818, + "Indonesian,Spanish,Filipino": 0.32386363636363635, + "Indonesian,Spanish,English": 0.3522727272727273, + "Indonesian,Spanish,Chinese": 0.30113636363636365, + "Indonesian,Malay,Filipino": 0.3465909090909091, + "Indonesian,Malay,English": 0.4034090909090909, + "Indonesian,Malay,Chinese": 0.3693181818181818, + "Indonesian,Filipino,English": 0.3181818181818182, + "Indonesian,Filipino,Chinese": 0.2784090909090909, + "Indonesian,English,Chinese": 0.3806818181818182, + "Spanish,Malay,Filipino": 0.3181818181818182, + "Spanish,Malay,English": 0.3806818181818182, + "Spanish,Malay,Chinese": 0.35795454545454547, + "Spanish,Filipino,English": 0.3125, + "Spanish,Filipino,Chinese": 0.2727272727272727, + "Spanish,English,Chinese": 0.3522727272727273, + "Malay,Filipino,English": 0.3181818181818182, + "Malay,Filipino,Chinese": 0.29545454545454547, + "Malay,English,Chinese": 0.4034090909090909, + "Filipino,English,Chinese": 0.3068181818181818 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish,English": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Chinese": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.25, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,English,Chinese": 0.2784090909090909, + "Vietnamese,Spanish,Malay,Filipino": 0.2556818181818182, + "Vietnamese,Spanish,Malay,English": 0.2784090909090909, + "Vietnamese,Spanish,Malay,Chinese": 0.24431818181818182, + "Vietnamese,Spanish,Filipino,English": 0.23295454545454544, + "Vietnamese,Spanish,Filipino,Chinese": 0.1875, + "Vietnamese,Spanish,English,Chinese": 0.23295454545454544, + "Vietnamese,Malay,Filipino,English": 0.25, + "Vietnamese,Malay,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Malay,English,Chinese": 0.29545454545454547, + "Vietnamese,Filipino,English,Chinese": 0.2159090909090909, + "Indonesian,Spanish,Malay,Filipino": 0.2556818181818182, + "Indonesian,Spanish,Malay,English": 0.2784090909090909, + "Indonesian,Spanish,Malay,Chinese": 0.25, + "Indonesian,Spanish,Filipino,English": 0.23863636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Spanish,English,Chinese": 0.2556818181818182, + "Indonesian,Malay,Filipino,English": 0.2556818181818182, + "Indonesian,Malay,Filipino,Chinese": 0.2215909090909091, + "Indonesian,Malay,English,Chinese": 0.29545454545454547, + "Indonesian,Filipino,English,Chinese": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.24431818181818182, + "Spanish,Malay,Filipino,Chinese": 0.2159090909090909, + "Spanish,Malay,English,Chinese": 0.2897727272727273, + "Spanish,Filipino,English,Chinese": 0.22727272727272727, + "Malay,Filipino,English,Chinese": 0.23863636363636365 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.1875, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Malay,Filipino,English": 0.20454545454545456, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Malay,English,Chinese": 0.21022727272727273, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.16477272727272727, + "Vietnamese,Malay,Filipino,English,Chinese": 0.1875, + "Indonesian,Spanish,Malay,Filipino,English": 0.20454545454545456, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.17613636363636365, + "Indonesian,Spanish,Malay,English,Chinese": 0.2159090909090909, + "Indonesian,Spanish,Filipino,English,Chinese": 0.1875, + "Indonesian,Malay,Filipino,English,Chinese": 0.19318181818181818, + "Spanish,Malay,Filipino,English,Chinese": 0.1875 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.1590909090909091, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.1534090909090909, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.1590909090909091 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.13068181818181818 + } + }, + "AC3_2": 0.3476997006014303, + "AC3_3": 0.295944489306844, + "AC3_4": 0.25530086368749694, + "AC3_5": 0.22287302953432775, + "AC3_6": 0.19652924590092735, + "AC3_7": 0.17424242419797978 + }, + "prompt_4": { + "overall_acc": 0.2792207792207792, + "language_acc": { + "Vietnamese": 0.23863636363636365, + "Indonesian": 0.25, + "Spanish": 0.3352272727272727, + "Malay": 0.2784090909090909, + "Filipino": 0.3068181818181818, + "English": 0.2727272727272727, + "Chinese": 0.2727272727272727 + }, + "consistency_score_2": 0.531655844155844, + "consistency_score_3": 0.34610389610389614, + "consistency_score_4": 0.2457792207792207, + "consistency_score_5": 0.18262987012987017, + "consistency_score_6": 0.1396103896103896, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5340909090909091, + "Vietnamese,Spanish": 0.5056818181818182, + "Vietnamese,Malay": 0.5738636363636364, + "Vietnamese,Filipino": 0.5170454545454546, + "Vietnamese,English": 0.5, + "Vietnamese,Chinese": 0.5, + "Indonesian,Spanish": 0.5795454545454546, + "Indonesian,Malay": 0.6761363636363636, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,English": 0.5625, + "Indonesian,Chinese": 0.45454545454545453, + "Spanish,Malay": 0.5625, + "Spanish,Filipino": 0.5227272727272727, + "Spanish,English": 0.5795454545454546, + "Spanish,Chinese": 0.5056818181818182, + "Malay,Filipino": 0.5511363636363636, + "Malay,English": 0.5511363636363636, + "Malay,Chinese": 0.4943181818181818, + "Filipino,English": 0.4715909090909091, + "Filipino,Chinese": 0.48295454545454547, + "English,Chinese": 0.5 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Vietnamese,Indonesian,English": 0.3522727272727273, + "Vietnamese,Indonesian,Chinese": 0.29545454545454547, + "Vietnamese,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Spanish,Filipino": 0.32386363636363635, + "Vietnamese,Spanish,English": 0.3409090909090909, + "Vietnamese,Spanish,Chinese": 0.3181818181818182, + "Vietnamese,Malay,Filipino": 0.35795454545454547, + "Vietnamese,Malay,English": 0.3693181818181818, + "Vietnamese,Malay,Chinese": 0.32386363636363635, + "Vietnamese,Filipino,English": 0.3068181818181818, + "Vietnamese,Filipino,Chinese": 0.29545454545454547, + "Vietnamese,English,Chinese": 0.30113636363636365, + "Indonesian,Spanish,Malay": 0.4318181818181818, + "Indonesian,Spanish,Filipino": 0.36363636363636365, + "Indonesian,Spanish,English": 0.38636363636363635, + "Indonesian,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Malay,Filipino": 0.42613636363636365, + "Indonesian,Malay,English": 0.4375, + "Indonesian,Malay,Chinese": 0.3522727272727273, + "Indonesian,Filipino,English": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.3068181818181818, + "Indonesian,English,Chinese": 0.32386363636363635, + "Spanish,Malay,Filipino": 0.375, + "Spanish,Malay,English": 0.38636363636363635, + "Spanish,Malay,Chinese": 0.3409090909090909, + "Spanish,Filipino,English": 0.32954545454545453, + "Spanish,Filipino,Chinese": 0.3125, + "Spanish,English,Chinese": 0.3409090909090909, + "Malay,Filipino,English": 0.3352272727272727, + "Malay,Filipino,Chinese": 0.3125, + "Malay,English,Chinese": 0.3465909090909091, + "Filipino,English,Chinese": 0.3068181818181818 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish,English": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,English": 0.30113636363636365, + "Vietnamese,Indonesian,Malay,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,English,Chinese": 0.2159090909090909, + "Vietnamese,Spanish,Malay,Filipino": 0.25, + "Vietnamese,Spanish,Malay,English": 0.26704545454545453, + "Vietnamese,Spanish,Malay,Chinese": 0.2215909090909091, + "Vietnamese,Spanish,Filipino,English": 0.23863636363636365, + "Vietnamese,Spanish,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Spanish,English,Chinese": 0.23295454545454544, + "Vietnamese,Malay,Filipino,English": 0.23295454545454544, + "Vietnamese,Malay,Filipino,Chinese": 0.21022727272727273, + "Vietnamese,Malay,English,Chinese": 0.24431818181818182, + "Vietnamese,Filipino,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Malay,Filipino": 0.3125, + "Indonesian,Spanish,Malay,English": 0.3068181818181818, + "Indonesian,Spanish,Malay,Chinese": 0.26136363636363635, + "Indonesian,Spanish,Filipino,English": 0.24431818181818182, + "Indonesian,Spanish,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Spanish,English,Chinese": 0.24431818181818182, + "Indonesian,Malay,Filipino,English": 0.2784090909090909, + "Indonesian,Malay,Filipino,Chinese": 0.24431818181818182, + "Indonesian,Malay,English,Chinese": 0.2727272727272727, + "Indonesian,Filipino,English,Chinese": 0.22727272727272727, + "Spanish,Malay,Filipino,English": 0.26136363636363635, + "Spanish,Malay,Filipino,Chinese": 0.23863636363636365, + "Spanish,Malay,English,Chinese": 0.26704545454545453, + "Spanish,Filipino,English,Chinese": 0.2215909090909091, + "Malay,Filipino,English,Chinese": 0.23295454545454544 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.1534090909090909, + "Vietnamese,Spanish,Malay,Filipino,English": 0.1875, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Malay,English,Chinese": 0.1875, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.1590909090909091, + "Vietnamese,Malay,Filipino,English,Chinese": 0.1590909090909091, + "Indonesian,Spanish,Malay,Filipino,English": 0.2159090909090909, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Malay,English,Chinese": 0.21022727272727273, + "Indonesian,Spanish,Filipino,English,Chinese": 0.18181818181818182, + "Indonesian,Malay,Filipino,English,Chinese": 0.19318181818181818, + "Spanish,Malay,Filipino,English,Chinese": 0.1875 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.13068181818181818, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.125, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.1590909090909091 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.10795454545454546 + } + }, + "AC3_2": 0.36614536610021187, + "AC3_3": 0.3090855146435634, + "AC3_4": 0.26143491630163124, + "AC3_5": 0.22082486928320746, + "AC3_6": 0.1861471861027417, + "AC3_7": 0.15570802359233551 + }, + "prompt_5": { + "overall_acc": 0.2711038961038961, + "language_acc": { + "Vietnamese": 0.22727272727272727, + "Indonesian": 0.25, + "Spanish": 0.3352272727272727, + "Malay": 0.2784090909090909, + "Filipino": 0.2840909090909091, + "English": 0.2727272727272727, + "Chinese": 0.25 + }, + "consistency_score_2": 0.6158008658008657, + "consistency_score_3": 0.45876623376623393, + "consistency_score_4": 0.36948051948051946, + "consistency_score_5": 0.3087121212121212, + "consistency_score_6": 0.262987012987013, + "consistency_score_7": 0.22727272727272727, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.6079545454545454, + "Vietnamese,Spanish": 0.5852272727272727, + "Vietnamese,Malay": 0.6136363636363636, + "Vietnamese,Filipino": 0.5625, + "Vietnamese,English": 0.5738636363636364, + "Vietnamese,Chinese": 0.5852272727272727, + "Indonesian,Spanish": 0.6193181818181818, + "Indonesian,Malay": 0.6704545454545454, + "Indonesian,Filipino": 0.5625, + "Indonesian,English": 0.6306818181818182, + "Indonesian,Chinese": 0.5340909090909091, + "Spanish,Malay": 0.6761363636363636, + "Spanish,Filipino": 0.5681818181818182, + "Spanish,English": 0.6306818181818182, + "Spanish,Chinese": 0.6022727272727273, + "Malay,Filipino": 0.6193181818181818, + "Malay,English": 0.6931818181818182, + "Malay,Chinese": 0.6875, + "Filipino,English": 0.6136363636363636, + "Filipino,Chinese": 0.6079545454545454, + "English,Chinese": 0.6875 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.4375, + "Vietnamese,Indonesian,Malay": 0.4772727272727273, + "Vietnamese,Indonesian,Filipino": 0.4090909090909091, + "Vietnamese,Indonesian,English": 0.44886363636363635, + "Vietnamese,Indonesian,Chinese": 0.4090909090909091, + "Vietnamese,Spanish,Malay": 0.4715909090909091, + "Vietnamese,Spanish,Filipino": 0.4090909090909091, + "Vietnamese,Spanish,English": 0.4431818181818182, + "Vietnamese,Spanish,Chinese": 0.4375, + "Vietnamese,Malay,Filipino": 0.4431818181818182, + "Vietnamese,Malay,English": 0.4715909090909091, + "Vietnamese,Malay,Chinese": 0.4772727272727273, + "Vietnamese,Filipino,English": 0.4318181818181818, + "Vietnamese,Filipino,Chinese": 0.4375, + "Vietnamese,English,Chinese": 0.4715909090909091, + "Indonesian,Spanish,Malay": 0.5056818181818182, + "Indonesian,Spanish,Filipino": 0.3977272727272727, + "Indonesian,Spanish,English": 0.4659090909090909, + "Indonesian,Spanish,Chinese": 0.42045454545454547, + "Indonesian,Malay,Filipino": 0.4431818181818182, + "Indonesian,Malay,English": 0.5227272727272727, + "Indonesian,Malay,Chinese": 0.4659090909090909, + "Indonesian,Filipino,English": 0.42613636363636365, + "Indonesian,Filipino,Chinese": 0.4034090909090909, + "Indonesian,English,Chinese": 0.45454545454545453, + "Spanish,Malay,Filipino": 0.45454545454545453, + "Spanish,Malay,English": 0.5170454545454546, + "Spanish,Malay,Chinese": 0.5170454545454546, + "Spanish,Filipino,English": 0.4431818181818182, + "Spanish,Filipino,Chinese": 0.4375, + "Spanish,English,Chinese": 0.48295454545454547, + "Malay,Filipino,English": 0.48863636363636365, + "Malay,Filipino,Chinese": 0.4943181818181818, + "Malay,English,Chinese": 0.5454545454545454, + "Filipino,English,Chinese": 0.4943181818181818 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.375, + "Vietnamese,Indonesian,Spanish,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish,English": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish,Chinese": 0.3409090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.3465909090909091, + "Vietnamese,Indonesian,Malay,English": 0.38636363636363635, + "Vietnamese,Indonesian,Malay,Chinese": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino,English": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino,Chinese": 0.32386363636363635, + "Vietnamese,Indonesian,English,Chinese": 0.35795454545454547, + "Vietnamese,Spanish,Malay,Filipino": 0.36363636363636365, + "Vietnamese,Spanish,Malay,English": 0.3977272727272727, + "Vietnamese,Spanish,Malay,Chinese": 0.3977272727272727, + "Vietnamese,Spanish,Filipino,English": 0.3465909090909091, + "Vietnamese,Spanish,Filipino,Chinese": 0.3522727272727273, + "Vietnamese,Spanish,English,Chinese": 0.3806818181818182, + "Vietnamese,Malay,Filipino,English": 0.375, + "Vietnamese,Malay,Filipino,Chinese": 0.3806818181818182, + "Vietnamese,Malay,English,Chinese": 0.4034090909090909, + "Vietnamese,Filipino,English,Chinese": 0.39204545454545453, + "Indonesian,Spanish,Malay,Filipino": 0.3522727272727273, + "Indonesian,Spanish,Malay,English": 0.4090909090909091, + "Indonesian,Spanish,Malay,Chinese": 0.38636363636363635, + "Indonesian,Spanish,Filipino,English": 0.32954545454545453, + "Indonesian,Spanish,Filipino,Chinese": 0.32386363636363635, + "Indonesian,Spanish,English,Chinese": 0.36363636363636365, + "Indonesian,Malay,Filipino,English": 0.3693181818181818, + "Indonesian,Malay,Filipino,Chinese": 0.3522727272727273, + "Indonesian,Malay,English,Chinese": 0.4034090909090909, + "Indonesian,Filipino,English,Chinese": 0.3465909090909091, + "Spanish,Malay,Filipino,English": 0.3806818181818182, + "Spanish,Malay,Filipino,Chinese": 0.38636363636363635, + "Spanish,Malay,English,Chinese": 0.4318181818181818, + "Spanish,Filipino,English,Chinese": 0.3806818181818182, + "Malay,Filipino,English,Chinese": 0.42045454545454547 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.3125, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.30113636363636365, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.29545454545454547, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.2897727272727273, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.2897727272727273, + "Vietnamese,Spanish,Malay,Filipino,English": 0.3181818181818182, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.32386363636363635, + "Vietnamese,Spanish,Malay,English,Chinese": 0.3522727272727273, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.3181818181818182, + "Vietnamese,Malay,Filipino,English,Chinese": 0.3409090909090909, + "Indonesian,Spanish,Malay,Filipino,English": 0.30113636363636365, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.29545454545454547, + "Indonesian,Spanish,Malay,English,Chinese": 0.3352272727272727, + "Indonesian,Spanish,Filipino,English,Chinese": 0.2840909090909091, + "Indonesian,Malay,Filipino,English,Chinese": 0.3125, + "Spanish,Malay,Filipino,English,Chinese": 0.3409090909090909 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.25, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.25, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.26136363636363635, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.29545454545454547, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.26136363636363635 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.22727272727272727 + } + }, + "AC3_2": 0.3764688636137255, + "AC3_3": 0.34080943518000056, + "AC3_4": 0.31273819941010855, + "AC3_5": 0.2886883298191282, + "AC3_6": 0.2669837760523287, + "AC3_7": 0.2472608823904223 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.2524271844660194 + }, + "prompt_2": { + "accuracy": 0.21359223300970873 + }, + "prompt_3": { + "accuracy": 0.2912621359223301 + }, + "prompt_4": { + "accuracy": 0.18446601941747573 + }, + "prompt_5": { + "accuracy": 0.23300970873786409 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.26666666666666666 + }, + "prompt_2": { + "accuracy": 0.2 + }, + "prompt_3": { + "accuracy": 0.26666666666666666 + }, + "prompt_4": { + "accuracy": 0.24761904761904763 + }, + "prompt_5": { + "accuracy": 0.2571428571428571 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.2803738317757009 + }, + "prompt_2": { + "accuracy": 0.2523364485981308 + }, + "prompt_3": { + "accuracy": 0.2523364485981308 + }, + "prompt_4": { + "accuracy": 0.205607476635514 + }, + "prompt_5": { + "accuracy": 0.2803738317757009 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.1, + "politics": 0.5, + "culture": 0.3, + "film": 0.4, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.4, + "culture": 0.4, + "film": 0.6, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.26, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.1, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.2, + "film": 0.2, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.1, + "history": 0.13333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.2, + "history": 0.2, + "literature": 0.2, + "politics": 0.7, + "culture": 0.3, + "film": 0.3, + "law": 0.4, + "geography": 0.6 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.16461335693348497 + }, + "prompt_2": { + "bleu_score": 0.1803638271010127 + }, + "prompt_3": { + "bleu_score": 0.17197946970947084 + }, + "prompt_4": { + "bleu_score": 0.15162380191650587 + }, + "prompt_5": { + "bleu_score": 0.15299296083439926 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.2653047599973296, + "category_acc": { + "History": 0.21887550200803213, + "Geography": 0.20612244897959184, + "Lampungic": 0.3401360544217687, + "Social science": 0.35058430717863104, + "Balinese": 0.21868365180467092, + "Makassarese": 0.24193548387096775, + "Banjarese": 0.2638888888888889, + "Chemistry": 0.1927007299270073, + "Biology": 0.22366863905325443, + "Science": 0.27450980392156865, + "Christian religion": 0.32338308457711445, + "Art": 0.2762063227953411, + "Islam religion": 0.30867709815078237, + "Hindu religion": 0.24, + "Madurese": 0.26440677966101694, + "Sport": 0.2702702702702703, + "Indonesian language": 0.2898505603985056, + "Physics": 0.21616161616161617, + "Minangkabau culture": 0.3015075376884422, + "Dayak language": 0.1834862385321101, + "Sociology": 0.2439516129032258, + "Economy": 0.26229508196721313, + "Sundanese": 0.2549697493517718, + "Javanese": 0.26310483870967744, + "Civic education": 0.2947067238912732 + } + }, + "prompt_2": { + "accuracy": 0.27458441818545964, + "category_acc": { + "History": 0.23092369477911648, + "Geography": 0.20204081632653062, + "Lampungic": 0.4013605442176871, + "Social science": 0.38564273789649417, + "Balinese": 0.24203821656050956, + "Makassarese": 0.27956989247311825, + "Banjarese": 0.2916666666666667, + "Chemistry": 0.17372262773722627, + "Biology": 0.22840236686390533, + "Science": 0.27450980392156865, + "Christian religion": 0.2885572139303483, + "Art": 0.2961730449251248, + "Islam religion": 0.3328591749644381, + "Hindu religion": 0.26666666666666666, + "Madurese": 0.24067796610169492, + "Sport": 0.27702702702702703, + "Indonesian language": 0.3116438356164384, + "Physics": 0.20606060606060606, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.22018348623853212, + "Sociology": 0.24596774193548387, + "Economy": 0.23565573770491804, + "Sundanese": 0.27052722558340536, + "Javanese": 0.2590725806451613, + "Civic education": 0.3104434907010014 + } + }, + "prompt_3": { + "accuracy": 0.27158021229721613, + "category_acc": { + "History": 0.23493975903614459, + "Geography": 0.20408163265306123, + "Lampungic": 0.3741496598639456, + "Social science": 0.3656093489148581, + "Balinese": 0.23991507430997877, + "Makassarese": 0.24731182795698925, + "Banjarese": 0.3055555555555556, + "Chemistry": 0.19562043795620437, + "Biology": 0.22366863905325443, + "Science": 0.27038183694530443, + "Christian religion": 0.31343283582089554, + "Art": 0.28618968386023297, + "Islam religion": 0.33001422475106684, + "Hindu religion": 0.2866666666666667, + "Madurese": 0.24745762711864408, + "Sport": 0.25, + "Indonesian language": 0.3013698630136986, + "Physics": 0.19393939393939394, + "Minangkabau culture": 0.2864321608040201, + "Dayak language": 0.22935779816513763, + "Sociology": 0.24193548387096775, + "Economy": 0.21106557377049182, + "Sundanese": 0.28694900605012963, + "Javanese": 0.2590725806451613, + "Civic education": 0.30185979971387694 + } + }, + "prompt_4": { + "accuracy": 0.2681086854930236, + "category_acc": { + "History": 0.23092369477911648, + "Geography": 0.17755102040816326, + "Lampungic": 0.35374149659863946, + "Social science": 0.34223706176961605, + "Balinese": 0.24416135881104034, + "Makassarese": 0.23118279569892472, + "Banjarese": 0.3194444444444444, + "Chemistry": 0.1927007299270073, + "Biology": 0.2319526627218935, + "Science": 0.2714138286893705, + "Christian religion": 0.31840796019900497, + "Art": 0.2512479201331115, + "Islam religion": 0.3186344238975818, + "Hindu religion": 0.2733333333333333, + "Madurese": 0.2305084745762712, + "Sport": 0.27702702702702703, + "Indonesian language": 0.29047322540473225, + "Physics": 0.23232323232323232, + "Minangkabau culture": 0.27638190954773867, + "Dayak language": 0.24770642201834864, + "Sociology": 0.22782258064516128, + "Economy": 0.25, + "Sundanese": 0.2964563526361279, + "Javanese": 0.2530241935483871, + "Civic education": 0.3061516452074392 + } + }, + "prompt_5": { + "accuracy": 0.2604312704452901, + "category_acc": { + "History": 0.23493975903614459, + "Geography": 0.19387755102040816, + "Lampungic": 0.3877551020408163, + "Social science": 0.328881469115192, + "Balinese": 0.23779193205944799, + "Makassarese": 0.23655913978494625, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.18394160583941604, + "Biology": 0.2106508875739645, + "Science": 0.26522187822497423, + "Christian religion": 0.2736318407960199, + "Art": 0.27287853577371046, + "Islam religion": 0.3186344238975818, + "Hindu religion": 0.2733333333333333, + "Madurese": 0.23728813559322035, + "Sport": 0.22297297297297297, + "Indonesian language": 0.2795765877957659, + "Physics": 0.20606060606060606, + "Minangkabau culture": 0.2562814070351759, + "Dayak language": 0.21100917431192662, + "Sociology": 0.22782258064516128, + "Economy": 0.2581967213114754, + "Sundanese": 0.2713915298184961, + "Javanese": 0.26411290322580644, + "Civic education": 0.290414878397711 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.18523858955613606 + }, + "prompt_2": { + "bleu_score": 0.22158136711946616 + }, + "prompt_3": { + "bleu_score": 0.20338966028994132 + }, + "prompt_4": { + "bleu_score": 0.1586468439271198 + }, + "prompt_5": { + "bleu_score": 0.19019498739527976 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.1591392085980787 + }, + "prompt_2": { + "bleu_score": 0.17488500580141955 + }, + "prompt_3": { + "bleu_score": 0.16292936397893423 + }, + "prompt_4": { + "bleu_score": 0.13323443204675203 + }, + "prompt_5": { + "bleu_score": 0.16057356491137148 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.10778876313415212 + }, + "prompt_2": { + "bleu_score": 0.13599373317576255 + }, + "prompt_3": { + "bleu_score": 0.1334890909367673 + }, + "prompt_4": { + "bleu_score": 0.1318143268998727 + }, + "prompt_5": { + "bleu_score": 0.11445806333679333 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.18892468272373472 + }, + "prompt_2": { + "bleu_score": 0.24639640858735495 + }, + "prompt_3": { + "bleu_score": 0.21619818853556907 + }, + "prompt_4": { + "bleu_score": 0.17330118965903155 + }, + "prompt_5": { + "bleu_score": 0.2010206475324375 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.2742123687281214 + }, + "prompt_2": { + "accuracy": 0.26837806301050177 + }, + "prompt_3": { + "accuracy": 0.27887981330221706 + }, + "prompt_4": { + "accuracy": 0.29638273045507585 + }, + "prompt_5": { + "accuracy": 0.2765460910151692 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.27944225956381835, + "category_acc": { + "high_school_european_history": 0.3231707317073171, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.2840909090909091, + "medical_genetics": 0.35353535353535354, + "high_school_us_history": 0.2857142857142857, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.2838983050847458, + "virology": 0.24242424242424243, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.2783171521035599, + "abstract_algebra": 0.18181818181818182, + "professional_accounting": 0.2775800711743772, + "philosophy": 0.3161290322580645, + "professional_medicine": 0.23616236162361623, + "nutrition": 0.28524590163934427, + "global_facts": 0.1919191919191919, + "machine_learning": 0.25225225225225223, + "security_studies": 0.29508196721311475, + "public_relations": 0.27522935779816515, + "professional_psychology": 0.28477905073649756, + "prehistory": 0.25386996904024767, + "anatomy": 0.39552238805970147, + "human_sexuality": 0.23846153846153847, + "college_medicine": 0.28488372093023256, + "high_school_government_and_politics": 0.3177083333333333, + "college_chemistry": 0.18181818181818182, + "logical_fallacies": 0.2716049382716049, + "high_school_geography": 0.2639593908629442, + "elementary_mathematics": 0.20424403183023873, + "human_aging": 0.30180180180180183, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.2922794117647059, + "formal_logic": 0.36, + "high_school_statistics": 0.20465116279069767, + "international_law": 0.35833333333333334, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.309462915601023, + "high_school_chemistry": 0.23267326732673269, + "marketing": 0.3776824034334764, + "professional_law": 0.2811480756686236, + "management": 0.28431372549019607, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.2803738317757009, + "world_religions": 0.3411764705882353, + "sociology": 0.345, + "us_foreign_policy": 0.37373737373737376, + "high_school_macroeconomics": 0.2776349614395887, + "computer_security": 0.30303030303030304, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.25507246376811593, + "electrical_engineering": 0.2708333333333333, + "astronomy": 0.2582781456953642, + "college_biology": 0.2727272727272727 + } + }, + "prompt_2": { + "accuracy": 0.27179120486235253, + "category_acc": { + "high_school_european_history": 0.2926829268292683, + "business_ethics": 0.26262626262626265, + "clinical_knowledge": 0.2916666666666667, + "medical_genetics": 0.23232323232323232, + "high_school_us_history": 0.2857142857142857, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.2966101694915254, + "virology": 0.24848484848484848, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.23624595469255663, + "abstract_algebra": 0.20202020202020202, + "professional_accounting": 0.2669039145907473, + "philosophy": 0.2967741935483871, + "professional_medicine": 0.22140221402214022, + "nutrition": 0.2819672131147541, + "global_facts": 0.1919191919191919, + "machine_learning": 0.23423423423423423, + "security_studies": 0.29098360655737704, + "public_relations": 0.26605504587155965, + "professional_psychology": 0.3027823240589198, + "prehistory": 0.26625386996904027, + "anatomy": 0.373134328358209, + "human_sexuality": 0.2230769230769231, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.2760416666666667, + "college_chemistry": 0.18181818181818182, + "logical_fallacies": 0.2654320987654321, + "high_school_geography": 0.27411167512690354, + "elementary_mathematics": 0.22546419098143236, + "human_aging": 0.27927927927927926, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.2702205882352941, + "formal_logic": 0.36, + "high_school_statistics": 0.17674418604651163, + "international_law": 0.375, + "high_school_mathematics": 0.22304832713754646, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.2692307692307692, + "miscellaneous": 0.29028132992327366, + "high_school_chemistry": 0.24752475247524752, + "marketing": 0.33047210300429186, + "professional_law": 0.29354207436399216, + "management": 0.27450980392156865, + "college_physics": 0.15841584158415842, + "jurisprudence": 0.3177570093457944, + "world_religions": 0.31176470588235294, + "sociology": 0.32, + "us_foreign_policy": 0.2727272727272727, + "high_school_macroeconomics": 0.2853470437017995, + "computer_security": 0.2727272727272727, + "moral_scenarios": 0.2360178970917226, + "moral_disputes": 0.2608695652173913, + "electrical_engineering": 0.24305555555555555, + "astronomy": 0.24503311258278146, + "college_biology": 0.2867132867132867 + } + }, + "prompt_3": { + "accuracy": 0.2800143010368252, + "category_acc": { + "high_school_european_history": 0.25609756097560976, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.29545454545454547, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.3103448275862069, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.2627118644067797, + "virology": 0.24242424242424243, + "high_school_microeconomics": 0.32489451476793246, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.3042071197411003, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.2704626334519573, + "philosophy": 0.33548387096774196, + "professional_medicine": 0.22509225092250923, + "nutrition": 0.26885245901639343, + "global_facts": 0.20202020202020202, + "machine_learning": 0.2072072072072072, + "security_studies": 0.3114754098360656, + "public_relations": 0.3211009174311927, + "professional_psychology": 0.27823240589198034, + "prehistory": 0.28173374613003094, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.2923076923076923, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.3385416666666667, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.2654320987654321, + "high_school_geography": 0.28426395939086296, + "elementary_mathematics": 0.20689655172413793, + "human_aging": 0.2972972972972973, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.2977941176470588, + "formal_logic": 0.36, + "high_school_statistics": 0.24651162790697675, + "international_law": 0.325, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.2564102564102564, + "miscellaneous": 0.30434782608695654, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.2918454935622318, + "professional_law": 0.2713633398564905, + "management": 0.3137254901960784, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.34579439252336447, + "world_religions": 0.32941176470588235, + "sociology": 0.295, + "us_foreign_policy": 0.31313131313131315, + "high_school_macroeconomics": 0.3110539845758355, + "computer_security": 0.30303030303030304, + "moral_scenarios": 0.2360178970917226, + "moral_disputes": 0.2318840579710145, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.2913907284768212, + "college_biology": 0.32867132867132864 + } + }, + "prompt_4": { + "accuracy": 0.27908473364318914, + "category_acc": { + "high_school_european_history": 0.2865853658536585, + "business_ethics": 0.2727272727272727, + "clinical_knowledge": 0.3106060606060606, + "medical_genetics": 0.26262626262626265, + "high_school_us_history": 0.3103448275862069, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.288135593220339, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.3333333333333333, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.284789644012945, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.26334519572953735, + "philosophy": 0.2903225806451613, + "professional_medicine": 0.22140221402214022, + "nutrition": 0.3081967213114754, + "global_facts": 0.20202020202020202, + "machine_learning": 0.22522522522522523, + "security_studies": 0.35655737704918034, + "public_relations": 0.24770642201834864, + "professional_psychology": 0.2618657937806874, + "prehistory": 0.2786377708978328, + "anatomy": 0.34328358208955223, + "human_sexuality": 0.23846153846153847, + "college_medicine": 0.313953488372093, + "high_school_government_and_politics": 0.3020833333333333, + "college_chemistry": 0.18181818181818182, + "logical_fallacies": 0.2839506172839506, + "high_school_geography": 0.29949238578680204, + "elementary_mathematics": 0.23872679045092837, + "human_aging": 0.24774774774774774, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.29411764705882354, + "formal_logic": 0.392, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.38333333333333336, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.2222222222222222, + "miscellaneous": 0.27877237851662406, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.351931330472103, + "professional_law": 0.2883235485975212, + "management": 0.3333333333333333, + "college_physics": 0.1782178217821782, + "jurisprudence": 0.3177570093457944, + "world_religions": 0.32941176470588235, + "sociology": 0.285, + "us_foreign_policy": 0.36363636363636365, + "high_school_macroeconomics": 0.33676092544987146, + "computer_security": 0.2727272727272727, + "moral_scenarios": 0.22930648769574943, + "moral_disputes": 0.22608695652173913, + "electrical_engineering": 0.22916666666666666, + "astronomy": 0.2913907284768212, + "college_biology": 0.2937062937062937 + } + }, + "prompt_5": { + "accuracy": 0.2760815159099035, + "category_acc": { + "high_school_european_history": 0.2804878048780488, + "business_ethics": 0.26262626262626265, + "clinical_knowledge": 0.2803030303030303, + "medical_genetics": 0.26262626262626265, + "high_school_us_history": 0.2955665024630542, + "high_school_physics": 0.26, + "high_school_world_history": 0.3050847457627119, + "virology": 0.28484848484848485, + "high_school_microeconomics": 0.3459915611814346, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.26262626262626265, + "high_school_biology": 0.24919093851132687, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.2704626334519573, + "philosophy": 0.2870967741935484, + "professional_medicine": 0.24723247232472326, + "nutrition": 0.30491803278688523, + "global_facts": 0.16161616161616163, + "machine_learning": 0.21621621621621623, + "security_studies": 0.36475409836065575, + "public_relations": 0.26605504587155965, + "professional_psychology": 0.2684124386252046, + "prehistory": 0.2786377708978328, + "anatomy": 0.3208955223880597, + "human_sexuality": 0.23076923076923078, + "college_medicine": 0.3372093023255814, + "high_school_government_and_politics": 0.2708333333333333, + "college_chemistry": 0.1717171717171717, + "logical_fallacies": 0.2345679012345679, + "high_school_geography": 0.30456852791878175, + "elementary_mathematics": 0.22546419098143236, + "human_aging": 0.24324324324324326, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.2555147058823529, + "formal_logic": 0.352, + "high_school_statistics": 0.26046511627906976, + "international_law": 0.38333333333333336, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.29411764705882354, + "high_school_chemistry": 0.2376237623762376, + "marketing": 0.3261802575107296, + "professional_law": 0.28701891715590344, + "management": 0.30392156862745096, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.24299065420560748, + "world_religions": 0.3235294117647059, + "sociology": 0.3, + "us_foreign_policy": 0.32323232323232326, + "high_school_macroeconomics": 0.3264781491002571, + "computer_security": 0.29292929292929293, + "moral_scenarios": 0.22483221476510068, + "moral_disputes": 0.26666666666666666, + "electrical_engineering": 0.2847222222222222, + "astronomy": 0.304635761589404, + "college_biology": 0.23776223776223776 + } + } + }, + "c_eval": { + "prompt_1": { + "accuracy": 0.26820208023774145 + }, + "prompt_2": { + "accuracy": 0.27340267459138184 + }, + "prompt_3": { + "accuracy": 0.27191679049034173 + }, + "prompt_4": { + "accuracy": 0.2771173848439822 + }, + "prompt_5": { + "accuracy": 0.2800891530460624 + } + }, + "c_eval_full": { + "prompt_1": { + "accuracy": 0.27334993773349936, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2857142857142857, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.08333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.26666666666666666, + "business_administration": 0.2894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.375, + "high_school_geography": 0.125, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.18518518518518517, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.125, + "high_school_history": 0.44, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, + "sports_science": 0.4583333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.375, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.13725490196078433, + "accountant": 0.2962962962962963, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.24074074074074073, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.2770859277708593, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.3333333333333333, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.5238095238095238, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.125, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.23333333333333334, + "business_administration": 0.2894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.375, + "high_school_geography": 0.125, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.18518518518518517, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.125, + "high_school_history": 0.48, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.28846153846153844, + "sports_science": 0.375, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.13725490196078433, + "accountant": 0.37037037037037035, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.2777777777777778 + } + }, + "prompt_3": { + "accuracy": 0.2727272727272727, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.21428571428571427, + "college_physics": 0.375, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.25, + "business_administration": 0.2894736842105263, + "marxism": 0.25, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.375, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.18518518518518517, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.36, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.28846153846153844, + "sports_science": 0.375, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.375, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.17647058823529413, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2222222222222222, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.286425902864259, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.30952380952380953, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.041666666666666664, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.2833333333333333, + "business_administration": 0.15789473684210525, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.25, + "high_school_geography": 0.125, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.18518518518518517, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.4, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.38461538461538464, + "sports_science": 0.5, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.17647058823529413, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3333333333333333, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.2876712328767123, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2619047619047619, + "college_physics": 0.375, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.25, + "business_administration": 0.3157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.125, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.18518518518518517, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.4, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.3269230769230769, + "sports_science": 0.4166666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.11764705882352941, + "accountant": 0.3148148148148148, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2777777777777778, + "physician": 0.3333333333333333 + } + } + }, + "cmmlu": { + "prompt_1": { + "accuracy": 0.3046594982078853 + }, + "prompt_2": { + "accuracy": 0.2939068100358423 + }, + "prompt_3": { + "accuracy": 0.2974910394265233 + }, + "prompt_4": { + "accuracy": 0.2831541218637993 + }, + "prompt_5": { + "accuracy": 0.26523297491039427 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.2725781384907615, + "category_acc": { + "agronomy": 0.2781065088757396, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.23170731707317074, + "arts": 0.3, + "astronomy": 0.2606060606060606, + "business_ethics": 0.291866028708134, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.2900763358778626, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.2755417956656347, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.29608938547486036, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.24299065420560748, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.25925925925925924, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.2600732600732601, + "computer_science": 0.30392156862745096, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.31654676258992803, + "economics": 0.27044025157232704, + "education": 0.26380368098159507, + "electrical_engineering": 0.25, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.3487394957983193, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.22962962962962963, + "food_science": 0.26573426573426573, + "genetics": 0.26704545454545453, + "global_facts": 0.2348993288590604, + "high_school_biology": 0.1893491124260355, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2972972972972973, + "journalism": 0.29651162790697677, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.3598130841121495, + "logical": 0.2601626016260163, + "machine_learning": 0.28688524590163933, + "management": 0.2714285714285714, + "marketing": 0.26666666666666666, + "marxist_theory": 0.25925925925925924, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.2896551724137931, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.21142857142857144, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.27586206896551724, + "public_relations": 0.25862068965517243, + "security_study": 0.28888888888888886, + "sociology": 0.26991150442477874, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.2485207100591716, + "world_history": 0.2670807453416149, + "world_religions": 0.29375 + } + }, + "prompt_2": { + "accuracy": 0.27542738732515976, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.23780487804878048, + "arts": 0.30625, + "astronomy": 0.2606060606060606, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.27932960893854747, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.20754716981132076, + "college_law": 0.2222222222222222, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.26373626373626374, + "computer_science": 0.29901960784313725, + "computer_security": 0.32748538011695905, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.28776978417266186, + "economics": 0.3081761006289308, + "education": 0.26380368098159507, + "electrical_engineering": 0.25, + "elementary_chinese": 0.25793650793650796, + "elementary_commonsense": 0.3383838383838384, + "elementary_information_and_technology": 0.31092436974789917, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.25925925925925924, + "food_science": 0.2517482517482518, + "genetics": 0.26704545454545453, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.25, + "high_school_geography": 0.19491525423728814, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.3333333333333333, + "international_law": 0.31891891891891894, + "journalism": 0.3023255813953488, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.3317757009345794, + "logical": 0.3252032520325203, + "machine_learning": 0.2786885245901639, + "management": 0.28095238095238095, + "marketing": 0.25, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.32413793103448274, + "philosophy": 0.21904761904761905, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.27586206896551724, + "security_study": 0.3111111111111111, + "sociology": 0.252212389380531, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.24260355029585798, + "world_history": 0.2795031055900621, + "world_religions": 0.3625 + } + }, + "prompt_3": { + "accuracy": 0.2760317734415472, + "category_acc": { + "agronomy": 0.30177514792899407, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.25, + "arts": 0.3, + "astronomy": 0.2545454545454545, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.3125, + "chinese_driving_rule": 0.3282442748091603, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.29102167182662536, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.2911392405063291, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.26851851851851855, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.23443223443223443, + "computer_science": 0.2696078431372549, + "computer_security": 0.30994152046783624, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.2733812949640288, + "economics": 0.2893081761006289, + "education": 0.294478527607362, + "electrical_engineering": 0.22093023255813954, + "elementary_chinese": 0.24206349206349206, + "elementary_commonsense": 0.32323232323232326, + "elementary_information_and_technology": 0.3235294117647059, + "elementary_mathematics": 0.24782608695652175, + "ethnology": 0.2814814814814815, + "food_science": 0.2867132867132867, + "genetics": 0.26136363636363635, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.23076923076923078, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.19491525423728814, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.35714285714285715, + "international_law": 0.31351351351351353, + "journalism": 0.29651162790697677, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.3411214953271028, + "logical": 0.3089430894308943, + "machine_learning": 0.30327868852459017, + "management": 0.2523809523809524, + "marketing": 0.28888888888888886, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.20689655172413793, + "nutrition": 0.31724137931034485, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.36018957345971564, + "professional_medicine": 0.23138297872340424, + "professional_psychology": 0.2413793103448276, + "public_relations": 0.2988505747126437, + "security_study": 0.2814814814814815, + "sociology": 0.24778761061946902, + "sports_science": 0.296969696969697, + "traditional_chinese_medicine": 0.23783783783783785, + "virology": 0.24260355029585798, + "world_history": 0.2670807453416149, + "world_religions": 0.33125 + } + }, + "prompt_4": { + "accuracy": 0.2769815230530133, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.2621951219512195, + "arts": 0.3125, + "astronomy": 0.2545454545454545, + "business_ethics": 0.3157894736842105, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.3282442748091603, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.30030959752321984, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.2905027932960894, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.3055555555555556, + "college_mathematics": 0.34285714285714286, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.2564102564102564, + "computer_science": 0.25980392156862747, + "computer_security": 0.28654970760233917, + "conceptual_physics": 0.1836734693877551, + "construction_project_management": 0.31654676258992803, + "economics": 0.27044025157232704, + "education": 0.3128834355828221, + "electrical_engineering": 0.25, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.35714285714285715, + "elementary_mathematics": 0.2217391304347826, + "ethnology": 0.23703703703703705, + "food_science": 0.2937062937062937, + "genetics": 0.23863636363636365, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.21301775147928995, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.32926829268292684, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.31746031746031744, + "international_law": 0.2918918918918919, + "journalism": 0.3023255813953488, + "jurisprudence": 0.2798053527980535, + "legal_and_moral_basis": 0.35046728971962615, + "logical": 0.2682926829268293, + "machine_learning": 0.28688524590163933, + "management": 0.2714285714285714, + "marketing": 0.3, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.3103448275862069, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.21714285714285714, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.21982758620689655, + "public_relations": 0.25862068965517243, + "security_study": 0.3333333333333333, + "sociology": 0.25663716814159293, + "sports_science": 0.2606060606060606, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.27218934911242604, + "world_history": 0.2670807453416149, + "world_religions": 0.325 + } + }, + "prompt_5": { + "accuracy": 0.27654981868416506, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.25, + "arts": 0.325, + "astronomy": 0.24848484848484848, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.3511450381679389, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.2897196261682243, + "college_engineering_hydrology": 0.20754716981132076, + "college_law": 0.24074074074074073, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.2600732600732601, + "computer_science": 0.28431372549019607, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2949640287769784, + "economics": 0.3018867924528302, + "education": 0.32515337423312884, + "electrical_engineering": 0.23255813953488372, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.32323232323232326, + "elementary_information_and_technology": 0.3277310924369748, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.28888888888888886, + "food_science": 0.26573426573426573, + "genetics": 0.26704545454545453, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.20710059171597633, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.31746031746031744, + "international_law": 0.33513513513513515, + "journalism": 0.3081395348837209, + "jurisprudence": 0.2749391727493917, + "legal_and_moral_basis": 0.3130841121495327, + "logical": 0.2926829268292683, + "machine_learning": 0.2786885245901639, + "management": 0.24285714285714285, + "marketing": 0.26666666666666666, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.1810344827586207, + "nutrition": 0.31724137931034485, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.22857142857142856, + "professional_law": 0.2938388625592417, + "professional_medicine": 0.25, + "professional_psychology": 0.2672413793103448, + "public_relations": 0.28735632183908044, + "security_study": 0.3111111111111111, + "sociology": 0.27876106194690264, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.27218934911242604, + "world_history": 0.2670807453416149, + "world_religions": 0.29375 + } + } + }, + "zbench": { + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.36363636363636365 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.21212121212121213 + }, + "prompt_5": { + "accuracy": 0.30303030303030304 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.425 + }, + "prompt_2": { + "accuracy": 0.3568181818181818 + }, + "prompt_3": { + "accuracy": 0.3840909090909091 + }, + "prompt_4": { + "accuracy": 0.28863636363636364 + }, + "prompt_5": { + "accuracy": 0.27954545454545454 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.3298305084745763 + }, + "prompt_2": { + "accuracy": 0.3264406779661017 + }, + "prompt_3": { + "accuracy": 0.3352542372881356 + }, + "prompt_4": { + "accuracy": 0.3501694915254237 + }, + "prompt_5": { + "accuracy": 0.363728813559322 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.4042632759910247 + }, + "prompt_2": { + "accuracy": 0.40089753178758414 + }, + "prompt_3": { + "accuracy": 0.4035153328347046 + }, + "prompt_4": { + "accuracy": 0.4177262528047868 + }, + "prompt_5": { + "accuracy": 0.4080029917726253 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.40813326800587946 + }, + "prompt_2": { + "accuracy": 0.4223419892209701 + }, + "prompt_3": { + "accuracy": 0.4130328270455659 + }, + "prompt_4": { + "accuracy": 0.3762861342479177 + }, + "prompt_5": { + "accuracy": 0.38755512003919645 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.29377103774853364, + "rouge2": 0.09538068883457688, + "rougeL": 0.23129052665985503, + "avg_rouge": 0.20681408441432184 + }, + "prompt_2": { + "rouge1": 0.27230248741371144, + "rouge2": 0.0942736500840302, + "rougeL": 0.21459037135565592, + "avg_rouge": 0.1937221696177992 + }, + "prompt_3": { + "rouge1": 0.22393085485897568, + "rouge2": 0.07245653934551864, + "rougeL": 0.17898589488917158, + "avg_rouge": 0.15845776303122197 + }, + "prompt_4": { + "rouge1": 0.2929738581579208, + "rouge2": 0.09669147559664419, + "rougeL": 0.23040389533163763, + "avg_rouge": 0.20668974302873422 + }, + "prompt_5": { + "rouge1": 0.2937327223627466, + "rouge2": 0.09773489743135011, + "rougeL": 0.23263308739726699, + "avg_rouge": 0.2080335690637879 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.21094256929850014, + "rouge2": 0.05834685628040306, + "rougeL": 0.15589659259369687, + "avg_rouge": 0.1417286727242 + }, + "prompt_2": { + "rouge1": 0.2051630840453159, + "rouge2": 0.05695186126269857, + "rougeL": 0.15327111873659163, + "avg_rouge": 0.13846202134820204 + }, + "prompt_3": { + "rouge1": 0.210332679631842, + "rouge2": 0.059914756521914594, + "rougeL": 0.158071789981449, + "avg_rouge": 0.14277307537840186 + }, + "prompt_4": { + "rouge1": 0.21377062892940069, + "rouge2": 0.06023397181537684, + "rougeL": 0.1598316221088106, + "avg_rouge": 0.14461207428452938 + }, + "prompt_5": { + "rouge1": 0.22696242207835782, + "rouge2": 0.06197004544113014, + "rougeL": 0.17020020111912634, + "avg_rouge": 0.15304422287953812 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.8944954128440367 + }, + "prompt_2": { + "accuracy": 0.8623853211009175 + }, + "prompt_3": { + "accuracy": 0.8474770642201835 + }, + "prompt_4": { + "accuracy": 0.8577981651376146 + }, + "prompt_5": { + "accuracy": 0.9185779816513762 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.5234899328859061 + }, + "prompt_2": { + "accuracy": 0.5311601150527325 + }, + "prompt_3": { + "accuracy": 0.535953978906999 + }, + "prompt_4": { + "accuracy": 0.5580057526366251 + }, + "prompt_5": { + "accuracy": 0.5378715244487057 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.4595 + }, + "prompt_2": { + "accuracy": 0.493 + }, + "prompt_3": { + "accuracy": 0.448 + }, + "prompt_4": { + "accuracy": 0.516 + }, + "prompt_5": { + "accuracy": 0.4825 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.5 + }, + "prompt_2": { + "accuracy": 0.5 + }, + "prompt_3": { + "accuracy": 0.5 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.4 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 0.8 + }, + "prompt_2": { + "accuracy": 0.8 + }, + "prompt_3": { + "accuracy": 0.8 + }, + "prompt_4": { + "accuracy": 0.8 + }, + "prompt_5": { + "accuracy": 0.8 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.3 + }, + "prompt_2": { + "accuracy": 0.4 + }, + "prompt_3": { + "accuracy": 0.3 + }, + "prompt_4": { + "accuracy": 0.3 + }, + "prompt_5": { + "accuracy": 0.3 + } + }, + "rte": { + "prompt_1": { + "accuracy": 0.7 + }, + "prompt_2": { + "accuracy": 0.7 + }, + "prompt_3": { + "accuracy": 0.7 + }, + "prompt_4": { + "accuracy": 0.7 + }, + "prompt_5": { + "accuracy": 0.8 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.6 + }, + "prompt_2": { + "accuracy": 0.8 + }, + "prompt_3": { + "accuracy": 0.7 + }, + "prompt_4": { + "accuracy": 0.8 + }, + "prompt_5": { + "accuracy": 0.8 + } + } + }, + "five_shot": { + "cross_xquad": { + "prompt_1": -1 + }, + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + } + } + }, + "sea_lion_7b_instruct_research": { + "model_size": "7B", + "model_link": "https://huggingface.co/aisingapore/sea-lion-7b-instruct-research", + "zero_shot": { + "cross_xquad": { + "prompt_1": { + "overall_acc": 0.3865546218487395, + "language_acc": { + "Vietnamese": 0.4647058823529412, + "Spanish": 0.3680672268907563, + "English": 0.37815126050420167, + "Chinese": 0.3352941176470588 + }, + "consistency_score_2": 0.4966386554621849, + "consistency_score_3": 0.3058823529411765, + "consistency_score_4": 0.20336134453781513, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.48739495798319327, + "Vietnamese,English": 0.46218487394957986, + "Vietnamese,Chinese": 0.42773109243697477, + "Spanish,English": 0.5252100840336135, + "Spanish,Chinese": 0.5092436974789916, + "English,Chinese": 0.5680672268907563 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.3042016806722689, + "Vietnamese,Spanish,Chinese": 0.2781512605042017, + "Vietnamese,English,Chinese": 0.292436974789916, + "Spanish,English,Chinese": 0.3487394957983193 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.20336134453781513 + } + }, + "AC3_2": 0.4347360256645828, + "AC3_3": 0.3415191318752932, + "AC3_4": 0.266513442938853 + }, + "prompt_2": { + "overall_acc": 0.35, + "language_acc": { + "Vietnamese": 0.4436974789915966, + "Spanish": 0.32184873949579834, + "English": 0.3277310924369748, + "Chinese": 0.3067226890756303 + }, + "consistency_score_2": 0.5028011204481792, + "consistency_score_3": 0.31176470588235294, + "consistency_score_4": 0.20756302521008405, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.46974789915966386, + "Vietnamese,English": 0.426890756302521, + "Vietnamese,Chinese": 0.40756302521008403, + "Spanish,English": 0.5243697478991597, + "Spanish,Chinese": 0.542016806722689, + "English,Chinese": 0.646218487394958 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.280672268907563, + "Vietnamese,Spanish,Chinese": 0.280672268907563, + "Vietnamese,English,Chinese": 0.29747899159663865, + "Spanish,English,Chinese": 0.38823529411764707 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.20756302521008405 + } + }, + "AC3_2": 0.41271144682301264, + "AC3_3": 0.3297777777279447, + "AC3_4": 0.26058779196532034 + }, + "prompt_3": { + "overall_acc": 0.3621848739495798, + "language_acc": { + "Vietnamese": 0.4504201680672269, + "Spanish": 0.3369747899159664, + "English": 0.32605042016806723, + "Chinese": 0.3352941176470588 + }, + "consistency_score_2": 0.47689075630252103, + "consistency_score_3": 0.28025210084033614, + "consistency_score_4": 0.18067226890756302, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.4319327731092437, + "Vietnamese,English": 0.4008403361344538, + "Vietnamese,Chinese": 0.39915966386554624, + "Spanish,English": 0.5109243697478991, + "Spanish,Chinese": 0.5260504201680672, + "English,Chinese": 0.592436974789916 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.2529411764705882, + "Vietnamese,Spanish,Chinese": 0.24873949579831933, + "Vietnamese,English,Chinese": 0.26218487394957984, + "Spanish,English,Chinese": 0.35714285714285715 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.18067226890756302 + } + }, + "AC3_2": 0.4116973779507072, + "AC3_3": 0.31599386643134925, + "AC3_4": 0.24108281078890167 + }, + "prompt_4": { + "overall_acc": 0.37563025210084033, + "language_acc": { + "Vietnamese": 0.45294117647058824, + "Spanish": 0.3697478991596639, + "English": 0.36134453781512604, + "Chinese": 0.31848739495798317 + }, + "consistency_score_2": 0.4970588235294117, + "consistency_score_3": 0.3086134453781513, + "consistency_score_4": 0.2092436974789916, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.47478991596638653, + "Vietnamese,English": 0.4815126050420168, + "Vietnamese,Chinese": 0.44201680672268906, + "Spanish,English": 0.5109243697478991, + "Spanish,Chinese": 0.5411764705882353, + "English,Chinese": 0.5319327731092437 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.3, + "Vietnamese,Spanish,Chinese": 0.29411764705882354, + "Vietnamese,English,Chinese": 0.2966386554621849, + "Spanish,English,Chinese": 0.34369747899159664 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.2092436974789916 + } + }, + "AC3_2": 0.42789657023050015, + "AC3_3": 0.338839938750685, + "AC3_4": 0.2687699217158547 + }, + "prompt_5": { + "overall_acc": 0.3325630252100841, + "language_acc": { + "Vietnamese": 0.3840336134453782, + "Spanish": 0.3453781512605042, + "English": 0.3142857142857143, + "Chinese": 0.2865546218487395 + }, + "consistency_score_2": 0.5487394957983193, + "consistency_score_3": 0.37899159663865545, + "consistency_score_4": 0.2865546218487395, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Spanish": 0.49411764705882355, + "Vietnamese,English": 0.5588235294117647, + "Vietnamese,Chinese": 0.4680672268907563, + "Spanish,English": 0.5705882352941176, + "Spanish,Chinese": 0.542016806722689, + "English,Chinese": 0.6588235294117647 + }, + "3_combine": { + "Vietnamese,Spanish,English": 0.36554621848739494, + "Vietnamese,Spanish,Chinese": 0.32605042016806723, + "Vietnamese,English,Chinese": 0.39327731092436974, + "Spanish,English,Chinese": 0.43109243697478994 + }, + "4_combine": { + "Vietnamese,Spanish,English,Chinese": 0.2865546218487395 + } + }, + "AC3_2": 0.4141380794994873, + "AC3_3": 0.3542625907261158, + "AC3_4": 0.3078493154484469 + } + }, + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.2523809523809524, + "language_acc": { + "Chinese": 0.28, + "Indonesian": 0.2733333333333333, + "Spanish": 0.29333333333333333, + "Vietnamese": 0.21333333333333335, + "Malay": 0.24, + "English": 0.26666666666666666, + "Filipino": 0.2 + }, + "consistency_score_2": 0.5704761904761904, + "consistency_score_3": 0.4001904761904762, + "consistency_score_4": 0.31314285714285717, + "consistency_score_5": 0.2631746031746032, + "consistency_score_6": 0.23047619047619047, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.54, + "Chinese,Spanish": 0.6, + "Chinese,Vietnamese": 0.5666666666666667, + "Chinese,Malay": 0.5733333333333334, + "Chinese,English": 0.6533333333333333, + "Chinese,Filipino": 0.5666666666666667, + "Indonesian,Spanish": 0.47333333333333333, + "Indonesian,Vietnamese": 0.48, + "Indonesian,Malay": 0.72, + "Indonesian,English": 0.6466666666666666, + "Indonesian,Filipino": 0.5266666666666666, + "Spanish,Vietnamese": 0.5333333333333333, + "Spanish,Malay": 0.5466666666666666, + "Spanish,English": 0.5466666666666666, + "Spanish,Filipino": 0.5533333333333333, + "Vietnamese,Malay": 0.5333333333333333, + "Vietnamese,English": 0.52, + "Vietnamese,Filipino": 0.5333333333333333, + "Malay,English": 0.7066666666666667, + "Malay,Filipino": 0.5666666666666667, + "English,Filipino": 0.5933333333333334 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Vietnamese": 0.36, + "Chinese,Indonesian,Malay": 0.44666666666666666, + "Chinese,Indonesian,English": 0.44666666666666666, + "Chinese,Indonesian,Filipino": 0.3933333333333333, + "Chinese,Spanish,Vietnamese": 0.4066666666666667, + "Chinese,Spanish,Malay": 0.4, + "Chinese,Spanish,English": 0.43333333333333335, + "Chinese,Spanish,Filipino": 0.3933333333333333, + "Chinese,Vietnamese,Malay": 0.38666666666666666, + "Chinese,Vietnamese,English": 0.4066666666666667, + "Chinese,Vietnamese,Filipino": 0.4066666666666667, + "Chinese,Malay,English": 0.49333333333333335, + "Chinese,Malay,Filipino": 0.41333333333333333, + "Chinese,English,Filipino": 0.44666666666666666, + "Indonesian,Spanish,Vietnamese": 0.3, + "Indonesian,Spanish,Malay": 0.42, + "Indonesian,Spanish,English": 0.36666666666666664, + "Indonesian,Spanish,Filipino": 0.34, + "Indonesian,Vietnamese,Malay": 0.3933333333333333, + "Indonesian,Vietnamese,English": 0.37333333333333335, + "Indonesian,Vietnamese,Filipino": 0.3333333333333333, + "Indonesian,Malay,English": 0.56, + "Indonesian,Malay,Filipino": 0.46, + "Indonesian,English,Filipino": 0.43333333333333335, + "Spanish,Vietnamese,Malay": 0.36, + "Spanish,Vietnamese,English": 0.3333333333333333, + "Spanish,Vietnamese,Filipino": 0.37333333333333335, + "Spanish,Malay,English": 0.41333333333333333, + "Spanish,Malay,Filipino": 0.37333333333333335, + "Spanish,English,Filipino": 0.37333333333333335, + "Vietnamese,Malay,English": 0.41333333333333333, + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,English,Filipino": 0.38, + "Malay,English,Filipino": 0.4533333333333333 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.26666666666666666, + "Chinese,Indonesian,Spanish,Malay": 0.32, + "Chinese,Indonesian,Spanish,English": 0.30666666666666664, + "Chinese,Indonesian,Spanish,Filipino": 0.2866666666666667, + "Chinese,Indonesian,Vietnamese,Malay": 0.32, + "Chinese,Indonesian,Vietnamese,English": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese,Filipino": 0.3, + "Chinese,Indonesian,Malay,English": 0.4066666666666667, + "Chinese,Indonesian,Malay,Filipino": 0.36, + "Chinese,Indonesian,English,Filipino": 0.35333333333333333, + "Chinese,Spanish,Vietnamese,Malay": 0.29333333333333333, + "Chinese,Spanish,Vietnamese,English": 0.29333333333333333, + "Chinese,Spanish,Vietnamese,Filipino": 0.30666666666666664, + "Chinese,Spanish,Malay,English": 0.34, + "Chinese,Spanish,Malay,Filipino": 0.31333333333333335, + "Chinese,Spanish,English,Filipino": 0.31333333333333335, + "Chinese,Vietnamese,Malay,English": 0.3466666666666667, + "Chinese,Vietnamese,Malay,Filipino": 0.31333333333333335, + "Chinese,Vietnamese,English,Filipino": 0.32, + "Chinese,Malay,English,Filipino": 0.36666666666666664, + "Indonesian,Spanish,Vietnamese,Malay": 0.2866666666666667, + "Indonesian,Spanish,Vietnamese,English": 0.26, + "Indonesian,Spanish,Vietnamese,Filipino": 0.24666666666666667, + "Indonesian,Spanish,Malay,English": 0.3466666666666667, + "Indonesian,Spanish,Malay,Filipino": 0.31333333333333335, + "Indonesian,Spanish,English,Filipino": 0.2866666666666667, + "Indonesian,Vietnamese,Malay,English": 0.34, + "Indonesian,Vietnamese,Malay,Filipino": 0.30666666666666664, + "Indonesian,Vietnamese,English,Filipino": 0.2866666666666667, + "Indonesian,Malay,English,Filipino": 0.3933333333333333, + "Spanish,Vietnamese,Malay,English": 0.29333333333333333, + "Spanish,Vietnamese,Malay,Filipino": 0.2866666666666667, + "Spanish,Vietnamese,English,Filipino": 0.26, + "Spanish,Malay,English,Filipino": 0.3, + "Vietnamese,Malay,English,Filipino": 0.31333333333333335 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.25333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.24, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.23333333333333334, + "Chinese,Indonesian,Spanish,Malay,English": 0.29333333333333333, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.2733333333333333, + "Chinese,Indonesian,Spanish,English,Filipino": 0.26, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.3, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.28, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.26666666666666666, + "Chinese,Indonesian,Malay,English,Filipino": 0.3333333333333333, + "Chinese,Spanish,Vietnamese,Malay,English": 0.26, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.25333333333333335, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.24, + "Chinese,Spanish,Malay,English,Filipino": 0.2733333333333333, + "Chinese,Vietnamese,Malay,English,Filipino": 0.28, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.25333333333333335, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.24, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.21333333333333335, + "Indonesian,Spanish,Malay,English,Filipino": 0.2733333333333333, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.2733333333333333, + "Spanish,Vietnamese,Malay,English,Filipino": 0.23333333333333334 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.23333333333333334, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.22666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.20666666666666667, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.25333333333333335, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.26, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.22, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.21333333333333335 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.20666666666666667 + } + }, + "AC3_2": 0.349944885319024, + "AC3_3": 0.30954604837777294, + "AC3_4": 0.2794976663125076, + "AC3_5": 0.2576647900039746, + "AC3_6": 0.24093171780489767, + "AC3_7": 0.22724757948023305 + }, + "prompt_2": { + "overall_acc": 0.23714285714285716, + "language_acc": { + "Chinese": 0.24666666666666667, + "Indonesian": 0.22666666666666666, + "Spanish": 0.26, + "Vietnamese": 0.20666666666666667, + "Malay": 0.22, + "English": 0.26, + "Filipino": 0.24 + }, + "consistency_score_2": 0.634920634920635, + "consistency_score_3": 0.4714285714285714, + "consistency_score_4": 0.3750476190476192, + "consistency_score_5": 0.3095238095238095, + "consistency_score_6": 0.2619047619047619, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.6066666666666667, + "Chinese,Spanish": 0.6333333333333333, + "Chinese,Vietnamese": 0.66, + "Chinese,Malay": 0.6333333333333333, + "Chinese,English": 0.7133333333333334, + "Chinese,Filipino": 0.6133333333333333, + "Indonesian,Spanish": 0.6066666666666667, + "Indonesian,Vietnamese": 0.5533333333333333, + "Indonesian,Malay": 0.7866666666666666, + "Indonesian,English": 0.7666666666666667, + "Indonesian,Filipino": 0.52, + "Spanish,Vietnamese": 0.6266666666666667, + "Spanish,Malay": 0.62, + "Spanish,English": 0.6866666666666666, + "Spanish,Filipino": 0.62, + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,English": 0.58, + "Vietnamese,Filipino": 0.6533333333333333, + "Malay,English": 0.7866666666666666, + "Malay,Filipino": 0.5133333333333333, + "English,Filipino": 0.6066666666666667 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.4533333333333333, + "Chinese,Indonesian,Vietnamese": 0.42, + "Chinese,Indonesian,Malay": 0.5266666666666666, + "Chinese,Indonesian,English": 0.5666666666666667, + "Chinese,Indonesian,Filipino": 0.4066666666666667, + "Chinese,Spanish,Vietnamese": 0.47333333333333333, + "Chinese,Spanish,Malay": 0.4666666666666667, + "Chinese,Spanish,English": 0.5333333333333333, + "Chinese,Spanish,Filipino": 0.46, + "Chinese,Vietnamese,Malay": 0.43333333333333335, + "Chinese,Vietnamese,English": 0.4866666666666667, + "Chinese,Vietnamese,Filipino": 0.4866666666666667, + "Chinese,Malay,English": 0.5733333333333334, + "Chinese,Malay,Filipino": 0.4066666666666667, + "Chinese,English,Filipino": 0.48, + "Indonesian,Spanish,Vietnamese": 0.4066666666666667, + "Indonesian,Spanish,Malay": 0.52, + "Indonesian,Spanish,English": 0.5466666666666666, + "Indonesian,Spanish,Filipino": 0.4066666666666667, + "Indonesian,Vietnamese,Malay": 0.46, + "Indonesian,Vietnamese,English": 0.4666666666666667, + "Indonesian,Vietnamese,Filipino": 0.38666666666666666, + "Indonesian,Malay,English": 0.6733333333333333, + "Indonesian,Malay,Filipino": 0.43333333333333335, + "Indonesian,English,Filipino": 0.47333333333333333, + "Spanish,Vietnamese,Malay": 0.4066666666666667, + "Spanish,Vietnamese,English": 0.4666666666666667, + "Spanish,Vietnamese,Filipino": 0.4866666666666667, + "Spanish,Malay,English": 0.5666666666666667, + "Spanish,Malay,Filipino": 0.4, + "Spanish,English,Filipino": 0.4666666666666667, + "Vietnamese,Malay,English": 0.4666666666666667, + "Vietnamese,Malay,Filipino": 0.38, + "Vietnamese,English,Filipino": 0.44, + "Malay,English,Filipino": 0.47333333333333333 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.3333333333333333, + "Chinese,Indonesian,Spanish,Malay": 0.3933333333333333, + "Chinese,Indonesian,Spanish,English": 0.4266666666666667, + "Chinese,Indonesian,Spanish,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Vietnamese,Malay": 0.36666666666666664, + "Chinese,Indonesian,Vietnamese,English": 0.38666666666666666, + "Chinese,Indonesian,Vietnamese,Filipino": 0.31333333333333335, + "Chinese,Indonesian,Malay,English": 0.5133333333333333, + "Chinese,Indonesian,Malay,Filipino": 0.35333333333333333, + "Chinese,Indonesian,English,Filipino": 0.38666666666666666, + "Chinese,Spanish,Vietnamese,Malay": 0.3466666666666667, + "Chinese,Spanish,Vietnamese,English": 0.4066666666666667, + "Chinese,Spanish,Vietnamese,Filipino": 0.38, + "Chinese,Spanish,Malay,English": 0.44, + "Chinese,Spanish,Malay,Filipino": 0.3333333333333333, + "Chinese,Spanish,English,Filipino": 0.4, + "Chinese,Vietnamese,Malay,English": 0.4, + "Chinese,Vietnamese,Malay,Filipino": 0.32666666666666666, + "Chinese,Vietnamese,English,Filipino": 0.38, + "Chinese,Malay,English,Filipino": 0.38666666666666666, + "Indonesian,Spanish,Vietnamese,Malay": 0.35333333333333333, + "Indonesian,Spanish,Vietnamese,English": 0.36666666666666664, + "Indonesian,Spanish,Vietnamese,Filipino": 0.32, + "Indonesian,Spanish,Malay,English": 0.49333333333333335, + "Indonesian,Spanish,Malay,Filipino": 0.3466666666666667, + "Indonesian,Spanish,English,Filipino": 0.36666666666666664, + "Indonesian,Vietnamese,Malay,English": 0.41333333333333333, + "Indonesian,Vietnamese,Malay,Filipino": 0.32, + "Indonesian,Vietnamese,English,Filipino": 0.3466666666666667, + "Indonesian,Malay,English,Filipino": 0.41333333333333333, + "Spanish,Vietnamese,Malay,English": 0.38, + "Spanish,Vietnamese,Malay,Filipino": 0.32, + "Spanish,Vietnamese,English,Filipino": 0.36, + "Spanish,Malay,English,Filipino": 0.37333333333333335, + "Vietnamese,Malay,English,Filipino": 0.3466666666666667 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.29333333333333333, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.31333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.26666666666666666, + "Chinese,Indonesian,Spanish,Malay,English": 0.38666666666666666, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.2866666666666667, + "Chinese,Indonesian,Spanish,English,Filipino": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.35333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.2733333333333333, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.29333333333333333, + "Chinese,Indonesian,Malay,English,Filipino": 0.3466666666666667, + "Chinese,Spanish,Vietnamese,Malay,English": 0.3333333333333333, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.28, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.32666666666666666, + "Chinese,Spanish,Malay,English,Filipino": 0.32, + "Chinese,Vietnamese,Malay,English,Filipino": 0.30666666666666664, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.3333333333333333, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.28, + "Indonesian,Spanish,Malay,English,Filipino": 0.32666666666666666, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.3, + "Spanish,Vietnamese,Malay,English,Filipino": 0.29333333333333333 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.2866666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.23333333333333334, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.24666666666666667, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.28, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.26666666666666666, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.26666666666666666, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.25333333333333335 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.22666666666666666 + } + }, + "AC3_2": 0.34531176864312174, + "AC3_3": 0.31555299534717135, + "AC3_4": 0.29056271663846484, + "AC3_5": 0.2685415629175266, + "AC3_6": 0.24890948740922889, + "AC3_7": 0.23178644758862924 + }, + "prompt_3": { + "overall_acc": 0.2580952380952381, + "language_acc": { + "Chinese": 0.3, + "Indonesian": 0.25333333333333335, + "Spanish": 0.3, + "Vietnamese": 0.23333333333333334, + "Malay": 0.21333333333333335, + "English": 0.26666666666666666, + "Filipino": 0.24 + }, + "consistency_score_2": 0.6063492063492065, + "consistency_score_3": 0.4588571428571428, + "consistency_score_4": 0.379047619047619, + "consistency_score_5": 0.326984126984127, + "consistency_score_6": 0.28952380952380957, + "consistency_score_7": 0.26, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5933333333333334, + "Chinese,Spanish": 0.6066666666666667, + "Chinese,Vietnamese": 0.6066666666666667, + "Chinese,Malay": 0.5866666666666667, + "Chinese,English": 0.66, + "Chinese,Filipino": 0.5733333333333334, + "Indonesian,Spanish": 0.62, + "Indonesian,Vietnamese": 0.5333333333333333, + "Indonesian,Malay": 0.7, + "Indonesian,English": 0.7133333333333334, + "Indonesian,Filipino": 0.5733333333333334, + "Spanish,Vietnamese": 0.58, + "Spanish,Malay": 0.6133333333333333, + "Spanish,English": 0.7, + "Spanish,Filipino": 0.56, + "Vietnamese,Malay": 0.54, + "Vietnamese,English": 0.56, + "Vietnamese,Filipino": 0.5466666666666666, + "Malay,English": 0.6866666666666666, + "Malay,Filipino": 0.5533333333333333, + "English,Filipino": 0.6266666666666667 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.46, + "Chinese,Indonesian,Vietnamese": 0.43333333333333335, + "Chinese,Indonesian,Malay": 0.4866666666666667, + "Chinese,Indonesian,English": 0.52, + "Chinese,Indonesian,Filipino": 0.44, + "Chinese,Spanish,Vietnamese": 0.44, + "Chinese,Spanish,Malay": 0.44, + "Chinese,Spanish,English": 0.52, + "Chinese,Spanish,Filipino": 0.41333333333333333, + "Chinese,Vietnamese,Malay": 0.41333333333333333, + "Chinese,Vietnamese,English": 0.4533333333333333, + "Chinese,Vietnamese,Filipino": 0.42, + "Chinese,Malay,English": 0.5066666666666667, + "Chinese,Malay,Filipino": 0.41333333333333333, + "Chinese,English,Filipino": 0.47333333333333333, + "Indonesian,Spanish,Vietnamese": 0.42, + "Indonesian,Spanish,Malay": 0.5066666666666667, + "Indonesian,Spanish,English": 0.5533333333333333, + "Indonesian,Spanish,Filipino": 0.42, + "Indonesian,Vietnamese,Malay": 0.44, + "Indonesian,Vietnamese,English": 0.46, + "Indonesian,Vietnamese,Filipino": 0.38666666666666666, + "Indonesian,Malay,English": 0.5933333333333334, + "Indonesian,Malay,Filipino": 0.47333333333333333, + "Indonesian,English,Filipino": 0.5066666666666667, + "Spanish,Vietnamese,Malay": 0.4266666666666667, + "Spanish,Vietnamese,English": 0.46, + "Spanish,Vietnamese,Filipino": 0.4, + "Spanish,Malay,English": 0.54, + "Spanish,Malay,Filipino": 0.42, + "Spanish,English,Filipino": 0.4866666666666667, + "Vietnamese,Malay,English": 0.44666666666666666, + "Vietnamese,Malay,Filipino": 0.38666666666666666, + "Vietnamese,English,Filipino": 0.43333333333333335, + "Malay,English,Filipino": 0.4666666666666667 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.36, + "Chinese,Indonesian,Spanish,Malay": 0.4, + "Chinese,Indonesian,Spanish,English": 0.43333333333333335, + "Chinese,Indonesian,Spanish,Filipino": 0.3466666666666667, + "Chinese,Indonesian,Vietnamese,Malay": 0.38, + "Chinese,Indonesian,Vietnamese,English": 0.3933333333333333, + "Chinese,Indonesian,Vietnamese,Filipino": 0.34, + "Chinese,Indonesian,Malay,English": 0.4533333333333333, + "Chinese,Indonesian,Malay,Filipino": 0.38666666666666666, + "Chinese,Indonesian,English,Filipino": 0.4066666666666667, + "Chinese,Spanish,Vietnamese,Malay": 0.35333333333333333, + "Chinese,Spanish,Vietnamese,English": 0.3933333333333333, + "Chinese,Spanish,Vietnamese,Filipino": 0.32666666666666666, + "Chinese,Spanish,Malay,English": 0.41333333333333333, + "Chinese,Spanish,Malay,Filipino": 0.3333333333333333, + "Chinese,Spanish,English,Filipino": 0.38666666666666666, + "Chinese,Vietnamese,Malay,English": 0.38666666666666666, + "Chinese,Vietnamese,Malay,Filipino": 0.32666666666666666, + "Chinese,Vietnamese,English,Filipino": 0.35333333333333333, + "Chinese,Malay,English,Filipino": 0.38, + "Indonesian,Spanish,Vietnamese,Malay": 0.36666666666666664, + "Indonesian,Spanish,Vietnamese,English": 0.3933333333333333, + "Indonesian,Spanish,Vietnamese,Filipino": 0.32, + "Indonesian,Spanish,Malay,English": 0.4866666666666667, + "Indonesian,Spanish,Malay,Filipino": 0.36666666666666664, + "Indonesian,Spanish,English,Filipino": 0.4, + "Indonesian,Vietnamese,Malay,English": 0.41333333333333333, + "Indonesian,Vietnamese,Malay,Filipino": 0.3466666666666667, + "Indonesian,Vietnamese,English,Filipino": 0.36666666666666664, + "Indonesian,Malay,English,Filipino": 0.43333333333333335, + "Spanish,Vietnamese,Malay,English": 0.38666666666666666, + "Spanish,Vietnamese,Malay,Filipino": 0.3333333333333333, + "Spanish,Vietnamese,English,Filipino": 0.36, + "Spanish,Malay,English,Filipino": 0.3933333333333333, + "Vietnamese,Malay,English,Filipino": 0.3466666666666667 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.32666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.3466666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.2866666666666667, + "Chinese,Indonesian,Spanish,Malay,English": 0.38666666666666666, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.31333333333333335, + "Chinese,Indonesian,Spanish,English,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.36, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.31333333333333335, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.32, + "Chinese,Indonesian,Malay,English,Filipino": 0.36, + "Chinese,Spanish,Vietnamese,Malay,English": 0.34, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.28, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.30666666666666664, + "Chinese,Spanish,Malay,English,Filipino": 0.32666666666666666, + "Chinese,Vietnamese,Malay,English,Filipino": 0.30666666666666664, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.36, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.29333333333333333, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.31333333333333335, + "Indonesian,Spanish,Malay,English,Filipino": 0.36, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.32666666666666666, + "Spanish,Vietnamese,Malay,English,Filipino": 0.30666666666666664 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.32, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.26666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.28, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.30666666666666664, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.29333333333333333, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.2866666666666667 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.26 + } + }, + "AC3_2": 0.3620726439257278, + "AC3_3": 0.3303673902681759, + "AC3_4": 0.307090896100994, + "AC3_5": 0.2884840967907314, + "AC3_6": 0.27290766040565123, + "AC3_7": 0.2590441175970595 + }, + "prompt_4": { + "overall_acc": 0.24380952380952378, + "language_acc": { + "Chinese": 0.2733333333333333, + "Indonesian": 0.23333333333333334, + "Spanish": 0.2733333333333333, + "Vietnamese": 0.2, + "Malay": 0.24666666666666667, + "English": 0.24666666666666667, + "Filipino": 0.23333333333333334 + }, + "consistency_score_2": 0.6422222222222222, + "consistency_score_3": 0.4885714285714285, + "consistency_score_4": 0.40304761904761904, + "consistency_score_5": 0.34888888888888886, + "consistency_score_6": 0.31238095238095237, + "consistency_score_7": 0.2866666666666667, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.5866666666666667, + "Chinese,Spanish": 0.7133333333333334, + "Chinese,Vietnamese": 0.6466666666666666, + "Chinese,Malay": 0.6066666666666667, + "Chinese,English": 0.6533333333333333, + "Chinese,Filipino": 0.6866666666666666, + "Indonesian,Spanish": 0.54, + "Indonesian,Vietnamese": 0.5933333333333334, + "Indonesian,Malay": 0.7533333333333333, + "Indonesian,English": 0.68, + "Indonesian,Filipino": 0.5866666666666667, + "Spanish,Vietnamese": 0.6533333333333333, + "Spanish,Malay": 0.64, + "Spanish,English": 0.6733333333333333, + "Spanish,Filipino": 0.6333333333333333, + "Vietnamese,Malay": 0.6133333333333333, + "Vietnamese,English": 0.6333333333333333, + "Vietnamese,Filipino": 0.64, + "Malay,English": 0.7066666666666667, + "Malay,Filipino": 0.6333333333333333, + "English,Filipino": 0.6133333333333333 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.4533333333333333, + "Chinese,Indonesian,Vietnamese": 0.44666666666666666, + "Chinese,Indonesian,Malay": 0.49333333333333335, + "Chinese,Indonesian,English": 0.4866666666666667, + "Chinese,Indonesian,Filipino": 0.46, + "Chinese,Spanish,Vietnamese": 0.5266666666666666, + "Chinese,Spanish,Malay": 0.5, + "Chinese,Spanish,English": 0.5333333333333333, + "Chinese,Spanish,Filipino": 0.5466666666666666, + "Chinese,Vietnamese,Malay": 0.46, + "Chinese,Vietnamese,English": 0.47333333333333333, + "Chinese,Vietnamese,Filipino": 0.5066666666666667, + "Chinese,Malay,English": 0.5066666666666667, + "Chinese,Malay,Filipino": 0.4866666666666667, + "Chinese,English,Filipino": 0.5, + "Indonesian,Spanish,Vietnamese": 0.43333333333333335, + "Indonesian,Spanish,Malay": 0.4866666666666667, + "Indonesian,Spanish,English": 0.4666666666666667, + "Indonesian,Spanish,Filipino": 0.4266666666666667, + "Indonesian,Vietnamese,Malay": 0.5066666666666667, + "Indonesian,Vietnamese,English": 0.4866666666666667, + "Indonesian,Vietnamese,Filipino": 0.4533333333333333, + "Indonesian,Malay,English": 0.5866666666666667, + "Indonesian,Malay,Filipino": 0.52, + "Indonesian,English,Filipino": 0.47333333333333333, + "Spanish,Vietnamese,Malay": 0.48, + "Spanish,Vietnamese,English": 0.49333333333333335, + "Spanish,Vietnamese,Filipino": 0.4866666666666667, + "Spanish,Malay,English": 0.52, + "Spanish,Malay,Filipino": 0.4866666666666667, + "Spanish,English,Filipino": 0.4866666666666667, + "Vietnamese,Malay,English": 0.5, + "Vietnamese,Malay,Filipino": 0.4666666666666667, + "Vietnamese,English,Filipino": 0.46, + "Malay,English,Filipino": 0.5 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.38, + "Chinese,Indonesian,Spanish,Malay": 0.4066666666666667, + "Chinese,Indonesian,Spanish,English": 0.4, + "Chinese,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Chinese,Indonesian,Vietnamese,Malay": 0.38666666666666666, + "Chinese,Indonesian,Vietnamese,English": 0.38, + "Chinese,Indonesian,Vietnamese,Filipino": 0.37333333333333335, + "Chinese,Indonesian,Malay,English": 0.44, + "Chinese,Indonesian,Malay,Filipino": 0.42, + "Chinese,Indonesian,English,Filipino": 0.4, + "Chinese,Spanish,Vietnamese,Malay": 0.42, + "Chinese,Spanish,Vietnamese,English": 0.4266666666666667, + "Chinese,Spanish,Vietnamese,Filipino": 0.43333333333333335, + "Chinese,Spanish,Malay,English": 0.42, + "Chinese,Spanish,Malay,Filipino": 0.43333333333333335, + "Chinese,Spanish,English,Filipino": 0.44666666666666666, + "Chinese,Vietnamese,Malay,English": 0.4066666666666667, + "Chinese,Vietnamese,Malay,Filipino": 0.37333333333333335, + "Chinese,Vietnamese,English,Filipino": 0.38666666666666666, + "Chinese,Malay,English,Filipino": 0.41333333333333333, + "Indonesian,Spanish,Vietnamese,Malay": 0.3933333333333333, + "Indonesian,Spanish,Vietnamese,English": 0.38, + "Indonesian,Spanish,Vietnamese,Filipino": 0.36, + "Indonesian,Spanish,Malay,English": 0.42, + "Indonesian,Spanish,Malay,Filipino": 0.4066666666666667, + "Indonesian,Spanish,English,Filipino": 0.37333333333333335, + "Indonesian,Vietnamese,Malay,English": 0.43333333333333335, + "Indonesian,Vietnamese,Malay,Filipino": 0.4066666666666667, + "Indonesian,Vietnamese,English,Filipino": 0.38666666666666666, + "Indonesian,Malay,English,Filipino": 0.44666666666666666, + "Spanish,Vietnamese,Malay,English": 0.4066666666666667, + "Spanish,Vietnamese,Malay,Filipino": 0.38666666666666666, + "Spanish,Vietnamese,English,Filipino": 0.38, + "Spanish,Malay,English,Filipino": 0.3933333333333333, + "Vietnamese,Malay,English,Filipino": 0.3933333333333333 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.3466666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.34, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Spanish,Malay,English": 0.36, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.37333333333333335, + "Chinese,Indonesian,Spanish,English,Filipino": 0.35333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.35333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.32666666666666666, + "Chinese,Indonesian,Malay,English,Filipino": 0.38, + "Chinese,Spanish,Vietnamese,Malay,English": 0.36666666666666664, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.35333333333333333, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.36, + "Chinese,Spanish,Malay,English,Filipino": 0.36666666666666664, + "Chinese,Vietnamese,Malay,English,Filipino": 0.3333333333333333, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.3466666666666667, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.34, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.32, + "Indonesian,Spanish,Malay,English,Filipino": 0.35333333333333333, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.36, + "Spanish,Vietnamese,Malay,English,Filipino": 0.32666666666666666 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.31333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.31333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.30666666666666664, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.3333333333333333, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.30666666666666664, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.31333333333333335, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.3 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.2866666666666667 + } + }, + "AC3_2": 0.3534408216488712, + "AC3_3": 0.3252907300317502, + "AC3_4": 0.3038285938169596, + "AC3_5": 0.28703445808404787, + "AC3_6": 0.27386823217515655, + "AC3_7": 0.2635068820568494 + }, + "prompt_5": { + "overall_acc": 0.26095238095238094, + "language_acc": { + "Chinese": 0.26, + "Indonesian": 0.23333333333333334, + "Spanish": 0.31333333333333335, + "Vietnamese": 0.2733333333333333, + "Malay": 0.24, + "English": 0.2733333333333333, + "Filipino": 0.23333333333333334 + }, + "consistency_score_2": 0.74, + "consistency_score_3": 0.6253333333333333, + "consistency_score_4": 0.5506666666666666, + "consistency_score_5": 0.49428571428571433, + "consistency_score_6": 0.44952380952380955, + "consistency_score_7": 0.41333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Chinese,Indonesian": 0.7133333333333334, + "Chinese,Spanish": 0.7066666666666667, + "Chinese,Vietnamese": 0.6466666666666666, + "Chinese,Malay": 0.7266666666666667, + "Chinese,English": 0.7733333333333333, + "Chinese,Filipino": 0.6933333333333334, + "Indonesian,Spanish": 0.74, + "Indonesian,Vietnamese": 0.68, + "Indonesian,Malay": 0.8333333333333334, + "Indonesian,English": 0.8266666666666667, + "Indonesian,Filipino": 0.7533333333333333, + "Spanish,Vietnamese": 0.68, + "Spanish,Malay": 0.7866666666666666, + "Spanish,English": 0.8066666666666666, + "Spanish,Filipino": 0.7466666666666667, + "Vietnamese,Malay": 0.6933333333333334, + "Vietnamese,English": 0.72, + "Vietnamese,Filipino": 0.68, + "Malay,English": 0.8266666666666667, + "Malay,Filipino": 0.7466666666666667, + "English,Filipino": 0.76 + }, + "3_combine": { + "Chinese,Indonesian,Spanish": 0.5933333333333334, + "Chinese,Indonesian,Vietnamese": 0.5333333333333333, + "Chinese,Indonesian,Malay": 0.64, + "Chinese,Indonesian,English": 0.6733333333333333, + "Chinese,Indonesian,Filipino": 0.5933333333333334, + "Chinese,Spanish,Vietnamese": 0.5333333333333333, + "Chinese,Spanish,Malay": 0.6266666666666667, + "Chinese,Spanish,English": 0.66, + "Chinese,Spanish,Filipino": 0.5866666666666667, + "Chinese,Vietnamese,Malay": 0.5533333333333333, + "Chinese,Vietnamese,English": 0.5866666666666667, + "Chinese,Vietnamese,Filipino": 0.5333333333333333, + "Chinese,Malay,English": 0.6866666666666666, + "Chinese,Malay,Filipino": 0.5933333333333334, + "Chinese,English,Filipino": 0.6333333333333333, + "Indonesian,Spanish,Vietnamese": 0.5666666666666667, + "Indonesian,Spanish,Malay": 0.6866666666666666, + "Indonesian,Spanish,English": 0.7066666666666667, + "Indonesian,Spanish,Filipino": 0.64, + "Indonesian,Vietnamese,Malay": 0.6133333333333333, + "Indonesian,Vietnamese,English": 0.6333333333333333, + "Indonesian,Vietnamese,Filipino": 0.58, + "Indonesian,Malay,English": 0.76, + "Indonesian,Malay,Filipino": 0.6733333333333333, + "Indonesian,English,Filipino": 0.6866666666666666, + "Spanish,Vietnamese,Malay": 0.5866666666666667, + "Spanish,Vietnamese,English": 0.6133333333333333, + "Spanish,Vietnamese,Filipino": 0.5733333333333334, + "Spanish,Malay,English": 0.72, + "Spanish,Malay,Filipino": 0.6533333333333333, + "Spanish,English,Filipino": 0.6733333333333333, + "Vietnamese,Malay,English": 0.6333333333333333, + "Vietnamese,Malay,Filipino": 0.58, + "Vietnamese,English,Filipino": 0.6, + "Malay,English,Filipino": 0.68 + }, + "4_combine": { + "Chinese,Indonesian,Spanish,Vietnamese": 0.46, + "Chinese,Indonesian,Spanish,Malay": 0.56, + "Chinese,Indonesian,Spanish,English": 0.58, + "Chinese,Indonesian,Spanish,Filipino": 0.54, + "Chinese,Indonesian,Vietnamese,Malay": 0.5, + "Chinese,Indonesian,Vietnamese,English": 0.5133333333333333, + "Chinese,Indonesian,Vietnamese,Filipino": 0.4866666666666667, + "Chinese,Indonesian,Malay,English": 0.6266666666666667, + "Chinese,Indonesian,Malay,Filipino": 0.56, + "Chinese,Indonesian,English,Filipino": 0.5733333333333334, + "Chinese,Spanish,Vietnamese,Malay": 0.48, + "Chinese,Spanish,Vietnamese,English": 0.5066666666666667, + "Chinese,Spanish,Vietnamese,Filipino": 0.4666666666666667, + "Chinese,Spanish,Malay,English": 0.6, + "Chinese,Spanish,Malay,Filipino": 0.54, + "Chinese,Spanish,English,Filipino": 0.5733333333333334, + "Chinese,Vietnamese,Malay,English": 0.5333333333333333, + "Chinese,Vietnamese,Malay,Filipino": 0.48, + "Chinese,Vietnamese,English,Filipino": 0.5, + "Chinese,Malay,English,Filipino": 0.58, + "Indonesian,Spanish,Vietnamese,Malay": 0.5266666666666666, + "Indonesian,Spanish,Vietnamese,English": 0.5466666666666666, + "Indonesian,Spanish,Vietnamese,Filipino": 0.52, + "Indonesian,Spanish,Malay,English": 0.6733333333333333, + "Indonesian,Spanish,Malay,Filipino": 0.6, + "Indonesian,Spanish,English,Filipino": 0.62, + "Indonesian,Vietnamese,Malay,English": 0.5866666666666667, + "Indonesian,Vietnamese,Malay,Filipino": 0.54, + "Indonesian,Vietnamese,English,Filipino": 0.56, + "Indonesian,Malay,English,Filipino": 0.6466666666666666, + "Spanish,Vietnamese,Malay,English": 0.56, + "Spanish,Vietnamese,Malay,Filipino": 0.5266666666666666, + "Spanish,Vietnamese,English,Filipino": 0.54, + "Spanish,Malay,English,Filipino": 0.62, + "Vietnamese,Malay,English,Filipino": 0.5466666666666666 + }, + "5_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay": 0.43333333333333335, + "Chinese,Indonesian,Spanish,Vietnamese,English": 0.44666666666666666, + "Chinese,Indonesian,Spanish,Vietnamese,Filipino": 0.44, + "Chinese,Indonesian,Spanish,Malay,English": 0.5533333333333333, + "Chinese,Indonesian,Spanish,Malay,Filipino": 0.5133333333333333, + "Chinese,Indonesian,Spanish,English,Filipino": 0.5266666666666666, + "Chinese,Indonesian,Vietnamese,Malay,English": 0.4866666666666667, + "Chinese,Indonesian,Vietnamese,Malay,Filipino": 0.46, + "Chinese,Indonesian,Vietnamese,English,Filipino": 0.4666666666666667, + "Chinese,Indonesian,Malay,English,Filipino": 0.5466666666666666, + "Chinese,Spanish,Vietnamese,Malay,English": 0.4666666666666667, + "Chinese,Spanish,Vietnamese,Malay,Filipino": 0.44, + "Chinese,Spanish,Vietnamese,English,Filipino": 0.4533333333333333, + "Chinese,Spanish,Malay,English,Filipino": 0.5333333333333333, + "Chinese,Vietnamese,Malay,English,Filipino": 0.4666666666666667, + "Indonesian,Spanish,Vietnamese,Malay,English": 0.52, + "Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.49333333333333335, + "Indonesian,Spanish,Vietnamese,English,Filipino": 0.5066666666666667, + "Indonesian,Spanish,Malay,English,Filipino": 0.5933333333333334, + "Indonesian,Vietnamese,Malay,English,Filipino": 0.5266666666666666, + "Spanish,Vietnamese,Malay,English,Filipino": 0.5066666666666667 + }, + "6_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English": 0.4266666666666667, + "Chinese,Indonesian,Spanish,Vietnamese,Malay,Filipino": 0.42, + "Chinese,Indonesian,Spanish,Vietnamese,English,Filipino": 0.4266666666666667, + "Chinese,Indonesian,Spanish,Malay,English,Filipino": 0.5066666666666667, + "Chinese,Indonesian,Vietnamese,Malay,English,Filipino": 0.44666666666666666, + "Chinese,Spanish,Vietnamese,Malay,English,Filipino": 0.43333333333333335, + "Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.4866666666666667 + }, + "7_combine": { + "Chinese,Indonesian,Spanish,Vietnamese,Malay,English,Filipino": 0.41333333333333333 + } + }, + "AC3_2": 0.38584205514699005, + "AC3_3": 0.36823841245218314, + "AC3_4": 0.3541015410656567, + "AC3_5": 0.3415744910374604, + "AC3_6": 0.3302132005152483, + "AC3_7": 0.31992467038569855 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.28814935064935066, + "language_acc": { + "Vietnamese": 0.2840909090909091, + "Indonesian": 0.3409090909090909, + "Spanish": 0.2840909090909091, + "Malay": 0.2784090909090909, + "Filipino": 0.3125, + "English": 0.2727272727272727, + "Chinese": 0.24431818181818182 + }, + "consistency_score_2": 0.5457251082251081, + "consistency_score_3": 0.35275974025974033, + "consistency_score_4": 0.24074675324675318, + "consistency_score_5": 0.16585497835497837, + "consistency_score_6": 0.1112012987012987, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Spanish": 0.4772727272727273, + "Vietnamese,Malay": 0.5568181818181818, + "Vietnamese,Filipino": 0.5568181818181818, + "Vietnamese,English": 0.625, + "Vietnamese,Chinese": 0.6136363636363636, + "Indonesian,Spanish": 0.23863636363636365, + "Indonesian,Malay": 0.7215909090909091, + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,English": 0.6079545454545454, + "Indonesian,Chinese": 0.4715909090909091, + "Spanish,Malay": 0.3125, + "Spanish,Filipino": 0.39204545454545453, + "Spanish,English": 0.4318181818181818, + "Spanish,Chinese": 0.6477272727272727, + "Malay,Filipino": 0.6306818181818182, + "Malay,English": 0.6988636363636364, + "Malay,Chinese": 0.5454545454545454, + "Filipino,English": 0.6420454545454546, + "Filipino,Chinese": 0.5340909090909091, + "English,Chinese": 0.6704545454545454 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,English": 0.4147727272727273, + "Vietnamese,Indonesian,Chinese": 0.3465909090909091, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Spanish,Filipino": 0.26704545454545453, + "Vietnamese,Spanish,English": 0.29545454545454547, + "Vietnamese,Spanish,Chinese": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.42613636363636365, + "Vietnamese,Malay,English": 0.4772727272727273, + "Vietnamese,Malay,Chinese": 0.3977272727272727, + "Vietnamese,Filipino,English": 0.4602272727272727, + "Vietnamese,Filipino,Chinese": 0.3977272727272727, + "Vietnamese,English,Chinese": 0.4772727272727273, + "Indonesian,Spanish,Malay": 0.18181818181818182, + "Indonesian,Spanish,Filipino": 0.1534090909090909, + "Indonesian,Spanish,English": 0.1590909090909091, + "Indonesian,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Malay,Filipino": 0.5, + "Indonesian,Malay,English": 0.5454545454545454, + "Indonesian,Malay,Chinese": 0.4034090909090909, + "Indonesian,Filipino,English": 0.44886363636363635, + "Indonesian,Filipino,Chinese": 0.32386363636363635, + "Indonesian,English,Chinese": 0.39204545454545453, + "Spanish,Malay,Filipino": 0.21022727272727273, + "Spanish,Malay,English": 0.23863636363636365, + "Spanish,Malay,Chinese": 0.2727272727272727, + "Spanish,Filipino,English": 0.2727272727272727, + "Spanish,Filipino,Chinese": 0.3125, + "Spanish,English,Chinese": 0.38636363636363635, + "Malay,Filipino,English": 0.5170454545454546, + "Malay,Filipino,Chinese": 0.38636363636363635, + "Malay,English,Chinese": 0.4715909090909091, + "Filipino,English,Chinese": 0.44886363636363635 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,English": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino": 0.3409090909090909, + "Vietnamese,Indonesian,Malay,English": 0.375, + "Vietnamese,Indonesian,Malay,Chinese": 0.29545454545454547, + "Vietnamese,Indonesian,Filipino,English": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26136363636363635, + "Vietnamese,Indonesian,English,Chinese": 0.3068181818181818, + "Vietnamese,Spanish,Malay,Filipino": 0.1534090909090909, + "Vietnamese,Spanish,Malay,English": 0.1875, + "Vietnamese,Spanish,Malay,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Filipino,English": 0.20454545454545456, + "Vietnamese,Spanish,Filipino,Chinese": 0.23295454545454544, + "Vietnamese,Spanish,English,Chinese": 0.2727272727272727, + "Vietnamese,Malay,Filipino,English": 0.3806818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.3068181818181818, + "Vietnamese,Malay,English,Chinese": 0.36363636363636365, + "Vietnamese,Filipino,English,Chinese": 0.3522727272727273, + "Indonesian,Spanish,Malay,Filipino": 0.13068181818181818, + "Indonesian,Spanish,Malay,English": 0.14204545454545456, + "Indonesian,Spanish,Malay,Chinese": 0.1534090909090909, + "Indonesian,Spanish,Filipino,English": 0.11931818181818182, + "Indonesian,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Spanish,English,Chinese": 0.14204545454545456, + "Indonesian,Malay,Filipino,English": 0.4147727272727273, + "Indonesian,Malay,Filipino,Chinese": 0.29545454545454547, + "Indonesian,Malay,English,Chinese": 0.36363636363636365, + "Indonesian,Filipino,English,Chinese": 0.2897727272727273, + "Spanish,Malay,Filipino,English": 0.17613636363636365, + "Spanish,Malay,Filipino,Chinese": 0.19318181818181818, + "Spanish,Malay,English,Chinese": 0.2159090909090909, + "Spanish,Filipino,English,Chinese": 0.2556818181818182, + "Malay,Filipino,English,Chinese": 0.3465909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.08522727272727272, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.09090909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.24431818181818182, + "Vietnamese,Spanish,Malay,Filipino,English": 0.13636363636363635, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Malay,English,Chinese": 0.17613636363636365, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.19318181818181818, + "Vietnamese,Malay,Filipino,English,Chinese": 0.2840909090909091, + "Indonesian,Spanish,Malay,Filipino,English": 0.10227272727272728, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.11363636363636363, + "Indonesian,Spanish,Malay,English,Chinese": 0.125, + "Indonesian,Spanish,Filipino,English,Chinese": 0.10795454545454546, + "Indonesian,Malay,Filipino,English,Chinese": 0.26704545454545453, + "Spanish,Malay,Filipino,English,Chinese": 0.16477272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.2215909090909091, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.13068181818181818, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.09090909090909091 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.06818181818181818 + } + }, + "AC3_2": 0.3771558988903581, + "AC3_3": 0.3171978413694742, + "AC3_4": 0.26232381027144946, + "AC3_5": 0.21053105114320123, + "AC3_6": 0.1604734188173816, + "AC3_7": 0.1102712776662995 + }, + "prompt_2": { + "overall_acc": 0.2857142857142857, + "language_acc": { + "Vietnamese": 0.3068181818181818, + "Indonesian": 0.2727272727272727, + "Spanish": 0.30113636363636365, + "Malay": 0.2840909090909091, + "Filipino": 0.3181818181818182, + "English": 0.26704545454545453, + "Chinese": 0.25 + }, + "consistency_score_2": 0.5519480519480519, + "consistency_score_3": 0.35373376623376623, + "consistency_score_4": 0.24853896103896106, + "consistency_score_5": 0.18722943722943722, + "consistency_score_6": 0.14935064935064934, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4659090909090909, + "Vietnamese,Spanish": 0.5397727272727273, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Filipino": 0.6534090909090909, + "Vietnamese,English": 0.5511363636363636, + "Vietnamese,Chinese": 0.6136363636363636, + "Indonesian,Spanish": 0.3181818181818182, + "Indonesian,Malay": 0.7215909090909091, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,English": 0.6818181818181818, + "Indonesian,Chinese": 0.45454545454545453, + "Spanish,Malay": 0.4090909090909091, + "Spanish,Filipino": 0.5738636363636364, + "Spanish,English": 0.3352272727272727, + "Spanish,Chinese": 0.5852272727272727, + "Malay,Filipino": 0.5738636363636364, + "Malay,English": 0.6818181818181818, + "Malay,Chinese": 0.5170454545454546, + "Filipino,English": 0.5965909090909091, + "Filipino,Chinese": 0.6193181818181818, + "English,Chinese": 0.6363636363636364 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Indonesian,Filipino": 0.36363636363636365, + "Vietnamese,Indonesian,English": 0.375, + "Vietnamese,Indonesian,Chinese": 0.30113636363636365, + "Vietnamese,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Spanish,Filipino": 0.42045454545454547, + "Vietnamese,Spanish,English": 0.2556818181818182, + "Vietnamese,Spanish,Chinese": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.4090909090909091, + "Vietnamese,Malay,English": 0.3977272727272727, + "Vietnamese,Malay,Chinese": 0.35795454545454547, + "Vietnamese,Filipino,English": 0.4431818181818182, + "Vietnamese,Filipino,Chinese": 0.4772727272727273, + "Vietnamese,English,Chinese": 0.42613636363636365, + "Indonesian,Spanish,Malay": 0.25, + "Indonesian,Spanish,Filipino": 0.23863636363636365, + "Indonesian,Spanish,English": 0.20454545454545456, + "Indonesian,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Malay,Filipino": 0.4318181818181818, + "Indonesian,Malay,English": 0.5568181818181818, + "Indonesian,Malay,Chinese": 0.375, + "Indonesian,Filipino,English": 0.4318181818181818, + "Indonesian,Filipino,Chinese": 0.32954545454545453, + "Indonesian,English,Chinese": 0.4034090909090909, + "Spanish,Malay,Filipino": 0.30113636363636365, + "Spanish,Malay,English": 0.2215909090909091, + "Spanish,Malay,Chinese": 0.26704545454545453, + "Spanish,Filipino,English": 0.2840909090909091, + "Spanish,Filipino,Chinese": 0.4147727272727273, + "Spanish,English,Chinese": 0.2840909090909091, + "Malay,Filipino,English": 0.44886363636363635, + "Malay,Filipino,Chinese": 0.38636363636363635, + "Malay,English,Chinese": 0.4318181818181818, + "Filipino,English,Chinese": 0.4375 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,English": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Malay,English": 0.3125, + "Vietnamese,Indonesian,Malay,Chinese": 0.25, + "Vietnamese,Indonesian,Filipino,English": 0.3125, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian,English,Chinese": 0.2897727272727273, + "Vietnamese,Spanish,Malay,Filipino": 0.25, + "Vietnamese,Spanish,Malay,English": 0.1875, + "Vietnamese,Spanish,Malay,Chinese": 0.21022727272727273, + "Vietnamese,Spanish,Filipino,English": 0.25, + "Vietnamese,Spanish,Filipino,Chinese": 0.32954545454545453, + "Vietnamese,Spanish,English,Chinese": 0.22727272727272727, + "Vietnamese,Malay,Filipino,English": 0.32954545454545453, + "Vietnamese,Malay,Filipino,Chinese": 0.30113636363636365, + "Vietnamese,Malay,English,Chinese": 0.3125, + "Vietnamese,Filipino,English,Chinese": 0.36363636363636365, + "Indonesian,Spanish,Malay,Filipino": 0.19886363636363635, + "Indonesian,Spanish,Malay,English": 0.16477272727272727, + "Indonesian,Spanish,Malay,Chinese": 0.17045454545454544, + "Indonesian,Spanish,Filipino,English": 0.17613636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Spanish,English,Chinese": 0.16477272727272727, + "Indonesian,Malay,Filipino,English": 0.36363636363636365, + "Indonesian,Malay,Filipino,Chinese": 0.2727272727272727, + "Indonesian,Malay,English,Chinese": 0.3409090909090909, + "Indonesian,Filipino,English,Chinese": 0.29545454545454547, + "Spanish,Malay,Filipino,English": 0.20454545454545456, + "Spanish,Malay,Filipino,Chinese": 0.24431818181818182, + "Spanish,Malay,English,Chinese": 0.19318181818181818, + "Spanish,Filipino,English,Chinese": 0.25, + "Malay,Filipino,English,Chinese": 0.3181818181818182 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.2556818181818182, + "Vietnamese,Spanish,Malay,Filipino,English": 0.1875, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.22727272727272727, + "Vietnamese,Malay,Filipino,English,Chinese": 0.26704545454545453, + "Indonesian,Spanish,Malay,Filipino,English": 0.14772727272727273, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.1534090909090909, + "Indonesian,Spanish,Malay,English,Chinese": 0.14204545454545456, + "Indonesian,Spanish,Filipino,English,Chinese": 0.14772727272727273, + "Indonesian,Malay,Filipino,English,Chinese": 0.24431818181818182, + "Spanish,Malay,Filipino,English,Chinese": 0.18181818181818182 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.125, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.21022727272727273, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.17045454545454544, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.13068181818181818 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.125 + } + }, + "AC3_2": 0.37652270205914823, + "AC3_3": 0.31610633585888187, + "AC3_4": 0.2658332247625006, + "AC3_5": 0.22621771816073305, + "AC3_6": 0.19616204686322755, + "AC3_7": 0.17391304343591682 + }, + "prompt_3": { + "overall_acc": 0.2670454545454546, + "language_acc": { + "Vietnamese": 0.2556818181818182, + "Indonesian": 0.23863636363636365, + "Spanish": 0.3125, + "Malay": 0.26704545454545453, + "Filipino": 0.2840909090909091, + "English": 0.26704545454545453, + "Chinese": 0.24431818181818182 + }, + "consistency_score_2": 0.502164502164502, + "consistency_score_3": 0.3016233766233766, + "consistency_score_4": 0.1972402597402598, + "consistency_score_5": 0.13501082251082253, + "consistency_score_6": 0.09496753246753246, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.38636363636363635, + "Vietnamese,Spanish": 0.4375, + "Vietnamese,Malay": 0.4375, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,English": 0.5284090909090909, + "Vietnamese,Chinese": 0.5340909090909091, + "Indonesian,Spanish": 0.3465909090909091, + "Indonesian,Malay": 0.6306818181818182, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,English": 0.5625, + "Indonesian,Chinese": 0.4090909090909091, + "Spanish,Malay": 0.42613636363636365, + "Spanish,Filipino": 0.44886363636363635, + "Spanish,English": 0.3977272727272727, + "Spanish,Chinese": 0.5511363636363636, + "Malay,Filipino": 0.5738636363636364, + "Malay,English": 0.6193181818181818, + "Malay,Chinese": 0.48863636363636365, + "Filipino,English": 0.6022727272727273, + "Filipino,Chinese": 0.5511363636363636, + "English,Chinese": 0.5965909090909091 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,English": 0.29545454545454547, + "Vietnamese,Indonesian,Chinese": 0.22727272727272727, + "Vietnamese,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Spanish,Filipino": 0.2784090909090909, + "Vietnamese,Spanish,English": 0.23863636363636365, + "Vietnamese,Spanish,Chinese": 0.30113636363636365, + "Vietnamese,Malay,Filipino": 0.3181818181818182, + "Vietnamese,Malay,English": 0.3409090909090909, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,Filipino,English": 0.3806818181818182, + "Vietnamese,Filipino,Chinese": 0.3465909090909091, + "Vietnamese,English,Chinese": 0.3806818181818182, + "Indonesian,Spanish,Malay": 0.24431818181818182, + "Indonesian,Spanish,Filipino": 0.21022727272727273, + "Indonesian,Spanish,English": 0.2159090909090909, + "Indonesian,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Malay,Filipino": 0.39204545454545453, + "Indonesian,Malay,English": 0.4375, + "Indonesian,Malay,Chinese": 0.3125, + "Indonesian,Filipino,English": 0.375, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Indonesian,English,Chinese": 0.3125, + "Spanish,Malay,Filipino": 0.2784090909090909, + "Spanish,Malay,English": 0.26136363636363635, + "Spanish,Malay,Chinese": 0.26704545454545453, + "Spanish,Filipino,English": 0.2784090909090909, + "Spanish,Filipino,Chinese": 0.3125, + "Spanish,English,Chinese": 0.2840909090909091, + "Malay,Filipino,English": 0.4431818181818182, + "Malay,Filipino,Chinese": 0.3522727272727273, + "Malay,English,Chinese": 0.375, + "Filipino,English,Chinese": 0.4034090909090909 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,English": 0.23295454545454544, + "Vietnamese,Indonesian,Malay,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Chinese": 0.19886363636363635, + "Vietnamese,Spanish,Malay,Filipino": 0.17045454545454544, + "Vietnamese,Spanish,Malay,English": 0.1590909090909091, + "Vietnamese,Spanish,Malay,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Filipino,English": 0.1875, + "Vietnamese,Spanish,Filipino,Chinese": 0.19318181818181818, + "Vietnamese,Spanish,English,Chinese": 0.17613636363636365, + "Vietnamese,Malay,Filipino,English": 0.2784090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.2215909090909091, + "Vietnamese,Malay,English,Chinese": 0.24431818181818182, + "Vietnamese,Filipino,English,Chinese": 0.2784090909090909, + "Indonesian,Spanish,Malay,Filipino": 0.17613636363636365, + "Indonesian,Spanish,Malay,English": 0.18181818181818182, + "Indonesian,Spanish,Malay,Chinese": 0.16477272727272727, + "Indonesian,Spanish,Filipino,English": 0.18181818181818182, + "Indonesian,Spanish,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Spanish,English,Chinese": 0.13636363636363635, + "Indonesian,Malay,Filipino,English": 0.3181818181818182, + "Indonesian,Malay,Filipino,Chinese": 0.22727272727272727, + "Indonesian,Malay,English,Chinese": 0.25, + "Indonesian,Filipino,English,Chinese": 0.25, + "Spanish,Malay,Filipino,English": 0.2159090909090909, + "Spanish,Malay,Filipino,Chinese": 0.19318181818181818, + "Spanish,Malay,English,Chinese": 0.17613636363636365, + "Spanish,Filipino,English,Chinese": 0.19318181818181818, + "Malay,Filipino,English,Chinese": 0.29545454545454547 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.125, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.16477272727272727, + "Vietnamese,Spanish,Malay,Filipino,English": 0.14204545454545456, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Spanish,Malay,English,Chinese": 0.10795454545454546, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.125, + "Vietnamese,Malay,Filipino,English,Chinese": 0.19886363636363635, + "Indonesian,Spanish,Malay,Filipino,English": 0.1590909090909091, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.11931818181818182, + "Indonesian,Spanish,Malay,English,Chinese": 0.11363636363636363, + "Indonesian,Spanish,Filipino,English,Chinese": 0.11931818181818182, + "Indonesian,Malay,Filipino,English,Chinese": 0.21022727272727273, + "Spanish,Malay,Filipino,English,Chinese": 0.14204545454545456 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.13636363636363635, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.09090909090909091, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.10227272727272728 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.06818181818181818 + } + }, + "AC3_2": 0.34867137782056223, + "AC3_3": 0.28328315983911123, + "AC3_4": 0.22689526377821229, + "AC3_5": 0.17934815852629651, + "AC3_6": 0.14010905010397964, + "AC3_7": 0.10862865944371262 + }, + "prompt_4": { + "overall_acc": 0.28733766233766234, + "language_acc": { + "Vietnamese": 0.2727272727272727, + "Indonesian": 0.29545454545454547, + "Spanish": 0.3125, + "Malay": 0.2840909090909091, + "Filipino": 0.2897727272727273, + "English": 0.2897727272727273, + "Chinese": 0.26704545454545453 + }, + "consistency_score_2": 0.5633116883116882, + "consistency_score_3": 0.3743506493506494, + "consistency_score_4": 0.26737012987012987, + "consistency_score_5": 0.19967532467532467, + "consistency_score_6": 0.15503246753246752, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Spanish": 0.5227272727272727, + "Vietnamese,Malay": 0.5170454545454546, + "Vietnamese,Filipino": 0.5965909090909091, + "Vietnamese,English": 0.5795454545454546, + "Vietnamese,Chinese": 0.5795454545454546, + "Indonesian,Spanish": 0.26704545454545453, + "Indonesian,Malay": 0.7215909090909091, + "Indonesian,Filipino": 0.5056818181818182, + "Indonesian,English": 0.5965909090909091, + "Indonesian,Chinese": 0.3806818181818182, + "Spanish,Malay": 0.375, + "Spanish,Filipino": 0.6136363636363636, + "Spanish,English": 0.5340909090909091, + "Spanish,Chinese": 0.7784090909090909, + "Malay,Filipino": 0.6022727272727273, + "Malay,English": 0.6761363636363636, + "Malay,Chinese": 0.4943181818181818, + "Filipino,English": 0.7215909090909091, + "Filipino,Chinese": 0.6477272727272727, + "English,Chinese": 0.6931818181818182 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay": 0.375, + "Vietnamese,Indonesian,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,English": 0.3409090909090909, + "Vietnamese,Indonesian,Chinese": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Filipino": 0.4090909090909091, + "Vietnamese,Spanish,English": 0.36363636363636365, + "Vietnamese,Spanish,Chinese": 0.4602272727272727, + "Vietnamese,Malay,Filipino": 0.3977272727272727, + "Vietnamese,Malay,English": 0.4147727272727273, + "Vietnamese,Malay,Chinese": 0.3409090909090909, + "Vietnamese,Filipino,English": 0.4715909090909091, + "Vietnamese,Filipino,Chinese": 0.44886363636363635, + "Vietnamese,English,Chinese": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.21022727272727273, + "Indonesian,Spanish,Filipino": 0.23295454545454544, + "Indonesian,Spanish,English": 0.2159090909090909, + "Indonesian,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Malay,Filipino": 0.4318181818181818, + "Indonesian,Malay,English": 0.5056818181818182, + "Indonesian,Malay,Chinese": 0.32386363636363635, + "Indonesian,Filipino,English": 0.42613636363636365, + "Indonesian,Filipino,Chinese": 0.29545454545454547, + "Indonesian,English,Chinese": 0.3465909090909091, + "Spanish,Malay,Filipino": 0.32386363636363635, + "Spanish,Malay,English": 0.30113636363636365, + "Spanish,Malay,Chinese": 0.3352272727272727, + "Spanish,Filipino,English": 0.4659090909090909, + "Spanish,Filipino,Chinese": 0.5397727272727273, + "Spanish,English,Chinese": 0.5170454545454546, + "Malay,Filipino,English": 0.5113636363636364, + "Malay,Filipino,Chinese": 0.39204545454545453, + "Malay,English,Chinese": 0.4375, + "Filipino,English,Chinese": 0.5511363636363636 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,English": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,English": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,English,Chinese": 0.25, + "Vietnamese,Spanish,Malay,Filipino": 0.23863636363636365, + "Vietnamese,Spanish,Malay,English": 0.23295454545454544, + "Vietnamese,Spanish,Malay,Chinese": 0.24431818181818182, + "Vietnamese,Spanish,Filipino,English": 0.32386363636363635, + "Vietnamese,Spanish,Filipino,Chinese": 0.375, + "Vietnamese,Spanish,English,Chinese": 0.3522727272727273, + "Vietnamese,Malay,Filipino,English": 0.3465909090909091, + "Vietnamese,Malay,Filipino,Chinese": 0.2840909090909091, + "Vietnamese,Malay,English,Chinese": 0.3181818181818182, + "Vietnamese,Filipino,English,Chinese": 0.38636363636363635, + "Indonesian,Spanish,Malay,Filipino": 0.19886363636363635, + "Indonesian,Spanish,Malay,English": 0.17613636363636365, + "Indonesian,Spanish,Malay,Chinese": 0.1875, + "Indonesian,Spanish,Filipino,English": 0.21022727272727273, + "Indonesian,Spanish,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Spanish,English,Chinese": 0.19886363636363635, + "Indonesian,Malay,Filipino,English": 0.3693181818181818, + "Indonesian,Malay,Filipino,Chinese": 0.2556818181818182, + "Indonesian,Malay,English,Chinese": 0.30113636363636365, + "Indonesian,Filipino,English,Chinese": 0.2840909090909091, + "Spanish,Malay,Filipino,English": 0.2840909090909091, + "Spanish,Malay,Filipino,Chinese": 0.30113636363636365, + "Spanish,Malay,English,Chinese": 0.2897727272727273, + "Spanish,Filipino,English,Chinese": 0.44886363636363635, + "Malay,Filipino,English,Chinese": 0.36363636363636365 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2556818181818182, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.2159090909090909, + "Vietnamese,Spanish,Malay,Filipino,English": 0.2159090909090909, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.2215909090909091, + "Vietnamese,Spanish,Malay,English,Chinese": 0.2215909090909091, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.3125, + "Vietnamese,Malay,Filipino,English,Chinese": 0.26704545454545453, + "Indonesian,Spanish,Malay,Filipino,English": 0.17613636363636365, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.17613636363636365, + "Indonesian,Spanish,Malay,English,Chinese": 0.16477272727272727, + "Indonesian,Spanish,Filipino,English,Chinese": 0.19318181818181818, + "Indonesian,Malay,Filipino,English,Chinese": 0.24431818181818182, + "Spanish,Malay,Filipino,English,Chinese": 0.2727272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.125, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.125, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.1875, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.16477272727272727 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.125 + } + }, + "AC3_2": 0.38055789625743375, + "AC3_3": 0.3251229878559919, + "AC3_4": 0.2769945155310597, + "AC3_5": 0.23561688306850312, + "AC3_6": 0.20139997612509086, + "AC3_7": 0.17421259838294686 + }, + "prompt_5": { + "overall_acc": 0.25974025974025977, + "language_acc": { + "Vietnamese": 0.23863636363636365, + "Indonesian": 0.2727272727272727, + "Spanish": 0.3125, + "Malay": 0.23295454545454544, + "Filipino": 0.22727272727272727, + "English": 0.2840909090909091, + "Chinese": 0.25 + }, + "consistency_score_2": 0.48674242424242414, + "consistency_score_3": 0.29724025974025975, + "consistency_score_4": 0.2051948051948052, + "consistency_score_5": 0.15151515151515152, + "consistency_score_6": 0.11444805194805195, + "consistency_score_7": 0.08522727272727272, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4772727272727273, + "Vietnamese,Spanish": 0.42613636363636365, + "Vietnamese,Malay": 0.5, + "Vietnamese,Filipino": 0.48863636363636365, + "Vietnamese,English": 0.5738636363636364, + "Vietnamese,Chinese": 0.5625, + "Indonesian,Spanish": 0.3806818181818182, + "Indonesian,Malay": 0.6420454545454546, + "Indonesian,Filipino": 0.39204545454545453, + "Indonesian,English": 0.4659090909090909, + "Indonesian,Chinese": 0.3977272727272727, + "Spanish,Malay": 0.45454545454545453, + "Spanish,Filipino": 0.3977272727272727, + "Spanish,English": 0.39204545454545453, + "Spanish,Chinese": 0.5113636363636364, + "Malay,Filipino": 0.48295454545454547, + "Malay,English": 0.5738636363636364, + "Malay,Chinese": 0.4943181818181818, + "Filipino,English": 0.4715909090909091, + "Filipino,Chinese": 0.4659090909090909, + "English,Chinese": 0.6704545454545454 + }, + "3_combine": { + "Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,English": 0.32386363636363635, + "Vietnamese,Indonesian,Chinese": 0.2784090909090909, + "Vietnamese,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Spanish,Filipino": 0.2556818181818182, + "Vietnamese,Spanish,English": 0.26704545454545453, + "Vietnamese,Spanish,Chinese": 0.30113636363636365, + "Vietnamese,Malay,Filipino": 0.29545454545454547, + "Vietnamese,Malay,English": 0.3806818181818182, + "Vietnamese,Malay,Chinese": 0.3352272727272727, + "Vietnamese,Filipino,English": 0.35795454545454547, + "Vietnamese,Filipino,Chinese": 0.3465909090909091, + "Vietnamese,English,Chinese": 0.4431818181818182, + "Indonesian,Spanish,Malay": 0.30113636363636365, + "Indonesian,Spanish,Filipino": 0.1590909090909091, + "Indonesian,Spanish,English": 0.2215909090909091, + "Indonesian,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Malay,Filipino": 0.30113636363636365, + "Indonesian,Malay,English": 0.38636363636363635, + "Indonesian,Malay,Chinese": 0.3181818181818182, + "Indonesian,Filipino,English": 0.24431818181818182, + "Indonesian,Filipino,Chinese": 0.21022727272727273, + "Indonesian,English,Chinese": 0.3181818181818182, + "Spanish,Malay,Filipino": 0.23295454545454544, + "Spanish,Malay,English": 0.2897727272727273, + "Spanish,Malay,Chinese": 0.29545454545454547, + "Spanish,Filipino,English": 0.24431818181818182, + "Spanish,Filipino,Chinese": 0.2727272727272727, + "Spanish,English,Chinese": 0.3068181818181818, + "Malay,Filipino,English": 0.32386363636363635, + "Malay,Filipino,Chinese": 0.2897727272727273, + "Malay,English,Chinese": 0.39204545454545453, + "Filipino,English,Chinese": 0.375 + }, + "4_combine": { + "Vietnamese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,English": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1875, + "Vietnamese,Indonesian,English,Chinese": 0.24431818181818182, + "Vietnamese,Spanish,Malay,Filipino": 0.17045454545454544, + "Vietnamese,Spanish,Malay,English": 0.21022727272727273, + "Vietnamese,Spanish,Malay,Chinese": 0.21022727272727273, + "Vietnamese,Spanish,Filipino,English": 0.19886363636363635, + "Vietnamese,Spanish,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,English,Chinese": 0.2215909090909091, + "Vietnamese,Malay,Filipino,English": 0.26136363636363635, + "Vietnamese,Malay,Filipino,Chinese": 0.23863636363636365, + "Vietnamese,Malay,English,Chinese": 0.29545454545454547, + "Vietnamese,Filipino,English,Chinese": 0.30113636363636365, + "Indonesian,Spanish,Malay,Filipino": 0.13068181818181818, + "Indonesian,Spanish,Malay,English": 0.20454545454545456, + "Indonesian,Spanish,Malay,Chinese": 0.19886363636363635, + "Indonesian,Spanish,Filipino,English": 0.13068181818181818, + "Indonesian,Spanish,Filipino,Chinese": 0.125, + "Indonesian,Spanish,English,Chinese": 0.17045454545454544, + "Indonesian,Malay,Filipino,English": 0.2159090909090909, + "Indonesian,Malay,Filipino,Chinese": 0.18181818181818182, + "Indonesian,Malay,English,Chinese": 0.26136363636363635, + "Indonesian,Filipino,English,Chinese": 0.19318181818181818, + "Spanish,Malay,Filipino,English": 0.17613636363636365, + "Spanish,Malay,Filipino,Chinese": 0.17613636363636365, + "Spanish,Malay,English,Chinese": 0.22727272727272727, + "Spanish,Filipino,English,Chinese": 0.20454545454545456, + "Malay,Filipino,English,Chinese": 0.26136363636363635 + }, + "5_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Malay,English": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Malay,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,English": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,English,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Malay,Filipino,English": 0.1534090909090909, + "Vietnamese,Spanish,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Malay,English,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Filipino,English,Chinese": 0.17045454545454544, + "Vietnamese,Malay,Filipino,English,Chinese": 0.2215909090909091, + "Indonesian,Spanish,Malay,Filipino,English": 0.11363636363636363, + "Indonesian,Spanish,Malay,Filipino,Chinese": 0.10227272727272728, + "Indonesian,Spanish,Malay,English,Chinese": 0.1534090909090909, + "Indonesian,Spanish,Filipino,English,Chinese": 0.10795454545454546, + "Indonesian,Malay,Filipino,English,Chinese": 0.17045454545454544, + "Spanish,Malay,Filipino,English,Chinese": 0.1534090909090909 + }, + "6_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,Malay,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,Spanish,Malay,English,Chinese": 0.125, + "Vietnamese,Indonesian,Spanish,Filipino,English,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,Filipino,English,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Malay,Filipino,English,Chinese": 0.13636363636363635, + "Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.09090909090909091 + }, + "7_combine": { + "Vietnamese,Indonesian,Spanish,Malay,Filipino,English,Chinese": 0.08522727272727272 + } + }, + "AC3_2": 0.33872615238139225, + "AC3_3": 0.27722787263132, + "AC3_4": 0.22926793871583367, + "AC3_5": 0.19138755976207503, + "AC3_6": 0.15888666643639568, + "AC3_7": 0.12834224595210064 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.30097087378640774 + }, + "prompt_2": { + "accuracy": 0.2524271844660194 + }, + "prompt_3": { + "accuracy": 0.27184466019417475 + }, + "prompt_4": { + "accuracy": 0.24271844660194175 + }, + "prompt_5": { + "accuracy": 0.27184466019417475 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.2571428571428571 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.29523809523809524 + }, + "prompt_4": { + "accuracy": 0.3142857142857143 + }, + "prompt_5": { + "accuracy": 0.2857142857142857 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.29906542056074764 + }, + "prompt_2": { + "accuracy": 0.27102803738317754 + }, + "prompt_3": { + "accuracy": 0.29906542056074764 + }, + "prompt_4": { + "accuracy": 0.24299065420560748 + }, + "prompt_5": { + "accuracy": 0.3177570093457944 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.5, + "demographics": 0.6, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.3, + "culture": 0.5, + "film": 0.1, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.1, + "history": 0.2, + "literature": 0.4, + "politics": 0.5, + "culture": 0.5, + "film": 0.1, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.6, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.3, + "film": 0.3, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.1, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.5, + "film": 0.1, + "law": 0.2, + "geography": 0.4 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.055226080493065925 + }, + "prompt_2": { + "bleu_score": 0.05425414646135315 + }, + "prompt_3": { + "bleu_score": 0.0526110157723912 + }, + "prompt_4": { + "bleu_score": 0.05525879545320293 + }, + "prompt_5": { + "bleu_score": 0.05448446403407131 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.24908204820081448, + "category_acc": { + "History": 0.21485943775100402, + "Geography": 0.21020408163265306, + "Lampungic": 0.2857142857142857, + "Social science": 0.2904841402337229, + "Balinese": 0.24203821656050956, + "Makassarese": 0.23118279569892472, + "Banjarese": 0.3055555555555556, + "Chemistry": 0.2116788321167883, + "Biology": 0.21301775147928995, + "Science": 0.25696594427244585, + "Christian religion": 0.19900497512437812, + "Art": 0.30449251247920134, + "Islam religion": 0.27453769559032715, + "Hindu religion": 0.2733333333333333, + "Madurese": 0.21694915254237288, + "Sport": 0.2702702702702703, + "Indonesian language": 0.2602739726027397, + "Physics": 0.2222222222222222, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.22018348623853212, + "Sociology": 0.2217741935483871, + "Economy": 0.2151639344262295, + "Sundanese": 0.2696629213483146, + "Javanese": 0.2399193548387097, + "Civic education": 0.2632331902718169 + } + }, + "prompt_2": { + "accuracy": 0.25435609853795316, + "category_acc": { + "History": 0.2248995983935743, + "Geography": 0.20612244897959184, + "Lampungic": 0.2857142857142857, + "Social science": 0.2921535893155259, + "Balinese": 0.24628450106157113, + "Makassarese": 0.25806451612903225, + "Banjarese": 0.3055555555555556, + "Chemistry": 0.22335766423357664, + "Biology": 0.23076923076923078, + "Science": 0.26625386996904027, + "Christian religion": 0.21393034825870647, + "Art": 0.28286189683860236, + "Islam religion": 0.2532005689900427, + "Hindu religion": 0.26666666666666666, + "Madurese": 0.26440677966101694, + "Sport": 0.25, + "Indonesian language": 0.2686799501867995, + "Physics": 0.23030303030303031, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.24770642201834864, + "Sociology": 0.24193548387096775, + "Economy": 0.18237704918032788, + "Sundanese": 0.2722558340535869, + "Javanese": 0.24798387096774194, + "Civic education": 0.2804005722460658 + } + }, + "prompt_3": { + "accuracy": 0.26263435476333535, + "category_acc": { + "History": 0.24497991967871485, + "Geography": 0.22653061224489796, + "Lampungic": 0.30612244897959184, + "Social science": 0.32387312186978295, + "Balinese": 0.27176220806794055, + "Makassarese": 0.23118279569892472, + "Banjarese": 0.2361111111111111, + "Chemistry": 0.22335766423357664, + "Biology": 0.23550295857988165, + "Science": 0.28792569659442724, + "Christian religion": 0.22885572139303484, + "Art": 0.32945091514143093, + "Islam religion": 0.2759601706970128, + "Hindu religion": 0.25333333333333335, + "Madurese": 0.2440677966101695, + "Sport": 0.28378378378378377, + "Indonesian language": 0.2661892901618929, + "Physics": 0.23232323232323232, + "Minangkabau culture": 0.2914572864321608, + "Dayak language": 0.29357798165137616, + "Sociology": 0.23588709677419356, + "Economy": 0.23770491803278687, + "Sundanese": 0.2636127917026793, + "Javanese": 0.2439516129032258, + "Civic education": 0.2804005722460658 + } + }, + "prompt_4": { + "accuracy": 0.253955537752854, + "category_acc": { + "History": 0.2289156626506024, + "Geography": 0.20816326530612245, + "Lampungic": 0.2925170068027211, + "Social science": 0.31886477462437396, + "Balinese": 0.26963906581740976, + "Makassarese": 0.1881720430107527, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.22481751824817517, + "Biology": 0.22485207100591717, + "Science": 0.27450980392156865, + "Christian religion": 0.18407960199004975, + "Art": 0.28286189683860236, + "Islam religion": 0.26884779516358465, + "Hindu religion": 0.24666666666666667, + "Madurese": 0.2711864406779661, + "Sport": 0.3108108108108108, + "Indonesian language": 0.26774595267745954, + "Physics": 0.2222222222222222, + "Minangkabau culture": 0.2814070351758794, + "Dayak language": 0.28440366972477066, + "Sociology": 0.23588709677419356, + "Economy": 0.1987704918032787, + "Sundanese": 0.2497839239412273, + "Javanese": 0.2399193548387097, + "Civic education": 0.26609442060085836 + } + }, + "prompt_5": { + "accuracy": 0.24908204820081448, + "category_acc": { + "History": 0.20682730923694778, + "Geography": 0.20816326530612245, + "Lampungic": 0.2653061224489796, + "Social science": 0.27879799666110183, + "Balinese": 0.2484076433121019, + "Makassarese": 0.1989247311827957, + "Banjarese": 0.2847222222222222, + "Chemistry": 0.24087591240875914, + "Biology": 0.23431952662721894, + "Science": 0.26625386996904027, + "Christian religion": 0.22885572139303484, + "Art": 0.2778702163061564, + "Islam religion": 0.2603129445234708, + "Hindu religion": 0.25333333333333335, + "Madurese": 0.2847457627118644, + "Sport": 0.2635135135135135, + "Indonesian language": 0.25840597758405975, + "Physics": 0.2202020202020202, + "Minangkabau culture": 0.21105527638190955, + "Dayak language": 0.22018348623853212, + "Sociology": 0.2399193548387097, + "Economy": 0.22131147540983606, + "Sundanese": 0.26188418323249785, + "Javanese": 0.2399193548387097, + "Civic education": 0.24892703862660945 + } + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.075484492639603 + }, + "prompt_2": { + "bleu_score": 0.0736303975416232 + }, + "prompt_3": { + "bleu_score": 0.07421157385168264 + }, + "prompt_4": { + "bleu_score": 0.07377779128846335 + }, + "prompt_5": { + "bleu_score": 0.07087880065742456 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.0643823396686227 + }, + "prompt_2": { + "bleu_score": 0.06358955989400157 + }, + "prompt_3": { + "bleu_score": 0.0638364728306826 + }, + "prompt_4": { + "bleu_score": 0.06324483870667971 + }, + "prompt_5": { + "bleu_score": 0.06096507382561518 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.04373475992483948 + }, + "prompt_2": { + "bleu_score": 0.04256495714076402 + }, + "prompt_3": { + "bleu_score": 0.04315933574012693 + }, + "prompt_4": { + "bleu_score": 0.0429969238020829 + }, + "prompt_5": { + "bleu_score": 0.04174367744161577 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.0741363765799575 + }, + "prompt_2": { + "bleu_score": 0.07378800348336066 + }, + "prompt_3": { + "bleu_score": 0.07440800662583703 + }, + "prompt_4": { + "bleu_score": 0.07359250641100737 + }, + "prompt_5": { + "bleu_score": 0.06915246733099724 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.24620770128354727 + }, + "prompt_2": { + "accuracy": 0.2613768961493582 + }, + "prompt_3": { + "accuracy": 0.24737456242707118 + }, + "prompt_4": { + "accuracy": 0.2718786464410735 + }, + "prompt_5": { + "accuracy": 0.24037339556592766 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.2562745799070433, + "category_acc": { + "high_school_european_history": 0.29878048780487804, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.2689393939393939, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.26108374384236455, + "high_school_physics": 0.20666666666666667, + "high_school_world_history": 0.2457627118644068, + "virology": 0.21212121212121213, + "high_school_microeconomics": 0.21518987341772153, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.2750809061488673, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2775800711743772, + "philosophy": 0.2709677419354839, + "professional_medicine": 0.1992619926199262, + "nutrition": 0.25573770491803277, + "global_facts": 0.29292929292929293, + "machine_learning": 0.23423423423423423, + "security_studies": 0.22540983606557377, + "public_relations": 0.2018348623853211, + "professional_psychology": 0.2700490998363339, + "prehistory": 0.30030959752321984, + "anatomy": 0.3208955223880597, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.23837209302325582, + "high_school_government_and_politics": 0.22916666666666666, + "college_chemistry": 0.1717171717171717, + "logical_fallacies": 0.29012345679012347, + "high_school_geography": 0.2131979695431472, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.24774774774774774, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.22977941176470587, + "formal_logic": 0.28, + "high_school_statistics": 0.1813953488372093, + "international_law": 0.3416666666666667, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.24358974358974358, + "miscellaneous": 0.2710997442455243, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.27467811158798283, + "professional_law": 0.29354207436399216, + "management": 0.18627450980392157, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.2897196261682243, + "world_religions": 0.3058823529411765, + "sociology": 0.24, + "us_foreign_policy": 0.2727272727272727, + "high_school_macroeconomics": 0.20565552699228792, + "computer_security": 0.37373737373737376, + "moral_scenarios": 0.23042505592841164, + "moral_disputes": 0.263768115942029, + "electrical_engineering": 0.19444444444444445, + "astronomy": 0.271523178807947, + "college_biology": 0.2727272727272727 + } + }, + "prompt_2": { + "accuracy": 0.25277082588487665, + "category_acc": { + "high_school_european_history": 0.27439024390243905, + "business_ethics": 0.31313131313131315, + "clinical_knowledge": 0.24621212121212122, + "medical_genetics": 0.26262626262626265, + "high_school_us_history": 0.270935960591133, + "high_school_physics": 0.24, + "high_school_world_history": 0.2754237288135593, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.21940928270042195, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.2588996763754045, + "abstract_algebra": 0.21212121212121213, + "professional_accounting": 0.24555160142348753, + "philosophy": 0.24838709677419354, + "professional_medicine": 0.16605166051660517, + "nutrition": 0.2459016393442623, + "global_facts": 0.30303030303030304, + "machine_learning": 0.27927927927927926, + "security_studies": 0.24180327868852458, + "public_relations": 0.2018348623853211, + "professional_psychology": 0.2684124386252046, + "prehistory": 0.2786377708978328, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.2692307692307692, + "college_medicine": 0.22093023255813954, + "high_school_government_and_politics": 0.25, + "college_chemistry": 0.16161616161616163, + "logical_fallacies": 0.24074074074074073, + "high_school_geography": 0.23857868020304568, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.25225225225225223, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.24448529411764705, + "formal_logic": 0.28, + "high_school_statistics": 0.19534883720930232, + "international_law": 0.325, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.23076923076923078, + "miscellaneous": 0.26342710997442453, + "high_school_chemistry": 0.2524752475247525, + "marketing": 0.296137339055794, + "professional_law": 0.2609262883235486, + "management": 0.19607843137254902, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.35514018691588783, + "world_religions": 0.27647058823529413, + "sociology": 0.245, + "us_foreign_policy": 0.32323232323232326, + "high_school_macroeconomics": 0.21079691516709512, + "computer_security": 0.32323232323232326, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.2782608695652174, + "electrical_engineering": 0.25, + "astronomy": 0.2781456953642384, + "college_biology": 0.2727272727272727 + } + }, + "prompt_3": { + "accuracy": 0.2544154451197712, + "category_acc": { + "high_school_european_history": 0.2804878048780488, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.2727272727272727, + "medical_genetics": 0.2222222222222222, + "high_school_us_history": 0.26108374384236455, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.2669491525423729, + "virology": 0.23030303030303031, + "high_school_microeconomics": 0.22362869198312235, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.23300970873786409, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.2597864768683274, + "philosophy": 0.2838709677419355, + "professional_medicine": 0.17712177121771217, + "nutrition": 0.24918032786885247, + "global_facts": 0.2828282828282828, + "machine_learning": 0.2702702702702703, + "security_studies": 0.25, + "public_relations": 0.21100917431192662, + "professional_psychology": 0.27168576104746317, + "prehistory": 0.28792569659442724, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.22093023255813954, + "high_school_government_and_politics": 0.25, + "college_chemistry": 0.1919191919191919, + "logical_fallacies": 0.2839506172839506, + "high_school_geography": 0.23857868020304568, + "elementary_mathematics": 0.21220159151193635, + "human_aging": 0.24324324324324326, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.22426470588235295, + "formal_logic": 0.312, + "high_school_statistics": 0.20930232558139536, + "international_law": 0.375, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.2222222222222222, + "miscellaneous": 0.26854219948849106, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.24034334763948498, + "professional_law": 0.2622309197651663, + "management": 0.21568627450980393, + "college_physics": 0.297029702970297, + "jurisprudence": 0.308411214953271, + "world_religions": 0.3176470588235294, + "sociology": 0.245, + "us_foreign_policy": 0.2727272727272727, + "high_school_macroeconomics": 0.21079691516709512, + "computer_security": 0.32323232323232326, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.26666666666666666, + "electrical_engineering": 0.1736111111111111, + "astronomy": 0.2847682119205298, + "college_biology": 0.23776223776223776 + } + }, + "prompt_4": { + "accuracy": 0.25377189846263853, + "category_acc": { + "high_school_european_history": 0.2621951219512195, + "business_ethics": 0.30303030303030304, + "clinical_knowledge": 0.25, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.2512315270935961, + "high_school_physics": 0.2, + "high_school_world_history": 0.2330508474576271, + "virology": 0.28484848484848485, + "high_school_microeconomics": 0.21940928270042195, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.23624595469255663, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.23843416370106763, + "philosophy": 0.24516129032258063, + "professional_medicine": 0.22140221402214022, + "nutrition": 0.2459016393442623, + "global_facts": 0.23232323232323232, + "machine_learning": 0.3063063063063063, + "security_studies": 0.22540983606557377, + "public_relations": 0.21100917431192662, + "professional_psychology": 0.2569558101472995, + "prehistory": 0.28173374613003094, + "anatomy": 0.291044776119403, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.25, + "high_school_government_and_politics": 0.2760416666666667, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.2654320987654321, + "high_school_geography": 0.2182741116751269, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.27927927927927926, + "college_mathematics": 0.21212121212121213, + "high_school_psychology": 0.25919117647058826, + "formal_logic": 0.304, + "high_school_statistics": 0.15813953488372093, + "international_law": 0.25, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.2222222222222222, + "miscellaneous": 0.2813299232736573, + "high_school_chemistry": 0.22772277227722773, + "marketing": 0.2918454935622318, + "professional_law": 0.2602739726027397, + "management": 0.21568627450980393, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.28823529411764703, + "sociology": 0.295, + "us_foreign_policy": 0.29292929292929293, + "high_school_macroeconomics": 0.2210796915167095, + "computer_security": 0.3434343434343434, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.2579710144927536, + "electrical_engineering": 0.25, + "astronomy": 0.2582781456953642, + "college_biology": 0.2937062937062937 + } + }, + "prompt_5": { + "accuracy": 0.2574901680371827, + "category_acc": { + "high_school_european_history": 0.2926829268292683, + "business_ethics": 0.2727272727272727, + "clinical_knowledge": 0.25757575757575757, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.27586206896551724, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.24152542372881355, + "virology": 0.2, + "high_school_microeconomics": 0.21518987341772153, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.2524271844660194, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2669039145907473, + "philosophy": 0.25806451612903225, + "professional_medicine": 0.1992619926199262, + "nutrition": 0.26557377049180325, + "global_facts": 0.3434343434343434, + "machine_learning": 0.23423423423423423, + "security_studies": 0.24180327868852458, + "public_relations": 0.1926605504587156, + "professional_psychology": 0.281505728314239, + "prehistory": 0.29411764705882354, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.25384615384615383, + "college_medicine": 0.22674418604651161, + "high_school_government_and_politics": 0.23958333333333334, + "college_chemistry": 0.1111111111111111, + "logical_fallacies": 0.2716049382716049, + "high_school_geography": 0.25888324873096447, + "elementary_mathematics": 0.2625994694960212, + "human_aging": 0.21621621621621623, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.23713235294117646, + "formal_logic": 0.248, + "high_school_statistics": 0.19069767441860466, + "international_law": 0.4166666666666667, + "high_school_mathematics": 0.2936802973977695, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.23504273504273504, + "miscellaneous": 0.26854219948849106, + "high_school_chemistry": 0.2623762376237624, + "marketing": 0.2703862660944206, + "professional_law": 0.2857142857142857, + "management": 0.18627450980392157, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.2523364485981308, + "world_religions": 0.3, + "sociology": 0.245, + "us_foreign_policy": 0.2828282828282828, + "high_school_macroeconomics": 0.20051413881748073, + "computer_security": 0.31313131313131315, + "moral_scenarios": 0.2360178970917226, + "moral_disputes": 0.26956521739130435, + "electrical_engineering": 0.24305555555555555, + "astronomy": 0.2913907284768212, + "college_biology": 0.25874125874125875 + } + } + }, + "c_eval": { + "prompt_1": { + "accuracy": 0.25482912332838037 + }, + "prompt_2": { + "accuracy": 0.2659732540861813 + }, + "prompt_3": { + "accuracy": 0.2674591381872214 + }, + "prompt_4": { + "accuracy": 0.27340267459138184 + }, + "prompt_5": { + "accuracy": 0.2607726597325409 + } + }, + "c_eval_full": { + "prompt_1": { + "accuracy": 0.24968866749688667, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.23809523809523808, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.26666666666666666, + "business_administration": 0.2894736842105263, + "marxism": 0.16666666666666666, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.1111111111111111, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.48, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.17307692307692307, + "sports_science": 0.20833333333333334, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.25, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.2222222222222222, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.1388888888888889, + "tax_accountant": 0.16666666666666666, + "physician": 0.24074074074074073 + } + }, + "prompt_2": { + "accuracy": 0.25653798256537985, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.2857142857142857, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.2833333333333333, + "business_administration": 0.3684210526315789, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.25925925925925924, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.4, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.25, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.2037037037037037, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.16666666666666666, + "tax_accountant": 0.2037037037037037, + "physician": 0.24074074074074073 + } + }, + "prompt_3": { + "accuracy": 0.2590286425902864, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.21428571428571427, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.11538461538461539, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.2833333333333333, + "business_administration": 0.34210526315789475, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.14814814814814814, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.25, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.25, + "sports_science": 0.20833333333333334, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.18518518518518517, + "physician": 0.2222222222222222 + } + }, + "prompt_4": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.08333333333333333, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.21428571428571427, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.3333333333333333, + "business_administration": 0.2894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.14814814814814814, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.5, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.23076923076923078, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.25, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.25925925925925924, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.16666666666666666, + "tax_accountant": 0.2037037037037037, + "physician": 0.2777777777777778 + } + }, + "prompt_5": { + "accuracy": 0.25529265255292655, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.08333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.23809523809523808, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.11538461538461539, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.31666666666666665, + "business_administration": 0.3684210526315789, + "marxism": 0.25, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.07407407407407407, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.48, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.25, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.24074074074074073, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.18518518518518517, + "physician": 0.24074074074074073 + } + } + }, + "cmmlu": { + "prompt_1": { + "accuracy": 0.2903225806451613 + }, + "prompt_2": { + "accuracy": 0.3010752688172043 + }, + "prompt_3": { + "accuracy": 0.27956989247311825 + }, + "prompt_4": { + "accuracy": 0.2903225806451613 + }, + "prompt_5": { + "accuracy": 0.25448028673835127 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.2593679848040062, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.20270270270270271, + "ancient_chinese": 0.2865853658536585, + "arts": 0.2625, + "astronomy": 0.2787878787878788, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.20610687022900764, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.2693498452012384, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.22346368715083798, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.25, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.2087912087912088, + "computer_science": 0.23529411764705882, + "computer_security": 0.26900584795321636, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.28776978417266186, + "economics": 0.24528301886792453, + "education": 0.294478527607362, + "electrical_engineering": 0.22674418604651161, + "elementary_chinese": 0.3253968253968254, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.2605042016806723, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.21481481481481482, + "food_science": 0.2937062937062937, + "genetics": 0.2784090909090909, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.21818181818181817, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2864864864864865, + "journalism": 0.29069767441860467, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.24390243902439024, + "machine_learning": 0.26229508196721313, + "management": 0.3, + "marketing": 0.26666666666666666, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.21379310344827587, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.25, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.25862068965517243, + "security_study": 0.25925925925925924, + "sociology": 0.23893805309734514, + "sports_science": 0.24848484848484848, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.2485207100591716, + "world_history": 0.2732919254658385, + "world_religions": 0.25 + } + }, + "prompt_2": { + "accuracy": 0.26592989121049904, + "category_acc": { + "agronomy": 0.28994082840236685, + "anatomy": 0.21621621621621623, + "ancient_chinese": 0.2865853658536585, + "arts": 0.28125, + "astronomy": 0.28484848484848485, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.22900763358778625, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.29721362229102166, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.2122905027932961, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.1792452830188679, + "college_education": 0.2897196261682243, + "college_engineering_hydrology": 0.2830188679245283, + "college_law": 0.28703703703703703, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.20512820512820512, + "computer_science": 0.23039215686274508, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.2733812949640288, + "economics": 0.27672955974842767, + "education": 0.26993865030674846, + "electrical_engineering": 0.29651162790697677, + "elementary_chinese": 0.32936507936507936, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.22962962962962963, + "food_science": 0.26573426573426573, + "genetics": 0.30113636363636365, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.25, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.31746031746031744, + "international_law": 0.2918918918918919, + "journalism": 0.28488372093023256, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2601626016260163, + "machine_learning": 0.23770491803278687, + "management": 0.2857142857142857, + "marketing": 0.2833333333333333, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.23448275862068965, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.26540284360189575, + "professional_medicine": 0.23138297872340424, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.28160919540229884, + "security_study": 0.32592592592592595, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.2603550295857988, + "world_history": 0.2795031055900621, + "world_religions": 0.25 + } + }, + "prompt_3": { + "accuracy": 0.26126748402693833, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.2926829268292683, + "arts": 0.2625, + "astronomy": 0.2727272727272727, + "business_ethics": 0.28708133971291866, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.1984732824427481, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.29721362229102166, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.19553072625698323, + "clinical_knowledge": 0.24050632911392406, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.24074074074074073, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.23076923076923078, + "computer_science": 0.23039215686274508, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2805755395683453, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.2976190476190476, + "elementary_commonsense": 0.2777777777777778, + "elementary_information_and_technology": 0.2857142857142857, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.23703703703703705, + "food_science": 0.23776223776223776, + "genetics": 0.30113636363636365, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2864864864864865, + "journalism": 0.26744186046511625, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.25203252032520324, + "machine_learning": 0.23770491803278687, + "management": 0.2714285714285714, + "marketing": 0.2611111111111111, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.20689655172413793, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.25862068965517243, + "security_study": 0.25925925925925924, + "sociology": 0.252212389380531, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.2485207100591716, + "world_history": 0.2795031055900621, + "world_religions": 0.28125 + } + }, + "prompt_4": { + "accuracy": 0.25634605422206874, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.21621621621621623, + "ancient_chinese": 0.2865853658536585, + "arts": 0.29375, + "astronomy": 0.3151515151515151, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.1984732824427481, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.26006191950464397, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.22905027932960895, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.23148148148148148, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.22344322344322345, + "computer_science": 0.24509803921568626, + "computer_security": 0.2807017543859649, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.2805755395683453, + "economics": 0.23270440251572327, + "education": 0.22699386503067484, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.29831932773109243, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.22962962962962963, + "food_science": 0.2727272727272727, + "genetics": 0.29545454545454547, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.31746031746031744, + "international_law": 0.22162162162162163, + "journalism": 0.2616279069767442, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2757009345794392, + "logical": 0.25203252032520324, + "machine_learning": 0.23770491803278687, + "management": 0.2619047619047619, + "marketing": 0.2388888888888889, + "marxist_theory": 0.25925925925925924, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.23448275862068965, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.26857142857142857, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.2393617021276596, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.26436781609195403, + "security_study": 0.25925925925925924, + "sociology": 0.22566371681415928, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.24260355029585798, + "world_history": 0.2670807453416149, + "world_religions": 0.2625 + } + }, + "prompt_5": { + "accuracy": 0.25677775859091695, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.2865853658536585, + "arts": 0.275, + "astronomy": 0.26666666666666666, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.20610687022900764, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.29411764705882354, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.21787709497206703, + "clinical_knowledge": 0.22784810126582278, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.23148148148148148, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.21245421245421245, + "computer_science": 0.2107843137254902, + "computer_security": 0.2631578947368421, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.31654676258992803, + "economics": 0.2389937106918239, + "education": 0.2147239263803681, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.24782608695652175, + "ethnology": 0.2222222222222222, + "food_science": 0.2517482517482518, + "genetics": 0.2727272727272727, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.25, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2702702702702703, + "journalism": 0.28488372093023256, + "jurisprudence": 0.25304136253041365, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.24390243902439024, + "machine_learning": 0.2459016393442623, + "management": 0.2619047619047619, + "marketing": 0.26666666666666666, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.2206896551724138, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.24, + "professional_law": 0.26066350710900477, + "professional_medicine": 0.23670212765957446, + "professional_psychology": 0.25, + "public_relations": 0.26436781609195403, + "security_study": 0.2740740740740741, + "sociology": 0.24778761061946902, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.25443786982248523, + "world_history": 0.2857142857142857, + "world_religions": 0.2625 + } + } + }, + "zbench": { + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.18181818181818182 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.17727272727272728 + }, + "prompt_2": { + "accuracy": 0.17045454545454544 + }, + "prompt_3": { + "accuracy": 0.17045454545454544 + }, + "prompt_4": { + "accuracy": 0.17272727272727273 + }, + "prompt_5": { + "accuracy": 0.17727272727272728 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.34271186440677964 + }, + "prompt_2": { + "accuracy": 0.3447457627118644 + }, + "prompt_3": { + "accuracy": 0.3416949152542373 + }, + "prompt_4": { + "accuracy": 0.34372881355932206 + }, + "prompt_5": { + "accuracy": 0.34372881355932206 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.31338818249813016 + }, + "prompt_2": { + "accuracy": 0.31264023934181 + }, + "prompt_3": { + "accuracy": 0.31114435302916976 + }, + "prompt_4": { + "accuracy": 0.29655946148092743 + }, + "prompt_5": { + "accuracy": 0.29094988780852654 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.3390494855463008 + }, + "prompt_2": { + "accuracy": 0.34835864772170505 + }, + "prompt_3": { + "accuracy": 0.3512983831455169 + }, + "prompt_4": { + "accuracy": 0.3488486036256737 + }, + "prompt_5": { + "accuracy": 0.3493385595296423 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.16197581762091354, + "rouge2": 0.051621800009737595, + "rougeL": 0.1226278414407338, + "avg_rouge": 0.11207515302379496 + }, + "prompt_2": { + "rouge1": 0.14708317425955947, + "rouge2": 0.0463376435494789, + "rougeL": 0.11223005068856871, + "avg_rouge": 0.1018836228325357 + }, + "prompt_3": { + "rouge1": 0.1442079228929008, + "rouge2": 0.04649327722189907, + "rougeL": 0.11109790506934418, + "avg_rouge": 0.10059970172804801 + }, + "prompt_4": { + "rouge1": 0.152834266509089, + "rouge2": 0.04963833623878602, + "rougeL": 0.11769885590581287, + "avg_rouge": 0.1067238195512293 + }, + "prompt_5": { + "rouge1": 0.1378482418551559, + "rouge2": 0.04570600196741324, + "rougeL": 0.10789638063677803, + "avg_rouge": 0.09715020815311572 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.11046019755256263, + "rouge2": 0.029267523344006317, + "rougeL": 0.08632379028310769, + "avg_rouge": 0.07535050372655888 + }, + "prompt_2": { + "rouge1": 0.10287233336323474, + "rouge2": 0.027064827846601178, + "rougeL": 0.08113168025569371, + "avg_rouge": 0.07035628048850988 + }, + "prompt_3": { + "rouge1": 0.10997154033973273, + "rouge2": 0.029478863246335345, + "rougeL": 0.08620038921620636, + "avg_rouge": 0.07521693093409149 + }, + "prompt_4": { + "rouge1": 0.1184207954582665, + "rouge2": 0.03194211172376678, + "rougeL": 0.09140949597727019, + "avg_rouge": 0.08059080105310115 + }, + "prompt_5": { + "rouge1": 0.1317908861803621, + "rouge2": 0.034736622014649804, + "rougeL": 0.10160989649866022, + "avg_rouge": 0.08937913489789072 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.6238532110091743 + }, + "prompt_2": { + "accuracy": 0.5458715596330275 + }, + "prompt_3": { + "accuracy": 0.6330275229357798 + }, + "prompt_4": { + "accuracy": 0.5 + }, + "prompt_5": { + "accuracy": 0.7259174311926605 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.5608820709491851 + }, + "prompt_2": { + "accuracy": 0.6097794822627037 + }, + "prompt_3": { + "accuracy": 0.6356663470757431 + }, + "prompt_4": { + "accuracy": 0.5532118887823586 + }, + "prompt_5": { + "accuracy": 0.4966442953020134 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.495 + }, + "prompt_2": { + "accuracy": 0.4955 + }, + "prompt_3": { + "accuracy": 0.4945 + }, + "prompt_4": { + "accuracy": 0.495 + }, + "prompt_5": { + "accuracy": 0.495 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.3 + }, + "prompt_2": { + "accuracy": 0.3 + }, + "prompt_3": { + "accuracy": 0.3 + }, + "prompt_4": { + "accuracy": 0.3 + }, + "prompt_5": { + "accuracy": 0.3 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 0.7 + }, + "prompt_2": { + "accuracy": 0.4 + }, + "prompt_3": { + "accuracy": 0.7 + }, + "prompt_4": { + "accuracy": 0.3 + }, + "prompt_5": { + "accuracy": 0.7 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.4 + }, + "prompt_2": { + "accuracy": 0.5 + }, + "prompt_3": { + "accuracy": 0.4 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.4 + } + }, + "rte": { + "prompt_1": { + "accuracy": 0.4 + }, + "prompt_2": { + "accuracy": 0.4 + }, + "prompt_3": { + "accuracy": 0.3 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.4 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.6 + }, + "prompt_2": { + "accuracy": 0.5 + }, + "prompt_3": { + "accuracy": 0.6 + }, + "prompt_4": { + "accuracy": 0.6 + }, + "prompt_5": { + "accuracy": 0.6 + } + } + }, + "five_shot": { + "cross_xquad": { + "prompt_1": -1 + }, + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + } + } + }, "LLaMA_3_Merlion_8B": { "model_size": "8B", "model_link": "https://seaeval.github.io/",