Spaces:
Running
Running
new
Browse files- all_results.json +40 -40
- app.py +5 -5
all_results.json
CHANGED
@@ -32277,70 +32277,70 @@
|
|
32277 |
},
|
32278 |
"flores_ind2eng": {
|
32279 |
"prompt_1": {
|
32280 |
-
"bleu_score": 0.
|
32281 |
},
|
32282 |
"prompt_2": {
|
32283 |
-
"bleu_score": 0.
|
32284 |
},
|
32285 |
"prompt_3": {
|
32286 |
-
"bleu_score": 0.
|
32287 |
},
|
32288 |
"prompt_4": {
|
32289 |
-
"bleu_score": 0.
|
32290 |
},
|
32291 |
"prompt_5": {
|
32292 |
-
"bleu_score": 0.
|
32293 |
}
|
32294 |
},
|
32295 |
"flores_vie2eng": {
|
32296 |
"prompt_1": {
|
32297 |
-
"bleu_score": 0.
|
32298 |
},
|
32299 |
"prompt_2": {
|
32300 |
-
"bleu_score": 0.
|
32301 |
},
|
32302 |
"prompt_3": {
|
32303 |
-
"bleu_score": 0.
|
32304 |
},
|
32305 |
"prompt_4": {
|
32306 |
-
"bleu_score": 0.
|
32307 |
},
|
32308 |
"prompt_5": {
|
32309 |
-
"bleu_score": 0.
|
32310 |
}
|
32311 |
},
|
32312 |
"flores_zho2eng": {
|
32313 |
"prompt_1": {
|
32314 |
-
"bleu_score": 0.
|
32315 |
},
|
32316 |
"prompt_2": {
|
32317 |
-
"bleu_score": 0.
|
32318 |
},
|
32319 |
"prompt_3": {
|
32320 |
-
"bleu_score": 0.
|
32321 |
},
|
32322 |
"prompt_4": {
|
32323 |
-
"bleu_score": 0.
|
32324 |
},
|
32325 |
"prompt_5": {
|
32326 |
-
"bleu_score": 0.
|
32327 |
}
|
32328 |
},
|
32329 |
"flores_zsm2eng": {
|
32330 |
"prompt_1": {
|
32331 |
-
"bleu_score": 0.
|
32332 |
},
|
32333 |
"prompt_2": {
|
32334 |
-
"bleu_score": 0.
|
32335 |
},
|
32336 |
"prompt_3": {
|
32337 |
-
"bleu_score": 0.
|
32338 |
},
|
32339 |
"prompt_4": {
|
32340 |
-
"bleu_score": 0.
|
32341 |
},
|
32342 |
"prompt_5": {
|
32343 |
-
"bleu_score": 0.
|
32344 |
}
|
32345 |
},
|
32346 |
"mmlu": {
|
@@ -67059,70 +67059,70 @@
|
|
67059 |
},
|
67060 |
"flores_ind2eng": {
|
67061 |
"prompt_1": {
|
67062 |
-
"bleu_score": 0.
|
67063 |
},
|
67064 |
"prompt_2": {
|
67065 |
-
"bleu_score": 0.
|
67066 |
},
|
67067 |
"prompt_3": {
|
67068 |
-
"bleu_score": 0.
|
67069 |
},
|
67070 |
"prompt_4": {
|
67071 |
-
"bleu_score": 0.
|
67072 |
},
|
67073 |
"prompt_5": {
|
67074 |
-
"bleu_score": 0.
|
67075 |
}
|
67076 |
},
|
67077 |
"flores_vie2eng": {
|
67078 |
"prompt_1": {
|
67079 |
-
"bleu_score": 0.
|
67080 |
},
|
67081 |
"prompt_2": {
|
67082 |
-
"bleu_score": 0.
|
67083 |
},
|
67084 |
"prompt_3": {
|
67085 |
-
"bleu_score": 0.
|
67086 |
},
|
67087 |
"prompt_4": {
|
67088 |
-
"bleu_score": 0.
|
67089 |
},
|
67090 |
"prompt_5": {
|
67091 |
-
"bleu_score": 0.
|
67092 |
}
|
67093 |
},
|
67094 |
"flores_zho2eng": {
|
67095 |
"prompt_1": {
|
67096 |
-
"bleu_score": 0.
|
67097 |
},
|
67098 |
"prompt_2": {
|
67099 |
-
"bleu_score": 0.
|
67100 |
},
|
67101 |
"prompt_3": {
|
67102 |
-
"bleu_score": 0.
|
67103 |
},
|
67104 |
"prompt_4": {
|
67105 |
-
"bleu_score": 0.
|
67106 |
},
|
67107 |
"prompt_5": {
|
67108 |
-
"bleu_score": 0.
|
67109 |
}
|
67110 |
},
|
67111 |
"flores_zsm2eng": {
|
67112 |
"prompt_1": {
|
67113 |
-
"bleu_score": 0.
|
67114 |
},
|
67115 |
"prompt_2": {
|
67116 |
-
"bleu_score": 0.
|
67117 |
},
|
67118 |
"prompt_3": {
|
67119 |
-
"bleu_score": 0.
|
67120 |
},
|
67121 |
"prompt_4": {
|
67122 |
-
"bleu_score": 0.
|
67123 |
},
|
67124 |
"prompt_5": {
|
67125 |
-
"bleu_score": 0.
|
67126 |
}
|
67127 |
},
|
67128 |
"mmlu": {
|
|
|
32277 |
},
|
32278 |
"flores_ind2eng": {
|
32279 |
"prompt_1": {
|
32280 |
+
"bleu_score": 0.3087387231733152
|
32281 |
},
|
32282 |
"prompt_2": {
|
32283 |
+
"bleu_score": 0.3094226547039261
|
32284 |
},
|
32285 |
"prompt_3": {
|
32286 |
+
"bleu_score": 0.3061124934874166
|
32287 |
},
|
32288 |
"prompt_4": {
|
32289 |
+
"bleu_score": 0.30135340693301044
|
32290 |
},
|
32291 |
"prompt_5": {
|
32292 |
+
"bleu_score": 0.30791510943643785
|
32293 |
}
|
32294 |
},
|
32295 |
"flores_vie2eng": {
|
32296 |
"prompt_1": {
|
32297 |
+
"bleu_score": 0.24226557595813872
|
32298 |
},
|
32299 |
"prompt_2": {
|
32300 |
+
"bleu_score": 0.24374681205197152
|
32301 |
},
|
32302 |
"prompt_3": {
|
32303 |
+
"bleu_score": 0.23865746431889961
|
32304 |
},
|
32305 |
"prompt_4": {
|
32306 |
+
"bleu_score": 0.24343786296993222
|
32307 |
},
|
32308 |
"prompt_5": {
|
32309 |
+
"bleu_score": 0.2496790676198905
|
32310 |
}
|
32311 |
},
|
32312 |
"flores_zho2eng": {
|
32313 |
"prompt_1": {
|
32314 |
+
"bleu_score": 0.18741482916807534
|
32315 |
},
|
32316 |
"prompt_2": {
|
32317 |
+
"bleu_score": 0.18861522471729936
|
32318 |
},
|
32319 |
"prompt_3": {
|
32320 |
+
"bleu_score": 0.1828941675772202
|
32321 |
},
|
32322 |
"prompt_4": {
|
32323 |
+
"bleu_score": 0.18500544495397628
|
32324 |
},
|
32325 |
"prompt_5": {
|
32326 |
+
"bleu_score": 0.19088057936700595
|
32327 |
}
|
32328 |
},
|
32329 |
"flores_zsm2eng": {
|
32330 |
"prompt_1": {
|
32331 |
+
"bleu_score": 0.31040973391193794
|
32332 |
},
|
32333 |
"prompt_2": {
|
32334 |
+
"bleu_score": 0.31410450445911836
|
32335 |
},
|
32336 |
"prompt_3": {
|
32337 |
+
"bleu_score": 0.30742063457580054
|
32338 |
},
|
32339 |
"prompt_4": {
|
32340 |
+
"bleu_score": 0.2954984182513215
|
32341 |
},
|
32342 |
"prompt_5": {
|
32343 |
+
"bleu_score": 0.3059634141807576
|
32344 |
}
|
32345 |
},
|
32346 |
"mmlu": {
|
|
|
67059 |
},
|
67060 |
"flores_ind2eng": {
|
67061 |
"prompt_1": {
|
67062 |
+
"bleu_score": 0.011674358088733964
|
67063 |
},
|
67064 |
"prompt_2": {
|
67065 |
+
"bleu_score": 0.34299290022800966
|
67066 |
},
|
67067 |
"prompt_3": {
|
67068 |
+
"bleu_score": 0.34235818894094877
|
67069 |
},
|
67070 |
"prompt_4": {
|
67071 |
+
"bleu_score": 0.344471697570177
|
67072 |
},
|
67073 |
"prompt_5": {
|
67074 |
+
"bleu_score": 0.34330146854458155
|
67075 |
}
|
67076 |
},
|
67077 |
"flores_vie2eng": {
|
67078 |
"prompt_1": {
|
67079 |
+
"bleu_score": 0.004016316923669523
|
67080 |
},
|
67081 |
"prompt_2": {
|
67082 |
+
"bleu_score": 0.28858215451103547
|
67083 |
},
|
67084 |
"prompt_3": {
|
67085 |
+
"bleu_score": 0.2874460615046707
|
67086 |
},
|
67087 |
"prompt_4": {
|
67088 |
+
"bleu_score": 0.2895463893365964
|
67089 |
},
|
67090 |
"prompt_5": {
|
67091 |
+
"bleu_score": 0.28765593996471855
|
67092 |
}
|
67093 |
},
|
67094 |
"flores_zho2eng": {
|
67095 |
"prompt_1": {
|
67096 |
+
"bleu_score": 0.004745747326013472
|
67097 |
},
|
67098 |
"prompt_2": {
|
67099 |
+
"bleu_score": 0.21742919083139323
|
67100 |
},
|
67101 |
"prompt_3": {
|
67102 |
+
"bleu_score": 0.21718999888377416
|
67103 |
},
|
67104 |
"prompt_4": {
|
67105 |
+
"bleu_score": 0.2171201069019555
|
67106 |
},
|
67107 |
"prompt_5": {
|
67108 |
+
"bleu_score": 0.21609843798223957
|
67109 |
}
|
67110 |
},
|
67111 |
"flores_zsm2eng": {
|
67112 |
"prompt_1": {
|
67113 |
+
"bleu_score": 0.010737551256522686
|
67114 |
},
|
67115 |
"prompt_2": {
|
67116 |
+
"bleu_score": 0.35662624808916016
|
67117 |
},
|
67118 |
"prompt_3": {
|
67119 |
+
"bleu_score": 0.35860534258636234
|
67120 |
},
|
67121 |
"prompt_4": {
|
67122 |
+
"bleu_score": 0.35739510518617695
|
67123 |
},
|
67124 |
"prompt_5": {
|
67125 |
+
"bleu_score": 0.3485870508300006
|
67126 |
}
|
67127 |
},
|
67128 |
"mmlu": {
|
app.py
CHANGED
@@ -2297,11 +2297,11 @@ with block:
|
|
2297 |
""")
|
2298 |
|
2299 |
|
2300 |
-
with gr.TabItem("Reasoning"):
|
2301 |
|
2302 |
|
2303 |
# dataset 12:
|
2304 |
-
with gr.TabItem("MMLU"):
|
2305 |
with gr.TabItem("Zero Shot"):
|
2306 |
with gr.TabItem("Overall"):
|
2307 |
with gr.Row():
|
@@ -2355,7 +2355,7 @@ with block:
|
|
2355 |
|
2356 |
|
2357 |
# dataset 14:
|
2358 |
-
with gr.TabItem("C_EVAL"):
|
2359 |
with gr.TabItem("Zero Shot"):
|
2360 |
with gr.TabItem("Overall"):
|
2361 |
with gr.Row():
|
@@ -2408,7 +2408,7 @@ with block:
|
|
2408 |
|
2409 |
|
2410 |
# dataset 16:
|
2411 |
-
with gr.TabItem("CMMLU"):
|
2412 |
with gr.TabItem("Zero Shot"):
|
2413 |
with gr.TabItem("Overall"):
|
2414 |
with gr.Row():
|
@@ -2622,7 +2622,7 @@ with block:
|
|
2622 |
with gr.TabItem("Emotion"):
|
2623 |
|
2624 |
# dataset 18:
|
2625 |
-
with gr.TabItem("
|
2626 |
with gr.TabItem("Zero Shot"):
|
2627 |
with gr.TabItem("Overall"):
|
2628 |
with gr.Row():
|
|
|
2297 |
""")
|
2298 |
|
2299 |
|
2300 |
+
with gr.TabItem("General Reasoning"):
|
2301 |
|
2302 |
|
2303 |
# dataset 12:
|
2304 |
+
with gr.TabItem("MMLU Subset"):
|
2305 |
with gr.TabItem("Zero Shot"):
|
2306 |
with gr.TabItem("Overall"):
|
2307 |
with gr.Row():
|
|
|
2355 |
|
2356 |
|
2357 |
# dataset 14:
|
2358 |
+
with gr.TabItem("C_EVAL Subset"):
|
2359 |
with gr.TabItem("Zero Shot"):
|
2360 |
with gr.TabItem("Overall"):
|
2361 |
with gr.Row():
|
|
|
2408 |
|
2409 |
|
2410 |
# dataset 16:
|
2411 |
+
with gr.TabItem("CMMLU Subset"):
|
2412 |
with gr.TabItem("Zero Shot"):
|
2413 |
with gr.TabItem("Overall"):
|
2414 |
with gr.Row():
|
|
|
2622 |
with gr.TabItem("Emotion"):
|
2623 |
|
2624 |
# dataset 18:
|
2625 |
+
with gr.TabItem("Indonesian Emotion Classification"):
|
2626 |
with gr.TabItem("Zero Shot"):
|
2627 |
with gr.TabItem("Overall"):
|
2628 |
with gr.Row():
|